/******************************************************************************
*
* Copyright (C) 2018 The Android Open Source Project
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at:
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*
*****************************************************************************
* Originally developed and contributed by Ittiam Systems Pvt. Ltd, Bangalore
*/
/**
******************************************************************************
* @file hme_refine.c
*
* @brief
* Contains the implementation of the refinement layer searches and related
* functionality like CU merge.
*
* @author
* Ittiam
*
*
* List of Functions
*
*
******************************************************************************
*/
/*****************************************************************************/
/* File Includes */
/*****************************************************************************/
/* System include files */
#include <stdio.h>
#include <string.h>
#include <stdlib.h>
#include <assert.h>
#include <stdarg.h>
#include <math.h>
#include <limits.h>
/* User include files */
#include "ihevc_typedefs.h"
#include "itt_video_api.h"
#include "ihevce_api.h"
#include "rc_cntrl_param.h"
#include "rc_frame_info_collector.h"
#include "rc_look_ahead_params.h"
#include "ihevc_defs.h"
#include "ihevc_structs.h"
#include "ihevc_platform_macros.h"
#include "ihevc_deblk.h"
#include "ihevc_itrans_recon.h"
#include "ihevc_chroma_itrans_recon.h"
#include "ihevc_chroma_intra_pred.h"
#include "ihevc_intra_pred.h"
#include "ihevc_inter_pred.h"
#include "ihevc_mem_fns.h"
#include "ihevc_padding.h"
#include "ihevc_weighted_pred.h"
#include "ihevc_sao.h"
#include "ihevc_resi_trans.h"
#include "ihevc_quant_iquant_ssd.h"
#include "ihevc_cabac_tables.h"
#include "ihevce_defs.h"
#include "ihevce_lap_enc_structs.h"
#include "ihevce_multi_thrd_structs.h"
#include "ihevce_multi_thrd_funcs.h"
#include "ihevce_me_common_defs.h"
#include "ihevce_had_satd.h"
#include "ihevce_error_codes.h"
#include "ihevce_bitstream.h"
#include "ihevce_cabac.h"
#include "ihevce_rdoq_macros.h"
#include "ihevce_function_selector.h"
#include "ihevce_enc_structs.h"
#include "ihevce_entropy_structs.h"
#include "ihevce_cmn_utils_instr_set_router.h"
#include "ihevce_enc_loop_structs.h"
#include "ihevce_bs_compute_ctb.h"
#include "ihevce_global_tables.h"
#include "ihevce_dep_mngr_interface.h"
#include "hme_datatype.h"
#include "hme_interface.h"
#include "hme_common_defs.h"
#include "hme_defs.h"
#include "ihevce_me_instr_set_router.h"
#include "hme_globals.h"
#include "hme_utils.h"
#include "hme_coarse.h"
#include "hme_fullpel.h"
#include "hme_subpel.h"
#include "hme_refine.h"
#include "hme_err_compute.h"
#include "hme_common_utils.h"
#include "hme_search_algo.h"
#include "ihevce_stasino_helpers.h"
#include "ihevce_common_utils.h"
/*****************************************************************************/
/* Globals */
/*****************************************************************************/
/* Lookup table that converts a raster-scan position [row][col] of a 4x4 grid inside a CTB */
/* into the matching z-scan order index. Every entry is 4 times the z-order rank of the cell. */
UWORD8 gau1_raster_scan_to_ctb[4][4] = {
    { 0, 4, 16, 20 },
    { 8, 12, 24, 28 },
    { 32, 36, 48, 52 },
    { 40, 44, 56, 60 }
};
/*****************************************************************************/
/* Extern Function declaration */
/*****************************************************************************/
/* Prototype of get_ctb_attrs(), which is defined outside this part of the file. */
/* It takes a CTB start position, the picture width/height and the ME frame context and */
/* returns a pointer to a ctb_boundary_attrs_t. */
/* NOTE(review): units of the position/size arguments are not visible here - confirm at the */
/* definition. */
extern ctb_boundary_attrs_t *
get_ctb_attrs(S32 ctb_start_x, S32 ctb_start_y, S32 pic_wd, S32 pic_ht, me_frm_ctxt_t *ps_ctxt);
/* Function pointer type: receives a search node, the current and the coarse layer contexts, */
/* a position, one reference id and a result id. */
/* NOTE(review): going by the name this projects a colocated candidate from the coarse layer */
/* into ps_search_node - confirm at the place where the pointer is assigned. */
typedef void (*PF_HME_PROJECT_COLOC_CANDT_FXN)(
search_node_t *ps_search_node,
layer_ctxt_t *ps_curr_layer,
layer_ctxt_t *ps_coarse_layer,
S32 i4_pos_x,
S32 i4_pos_y,
S08 i1_ref_id,
S32 i4_result_id);
/* Function pointer type: same leading arguments as PF_HME_PROJECT_COLOC_CANDT_FXN, but the */
/* single reference id is replaced by the number of active L0 references, a prediction */
/* direction and a default reference id. */
/* NOTE(review): presumably the layer-0 ME flavour of the projection routine - confirm. */
typedef void (*PF_HME_PROJECT_COLOC_CANDT_L0_ME_FXN)(
search_node_t *ps_search_node,
layer_ctxt_t *ps_curr_layer,
layer_ctxt_t *ps_coarse_layer,
S32 i4_pos_x,
S32 i4_pos_y,
S32 i4_num_act_ref_l0,
U08 u1_pred_dir,
U08 u1_default_ref_id,
S32 i4_result_id);
/*****************************************************************************/
/* Function Definitions */
/*****************************************************************************/
/**
********************************************************************************
*  @fn     ihevce_no_wt_copy
*
*  @brief  Copies a square block of temp_stride x temp_stride samples from the
*          plane ps_curr_layer->ppu1_list_inp[ref] selected by the PU into
*          pu1_temp_pred, without applying any weighting.
*
*  @param[in]  ps_ctxt       : not referenced by this function
*  @param[in]  ps_curr_layer : supplies the plane pointers (ppu1_list_inp) and
*                              their stride (i4_inp_stride)
*  @param[in]  ps_pu         : uni-directional PU (PRED_L0 or PRED_L1); its
*                              reference index and MV of that direction are used
*  @param[out] pu1_temp_pred : destination buffer, written with stride
*                              temp_stride
*  @param[in]  temp_stride   : destination stride; also used as the width and
*                              the height of the copied block
*  @param[in]  blk_x, blk_y  : block position; multiplied by 8 to get samples
*
*  @return None
********************************************************************************
*/
void ihevce_no_wt_copy(
    coarse_me_ctxt_t *ps_ctxt,
    layer_ctxt_t *ps_curr_layer,
    pu_t *ps_pu,
    UWORD8 *pu1_temp_pred,
    WORD32 temp_stride,
    WORD32 blk_x,
    WORD32 blk_y)
{
    UWORD8 *pu1_src;
    UWORD8 *pu1_dst = pu1_temp_pred;
    WORD32 src_stride, src_offset;
    WORD32 mv_x, mv_y;
    WORD32 x, y;
    WORD8 i1_ref;

    ASSERT((ps_pu->b2_pred_mode == PRED_L0) || (ps_pu->b2_pred_mode == PRED_L1));

    /* Only the reference index and the MV differ between the two directions */
    if(ps_pu->b2_pred_mode == PRED_L0)
    {
        i1_ref = ps_pu->mv.i1_l0_ref_idx;
        mv_x = ps_pu->mv.s_l0_mv.i2_mvx;
        mv_y = ps_pu->mv.s_l0_mv.i2_mvy;
    }
    else
    {
        i1_ref = ps_pu->mv.i1_l1_ref_idx;
        mv_x = ps_pu->mv.s_l1_mv.i2_mvx;
        mv_y = ps_pu->mv.s_l1_mv.i2_mvy;
    }

    pu1_src = ps_curr_layer->ppu1_list_inp[i1_ref];
    src_stride = ps_curr_layer->i4_inp_stride;

    /* Block position is in units of 8 samples; the MV is added unscaled */
    src_offset = ((blk_y << 3) + mv_y) * src_stride;
    src_offset += (blk_x << 3) + mv_x;
    pu1_src += src_offset;

    for(y = 0; y < temp_stride; y++)
    {
        for(x = 0; x < temp_stride; x++)
        {
            WORD32 i4_sample = pu1_src[x];

            pu1_dst[x] = CLIP_U8(i4_sample);
        }

        pu1_src += src_stride;
        pu1_dst += temp_stride;
    }
}
/**
********************************************************************************
*  @fn     hme_add_clustered_mvs_as_merge_cands
*
*  @brief  Appends the MVs stored in the given clusters to the merge candidate
*          list. Every MV is clipped to the MV range of its reference and is
*          kept only if no identical (mvx, mvy, ref id) entry is in the list
*          already.
*
*  @param[in]  ps_cluster_base            : array of i4_num_clusters clusters
*  @param[out] ps_merge_cand              : candidate list, filled from index 0
*  @param[in]  pps_range_prms             : MV range per reference id
*  @param[in]  pu1_refid_to_pred_dir_list : per reference id flag; a cluster is
*                                           used when u1_pred_dir equals the
*                                           logical NOT of its entry
*  @param[in]  i4_num_clusters            : number of clusters to scan
*  @param[in]  u1_pred_dir                : prediction direction being built
*
*  @return Number of candidates written to ps_merge_cand
*
*  @remarks NOTE(review): there is no capacity check in here; the caller has to
*           size ps_merge_cand for the worst case.
********************************************************************************
*/
static WORD32 hme_add_clustered_mvs_as_merge_cands(
    cluster_data_t *ps_cluster_base,
    search_node_t *ps_merge_cand,
    range_prms_t **pps_range_prms,
    U08 *pu1_refid_to_pred_dir_list,
    WORD32 i4_num_clusters,
    U08 u1_pred_dir)
{
    WORD32 i, j, k;
    WORD32 i4_num_cands_added = 0;
    WORD32 i4_num_mvs_in_cluster;

    for(i = 0; i < i4_num_clusters; i++)
    {
        cluster_data_t *ps_data = &ps_cluster_base[i];

        /* Skip clusters whose reference belongs to the other direction */
        if(u1_pred_dir != !pu1_refid_to_pred_dir_list[ps_data->ref_id])
        {
            continue;
        }

        i4_num_mvs_in_cluster = ps_data->num_mvs;

        for(j = 0; j < i4_num_mvs_in_cluster; j++)
        {
            /* Build the candidate in the first free slot; it only becomes */
            /* part of the list if the count is incremented below          */
            search_node_t *ps_new_cand = &ps_merge_cand[i4_num_cands_added];

            ps_new_cand->s_mv.i2_mvx = ps_data->as_mv[j].mvx;
            ps_new_cand->s_mv.i2_mvy = ps_data->as_mv[j].mvy;
            ps_new_cand->i1_ref_idx = ps_data->ref_id;

            CLIP_MV_WITHIN_RANGE(
                ps_new_cand->s_mv.i2_mvx,
                ps_new_cand->s_mv.i2_mvy,
                pps_range_prms[ps_data->ref_id],
                0,
                0,
                0);

            /* The entries in the list are clipped, hence the comparison has */
            /* to use the clipped MV. Comparing the unclipped cluster MV     */
            /* would let MVs that clip to the same point in more than once.  */
            for(k = 0; k < i4_num_cands_added; k++)
            {
                if((ps_merge_cand[k].s_mv.i2_mvx == ps_new_cand->s_mv.i2_mvx) &&
                   (ps_merge_cand[k].s_mv.i2_mvy == ps_new_cand->s_mv.i2_mvy) &&
                   (ps_merge_cand[k].i1_ref_idx == ps_new_cand->i1_ref_idx))
                {
                    break;
                }
            }

            if(k == i4_num_cands_added)
            {
                i4_num_cands_added++;
            }
        }
    }

    return i4_num_cands_added;
}
static WORD32 hme_add_me_best_as_merge_cands(
search_results_t **pps_child_data_array,
inter_cu_results_t *ps_8x8cu_results,
search_node_t *ps_merge_cand,
range_prms_t **pps_range_prms,
U08 *pu1_refid_to_pred_dir_list,
S08 *pi1_past_list,
S08 *pi1_future_list,
BLK_SIZE_T e_blk_size,
ME_QUALITY_PRESETS_T e_quality_preset,
S32 i4_num_cands_added,
U08 u1_pred_dir)
{
WORD32 i, j, k;
WORD32 i4_max_cands_to_add;
WORD32 i4_result_id = 0;
ASSERT(!pps_child_data_array[0]->u1_split_flag || (BLK_64x64 != e_blk_size));
ASSERT(!pps_child_data_array[1]->u1_split_flag || (BLK_64x64 != e_blk_size));
ASSERT(!pps_child_data_array[2]->u1_split_flag || (BLK_64x64 != e_blk_size));
ASSERT(!pps_child_data_array[3]->u1_split_flag || (BLK_64x64 != e_blk_size));
switch(e_quality_preset)
{
case ME_PRISTINE_QUALITY:
{
i4_max_cands_to_add = MAX_MERGE_CANDTS;
break;
}
case ME_HIGH_QUALITY:
{
/* All 4 children are split and each grandchild contributes an MV */
/* and 2 best results per grandchild */
i4_max_cands_to_add = 4 * 4 * 2;
break;
}
case ME_MEDIUM_SPEED:
{
i4_max_cands_to_add = 4 * 2 * 2;
break;
}
case ME_HIGH_SPEED:
case ME_XTREME_SPEED:
case ME_XTREME_SPEED_25:
{
i4_max_cands_to_add = 4 * 2 * 1;
break;
}
}
while(i4_result_id < 4)
{
for(i = 0; i < 4; i++)
{
inter_cu_results_t *ps_child_data = pps_child_data_array[i]->ps_cu_results;
inter_cu_results_t *ps_grandchild_data = &ps_8x8cu_results[i << 2];
if(!pps_child_data_array[i]->u1_split_flag)
{
part_type_results_t *ps_data = &ps_child_data->ps_best_results[i4_result_id];
if(ps_child_data->u1_num_best_results <= i4_result_id)
{
continue;
}
if(ps_data->as_pu_results->pu.b1_intra_flag)
{
continue;
}
for(j = 0; j <= (ps_data->u1_part_type != PRT_2Nx2N); j++)
{
mv_t *ps_mv;
S08 i1_ref_idx;
pu_t *ps_pu = &ps_data->as_pu_results[j].pu;
if(u1_pred_dir !=
((ps_pu->b2_pred_mode == 2) ? u1_pred_dir : ps_pu->b2_pred_mode))
{
continue;
}
if(u1_pred_dir)
{
ps_mv = &ps_pu->mv.s_l1_mv;
i1_ref_idx = pi1_future_list[ps_pu->mv.i1_l1_ref_idx];
}
else
{
ps_mv = &ps_pu->mv.s_l0_mv;
i1_ref_idx = pi1_past_list[ps_pu->mv.i1_l0_ref_idx];
}
if(-1 == i1_ref_idx)
{
continue;
}
ps_merge_cand[i4_num_cands_added].s_mv.i2_mvx = ps_mv->i2_mvx;
ps_merge_cand[i4_num_cands_added].s_mv.i2_mvy = ps_mv->i2_mvy;
ps_merge_cand[i4_num_cands_added].i1_ref_idx = i1_ref_idx;
CLIP_MV_WITHIN_RANGE(
ps_merge_cand[i4_num_cands_added].s_mv.i2_mvx,
ps_merge_cand[i4_num_cands_added].s_mv.i2_mvy,
pps_range_prms[i1_ref_idx],
0,
0,
0);
for(k = 0; k < i4_num_cands_added; k++)
{
if((ps_merge_cand[k].s_mv.i2_mvx == ps_mv->i2_mvx) &&
(ps_merge_cand[k].s_mv.i2_mvy == ps_mv->i2_mvy) &&
(ps_merge_cand[k].i1_ref_idx == i1_ref_idx))
{
break;
}
}
if(k == i4_num_cands_added)
{
i4_num_cands_added++;
if(i4_max_cands_to_add <= i4_num_cands_added)
{
return i4_num_cands_added;
}
}
}
}
else
{
for(j = 0; j < 4; j++)
{
mv_t *ps_mv;
S08 i1_ref_idx;
part_type_results_t *ps_data = ps_grandchild_data[j].ps_best_results;
pu_t *ps_pu = &ps_data->as_pu_results[0].pu;
ASSERT(ps_data->u1_part_type == PRT_2Nx2N);
if(ps_grandchild_data[j].u1_num_best_results <= i4_result_id)
{
continue;
}
if(ps_data->as_pu_results->pu.b1_intra_flag)
{
continue;
}
if(u1_pred_dir !=
((ps_pu->b2_pred_mode == 2) ? u1_pred_dir : ps_pu->b2_pred_mode))
{
continue;
}
if(u1_pred_dir)
{
ps_mv = &ps_pu->mv.s_l1_mv;
i1_ref_idx = pi1_future_list[ps_pu->mv.i1_l1_ref_idx];
}
else
{
ps_mv = &ps_pu->mv.s_l0_mv;
i1_ref_idx = pi1_past_list[ps_pu->mv.i1_l0_ref_idx];
}
ps_merge_cand[i4_num_cands_added].s_mv.i2_mvx = ps_mv->i2_mvx;
ps_merge_cand[i4_num_cands_added].s_mv.i2_mvy = ps_mv->i2_mvy;
ps_merge_cand[i4_num_cands_added].i1_ref_idx = i1_ref_idx;
CLIP_MV_WITHIN_RANGE(
ps_merge_cand[i4_num_cands_added].s_mv.i2_mvx,
ps_merge_cand[i4_num_cands_added].s_mv.i2_mvy,
pps_range_prms[i1_ref_idx],
0,
0,
0);
for(k = 0; k < i4_num_cands_added; k++)
{
if((ps_merge_cand[k].s_mv.i2_mvx == ps_mv->i2_mvx) &&
(ps_merge_cand[k].s_mv.i2_mvy == ps_mv->i2_mvy) &&
(ps_merge_cand[k].i1_ref_idx == i1_ref_idx))
{
break;
}
}
if(k == i4_num_cands_added)
{
i4_num_cands_added++;
if(i4_max_cands_to_add <= i4_num_cands_added)
{
return i4_num_cands_added;
}
}
}
}
}
i4_result_id++;
}
return i4_num_cands_added;
}
/**
********************************************************************************
*  @fn     hme_add_cands_for_merge_eval
*
*  @brief  Fills ps_merge_cand with the candidates to be evaluated for a merge.
*          For the pristine quality preset the clustered MVs of the block are
*          added first; for every preset the best ME results of the children
*          are appended after that.
*
*  @param[in]  ps_cluster_info  : cluster data; read only for pristine quality
*  @param[in]  pps_child_data_array, ps_8x8cu_results, pps_range_prms,
*              pu1_refid_to_pred_dir_list, pi1_past_list, pi1_future_list
*                               : handed over to the two helpers unchanged
*  @param[out] ps_merge_cand    : candidate list
*  @param[in]  e_quality_preset : ME quality preset
*  @param[in]  e_blk_size       : BLK_32x32 selects the 32x32 cluster entry
*                                 given by u1_blk_id, any other value the
*                                 64x64 entry
*  @param[in]  u1_pred_dir      : prediction direction being built
*  @param[in]  u1_blk_id        : index of the 32x32 block in ps_32x32_blk
*
*  @return Number of candidates in ps_merge_cand
********************************************************************************
*/
WORD32 hme_add_cands_for_merge_eval(
    ctb_cluster_info_t *ps_cluster_info,
    search_results_t **pps_child_data_array,
    inter_cu_results_t *ps_8x8cu_results,
    range_prms_t **pps_range_prms,
    search_node_t *ps_merge_cand,
    U08 *pu1_refid_to_pred_dir_list,
    S08 *pi1_past_list,
    S08 *pi1_future_list,
    ME_QUALITY_PRESETS_T e_quality_preset,
    BLK_SIZE_T e_blk_size,
    U08 u1_pred_dir,
    U08 u1_blk_id)
{
    WORD32 i4_cand_count = 0;

    if(ME_PRISTINE_QUALITY == e_quality_preset)
    {
        cluster_data_t *ps_clusters;
        WORD32 i4_cluster_count;

        if(BLK_32x32 != e_blk_size)
        {
            ps_clusters = ps_cluster_info->ps_64x64_blk->as_cluster_data;
            i4_cluster_count = ps_cluster_info->ps_64x64_blk->num_clusters;
        }
        else
        {
            ps_clusters = ps_cluster_info->ps_32x32_blk[u1_blk_id].as_cluster_data;
            i4_cluster_count = ps_cluster_info->ps_32x32_blk[u1_blk_id].num_clusters;
        }

        i4_cand_count = hme_add_clustered_mvs_as_merge_cands(
            ps_clusters,
            ps_merge_cand,
            pps_range_prms,
            pu1_refid_to_pred_dir_list,
            i4_cluster_count,
            u1_pred_dir);
    }

    /* ME best results go behind whatever the clusters contributed */
    return hme_add_me_best_as_merge_cands(
        pps_child_data_array,
        ps_8x8cu_results,
        ps_merge_cand,
        pps_range_prms,
        pu1_refid_to_pred_dir_list,
        pi1_past_list,
        pi1_future_list,
        e_blk_size,
        e_quality_preset,
        i4_cand_count,
        u1_pred_dir);
}
/**
********************************************************************************
* @fn WORD32 hme_pick_eval_merge_candts(hme_merge_prms_t *ps_merge_prms,
* hme_subpel_prms_t *ps_subpel_prms,
* S32 i4_search_idx,
* S32 i4_best_part_type,
* S32 i4_is_vert,
* wgt_pred_ctxt_t *ps_wt_inp_prms,
* S32 i4_frm_qstep,
* ihevce_cmn_opt_func_t *ps_cmn_utils_optimised_function_list,
* ihevce_me_optimised_function_list_t *ps_me_optimised_function_list)
*
* @brief Collects the unique motion candidates of the 4 constituent CUs (or
* of their grandchildren), evaluates every candidate once at the size
* of the merged CU (32x32 or 64x64) and hands the outcome to
* hme_update_results_pt_pu_best1_subpel_hs(). Afterwards the mv cost
* and total cost of the stored results of every enabled partition are
* filled in.
*
* @param[in,out] ps_merge_prms : Parameters sent from higher layers. Out of
* these params, the search result structure is also derived and
* updated during the search
*
* @param[in] ps_subpel_prms : supplies the SAD/SATD switch (i4_use_satd), the
* input stride, the working memory for interpolation, the qpel
* interpolation function and the noisy CU flag
*
* @param[in] i4_search_idx : index of the result buffer / prediction context
* that is read and updated; also passed on as prediction direction
*
* @param[in] i4_best_part_type : not referenced in the function body
*
* @param[in] i4_is_vert : not referenced in the function body
*
* @param[in] ps_wt_inp_prms : weighted input planes and inverse weights
*
* @param[in] i4_frm_qstep : not referenced in the function body
*
* @param[in] ps_cmn_utils_optimised_function_list : stored in the error
* params that are passed to the SAD/SATD function
*
* @param[in] ps_me_optimised_function_list : supplies the stim injected
* distortion function used for noisy CUs
*
* @return Number of unique merge candidates evaluated (0 if none was found)
********************************************************************************
*/
WORD32 hme_pick_eval_merge_candts(
hme_merge_prms_t *ps_merge_prms,
hme_subpel_prms_t *ps_subpel_prms,
S32 i4_search_idx,
S32 i4_best_part_type,
S32 i4_is_vert,
wgt_pred_ctxt_t *ps_wt_inp_prms,
S32 i4_frm_qstep,
ihevce_cmn_opt_func_t *ps_cmn_utils_optimised_function_list,
ihevce_me_optimised_function_list_t *ps_me_optimised_function_list)
{
S32 x_off, y_off;
search_node_t *ps_search_node;
S32 ai4_valid_part_ids[TOT_NUM_PARTS + 1];
S32 i4_num_valid_parts;
pred_ctxt_t *ps_pred_ctxt;
search_node_t as_merge_unique_node[MAX_MERGE_CANDTS];
S32 num_unique_nodes_cu_merge = 0;
search_results_t *ps_search_results = ps_merge_prms->ps_results_merge;
CU_SIZE_T e_cu_size = ps_search_results->e_cu_size;
S32 i4_part_mask = ps_search_results->i4_part_mask;
search_results_t *aps_child_results[4];
layer_ctxt_t *ps_curr_layer = ps_merge_prms->ps_layer_ctxt;
S32 i4_ref_stride, i, j;
result_upd_prms_t s_result_prms;
BLK_SIZE_T e_blk_size = ge_cu_size_to_blk_size[e_cu_size];
S32 i4_offset;
/*************************************************************************/
/* Function pointer for SAD/SATD, array and prms structure to pass to */
/* This function */
/*************************************************************************/
PF_SAD_FXN_T pf_err_compute;
S32 ai4_sad_grid[9][17];
err_prms_t s_err_prms;
/*************************************************************************/
/* Allowed MV RANGE */
/*************************************************************************/
range_prms_t **pps_range_prms = ps_merge_prms->aps_mv_range;
PF_INTERP_FXN_T pf_qpel_interp;
PF_MV_COST_FXN pf_mv_cost_compute;
WORD32 pred_lx;
U08 *apu1_hpel_ref[4];
interp_prms_t s_interp_prms;
S32 i4_interp_buf_id;
S32 i4_ctb_x_off = ps_merge_prms->i4_ctb_x_off;
S32 i4_ctb_y_off = ps_merge_prms->i4_ctb_y_off;
/* Sanity checks */
ASSERT((e_blk_size == BLK_64x64) || (e_blk_size == BLK_32x32));
s_err_prms.ps_cmn_utils_optimised_function_list = ps_cmn_utils_optimised_function_list;
/* Initialize all the ptrs to child CUs for merge decision */
aps_child_results[0] = ps_merge_prms->ps_results_tl;
aps_child_results[1] = ps_merge_prms->ps_results_tr;
aps_child_results[2] = ps_merge_prms->ps_results_bl;
aps_child_results[3] = ps_merge_prms->ps_results_br;
num_unique_nodes_cu_merge = 0;
pf_mv_cost_compute = compute_mv_cost_implicit_high_speed;
/* Pristine quality takes its candidates from hme_add_cands_for_merge_eval(); */
/* all other presets gather them inline in the else branch below. */
if(ME_PRISTINE_QUALITY == ps_merge_prms->e_quality_preset)
{
/* The last argument is the block id used for the 32x32 cluster lookup: */
/* (x_off >> 5) + (y_off >> 4), i.e. 0..3 when both offsets are 0 or 32 */
num_unique_nodes_cu_merge = hme_add_cands_for_merge_eval(
ps_merge_prms->ps_cluster_info,
aps_child_results,
ps_merge_prms->ps_8x8_cu_results,
pps_range_prms,
as_merge_unique_node,
ps_search_results->pu1_is_past,
ps_merge_prms->pi1_past_list,
ps_merge_prms->pi1_future_list,
ps_merge_prms->e_quality_preset,
e_blk_size,
i4_search_idx,
(ps_merge_prms->ps_results_merge->u1_x_off >> 5) +
(ps_merge_prms->ps_results_merge->u1_y_off >> 4));
}
else
{
/*************************************************************************/
/* Populate the list of unique search nodes in the child CUs for merge */
/* evaluation */
/*************************************************************************/
for(i = 0; i < 4; i++)
{
search_node_t s_search_node;
PART_TYPE_T e_part_type;
PART_ID_T e_part_id;
WORD32 part_num;
search_results_t *ps_child = aps_child_results[i];
/* Child has results of its own: use the MVs of its best partition type, */
/* unless its only result is intra */
if(ps_child->ps_cu_results->u1_num_best_results)
{
if(!((ps_child->ps_cu_results->ps_best_results->as_pu_results->pu.b1_intra_flag) &&
(1 == ps_child->ps_cu_results->u1_num_best_results)))
{
e_part_type =
(PART_TYPE_T)ps_child->ps_cu_results->ps_best_results[0].u1_part_type;
ASSERT(num_unique_nodes_cu_merge < MAX_MERGE_CANDTS);
/* Insert mvs of NxN partitions. */
for(part_num = 0; part_num < gau1_num_parts_in_part_type[((S32)e_part_type)];
part_num++)
{
e_part_id = ge_part_type_to_part_id[e_part_type][part_num];
if(ps_child->aps_part_results[i4_search_idx][e_part_id]->i1_ref_idx != -1)
{
s_search_node = *ps_child->aps_part_results[i4_search_idx][e_part_id];
if(s_search_node.s_mv.i2_mvx != INTRA_MV)
{
CLIP_MV_WITHIN_RANGE(
s_search_node.s_mv.i2_mvx,
s_search_node.s_mv.i2_mvy,
pps_range_prms[s_search_node.i1_ref_idx],
0,
0,
0);
INSERT_NEW_NODE_NOMAP(
as_merge_unique_node,
num_unique_nodes_cu_merge,
s_search_node,
1);
}
}
}
}
}
/* Child reports no best result: fall back to its 4 grandchildren, provided */
/* the first grandchild is not a lone intra result */
else if(!((ps_merge_prms->ps_results_grandchild[(i << 2)]
.ps_cu_results->ps_best_results->as_pu_results->pu.b1_intra_flag) &&
(1 == ps_merge_prms->ps_results_grandchild[(i << 2)]
.ps_cu_results->u1_num_best_results)))
{
search_results_t *ps_results_root = &ps_merge_prms->ps_results_grandchild[(i << 2)];
for(j = 0; j < 4; j++)
{
e_part_type = (PART_TYPE_T)ps_results_root[j]
.ps_cu_results->ps_best_results[0]
.u1_part_type;
ASSERT(num_unique_nodes_cu_merge < MAX_MERGE_CANDTS);
/* Insert mvs of NxN partitions. */
for(part_num = 0; part_num < gau1_num_parts_in_part_type[((S32)e_part_type)];
part_num++)
{
e_part_id = ge_part_type_to_part_id[e_part_type][part_num];
/* NOTE(review): the intra flag tested here is the one of the child */
/* (ps_child), although this branch is entered only when the child */
/* reports zero best results - confirm that this is intended. */
if((ps_results_root[j]
.aps_part_results[i4_search_idx][e_part_id]
->i1_ref_idx != -1) &&
(!ps_child->ps_cu_results->ps_best_results->as_pu_results->pu
.b1_intra_flag))
{
s_search_node =
*ps_results_root[j].aps_part_results[i4_search_idx][e_part_id];
if(s_search_node.s_mv.i2_mvx != INTRA_MV)
{
CLIP_MV_WITHIN_RANGE(
s_search_node.s_mv.i2_mvx,
s_search_node.s_mv.i2_mvy,
pps_range_prms[s_search_node.i1_ref_idx],
0,
0,
0);
INSERT_NEW_NODE_NOMAP(
as_merge_unique_node,
num_unique_nodes_cu_merge,
s_search_node,
1);
}
}
}
}
}
}
}
/* Nothing to evaluate */
if(0 == num_unique_nodes_cu_merge)
{
return 0;
}
/*************************************************************************/
/* Appropriate Err compute fxn, depends on SAD/SATD, blk size and remains*/
/* fixed through this subpel refinement for this partition. */
/* Note, we do not enable grid sads since one pt is evaluated per node */
/* Hence, part mask is also nearly dont care and we use 2Nx2N enabled. */
/*************************************************************************/
/* Same field as read by the initialiser of i4_part_mask above */
i4_part_mask = ps_search_results->i4_part_mask;
/* Need to add the corresponding SAD functions for EXTREME SPEED : Lokesh */
if(ps_subpel_prms->i4_use_satd)
{
if(BLK_32x32 == e_blk_size)
{
pf_err_compute = hme_evalsatd_pt_pu_32x32;
}
else
{
pf_err_compute = hme_evalsatd_pt_pu_64x64;
}
}
else
{
pf_err_compute = (PF_SAD_FXN_T)hme_evalsad_grid_pu_MxM;
}
i4_ref_stride = ps_curr_layer->i4_rec_stride;
/* Position of the merged CU is taken from its top left child */
x_off = ps_merge_prms->ps_results_tl->u1_x_off;
y_off = ps_merge_prms->ps_results_tl->u1_y_off;
i4_offset = x_off + i4_ctb_x_off + ((y_off + i4_ctb_y_off) * i4_ref_stride);
/*************************************************************************/
/* This array stores the ids of the partitions whose */
/* SADs are updated. Since the partitions whose SADs are updated may not */
/* be in contiguous order, we supply another level of indirection. */
/*************************************************************************/
i4_num_valid_parts = hme_create_valid_part_ids(i4_part_mask, ai4_valid_part_ids);
/* Initialize result params used for partition update */
s_result_prms.pf_mv_cost_compute = NULL;
s_result_prms.ps_search_results = ps_search_results;
s_result_prms.pi4_valid_part_ids = ai4_valid_part_ids;
s_result_prms.i1_ref_idx = i4_search_idx;
s_result_prms.i4_part_mask = i4_part_mask;
s_result_prms.pi4_sad_grid = &ai4_sad_grid[0][0];
s_result_prms.i4_grid_mask = 1;
/* One time Initialization of error params used for SAD/SATD compute */
s_err_prms.i4_inp_stride = ps_subpel_prms->i4_inp_stride;
s_err_prms.i4_ref_stride = i4_ref_stride;
s_err_prms.i4_part_mask = (ENABLE_2Nx2N);
s_err_prms.i4_grid_mask = 1;
s_err_prms.pi4_sad_grid = &ai4_sad_grid[0][0];
s_err_prms.i4_blk_wd = gau1_blk_size_to_wd[e_blk_size];
s_err_prms.i4_blk_ht = gau1_blk_size_to_ht[e_blk_size];
s_err_prms.i4_step = 1;
/*************************************************************************/
/* One time preparation of non changing interpolation params. */
/*************************************************************************/
s_interp_prms.i4_ref_stride = i4_ref_stride;
s_interp_prms.i4_blk_wd = gau1_blk_size_to_wd[e_blk_size];
s_interp_prms.i4_blk_ht = gau1_blk_size_to_ht[e_blk_size];
s_interp_prms.apu1_interp_out[0] = ps_subpel_prms->pu1_wkg_mem;
s_interp_prms.i4_out_stride = gau1_blk_size_to_wd[e_blk_size];
i4_interp_buf_id = 0;
pf_qpel_interp = ps_subpel_prms->pf_qpel_interp;
/***************************************************************************/
/* Compute SATD/SAD for all unique nodes of children CUs to get best merge */
/* results */
/***************************************************************************/
for(i = 0; i < num_unique_nodes_cu_merge; i++)
{
WORD8 i1_ref_idx;
ps_search_node = &as_merge_unique_node[i];
/*********************************************************************/
/* Compute the base pointer for input, interpolated buffers */
/* The base pointers are stored in this order: */
/* [0] fx, fy : 0, 0 [1] hx, fy : 0.5, 0 */
/* [2] fx, hy : 0, 0.5 [3] hx, hy : 0.5, 0.5 */
/* To these, we need to add the offset of the current node */
/*********************************************************************/
i1_ref_idx = ps_search_node->i1_ref_idx;
apu1_hpel_ref[0] = ps_curr_layer->ppu1_list_rec_fxfy[i1_ref_idx] + i4_offset;
apu1_hpel_ref[1] = ps_curr_layer->ppu1_list_rec_hxfy[i1_ref_idx] + i4_offset;
apu1_hpel_ref[2] = ps_curr_layer->ppu1_list_rec_fxhy[i1_ref_idx] + i4_offset;
apu1_hpel_ref[3] = ps_curr_layer->ppu1_list_rec_hxhy[i1_ref_idx] + i4_offset;
s_interp_prms.ppu1_ref = &apu1_hpel_ref[0];
/* pu1_final_out / i4_final_out_stride are read right after this call; */
/* presumably the interpolation function sets them - TODO confirm */
pf_qpel_interp(
&s_interp_prms,
ps_search_node->s_mv.i2_mvx,
ps_search_node->s_mv.i2_mvy,
i4_interp_buf_id);
pred_lx = i4_search_idx;
ps_pred_ctxt = &ps_search_results->as_pred_ctxt[pred_lx];
s_result_prms.u1_pred_lx = pred_lx;
s_result_prms.ps_search_node_base = ps_search_node;
s_err_prms.pu1_inp =
ps_wt_inp_prms->apu1_wt_inp[i1_ref_idx] + x_off + y_off * ps_subpel_prms->i4_inp_stride;
s_err_prms.pu1_ref = s_interp_prms.pu1_final_out;
s_err_prms.i4_ref_stride = s_interp_prms.i4_final_out_stride;
/* Carry out the SAD/SATD. This call also does the TU RECURSION.
Here the tu recursion logic is restricted with the size of the PU*/
pf_err_compute(&s_err_prms);
/* Noisy CU with a non zero stim multiplier: the values in the SAD grid are */
/* passed through the stim injected distortion function */
if(ps_subpel_prms->u1_is_cu_noisy &&
ps_merge_prms->ps_inter_ctb_prms->i4_alpha_stim_multiplier)
{
ps_me_optimised_function_list->pf_compute_stim_injected_distortion_for_all_parts(
s_err_prms.pu1_ref,
s_err_prms.i4_ref_stride,
ai4_valid_part_ids,
ps_merge_prms->ps_inter_ctb_prms->pu8_part_src_sigmaX,
ps_merge_prms->ps_inter_ctb_prms->pu8_part_src_sigmaXSquared,
s_err_prms.pi4_sad_grid,
ps_merge_prms->ps_inter_ctb_prms->i4_alpha_stim_multiplier,
ps_wt_inp_prms->a_inv_wpred_wt[i1_ref_idx],
ps_wt_inp_prms->ai4_shift_val[i1_ref_idx],
i4_num_valid_parts,
ps_wt_inp_prms->wpred_log_wdc,
(BLK_32x32 == e_blk_size) ? 32 : 64);
}
/* Update the mv's */
s_result_prms.i2_mv_x = ps_search_node->s_mv.i2_mvx;
s_result_prms.i2_mv_y = ps_search_node->s_mv.i2_mvy;
/* Update best results */
hme_update_results_pt_pu_best1_subpel_hs(&s_err_prms, &s_result_prms);
}
/************************************************************************/
/* Update mv cost and total cost for each valid partition in the CU */
/************************************************************************/
for(i = 0; i < TOT_NUM_PARTS; i++)
{
if(i4_part_mask & (1 << i))
{
/* This j shadows the function level j declared at the top */
WORD32 j;
WORD32 i4_mv_cost;
ps_search_node = ps_search_results->aps_part_results[i4_search_idx][i];
for(j = 0;
j < MIN(ps_search_results->u1_num_results_per_part, num_unique_nodes_cu_merge);
j++)
{
/* The node pointer advances only past entries with a valid ref idx; */
/* once an entry with ref idx -1 is met, the remaining iterations */
/* test that same entry again */
if(ps_search_node->i1_ref_idx != -1)
{
pred_lx = i4_search_idx;
ps_pred_ctxt = &ps_search_results->as_pred_ctxt[pred_lx];
/* Prediction context should now deal with qpel units */
HME_SET_MVPRED_RES(ps_pred_ctxt, MV_RES_QPEL);
ps_search_node->u1_subpel_done = 1;
ps_search_node->u1_is_avail = 1;
i4_mv_cost =
pf_mv_cost_compute(ps_search_node, ps_pred_ctxt, (PART_ID_T)i, MV_RES_QPEL);
ps_search_node->i4_tot_cost = i4_mv_cost + ps_search_node->i4_sad;
ps_search_node->i4_mv_cost = i4_mv_cost;
ps_search_node++;
}
}
}
}
return num_unique_nodes_cu_merge;
}
/* NOTE(review): the code using this limit lies outside the visible part of the file; */
/* presumably the number of intra partitions among the children up to which a merge is */
/* still attempted - confirm against the merge routines below. */
#define CU_MERGE_MAX_INTRA_PARTS 4
/**
********************************************************************************
* @fn hme_try_merge_high_speed
*
* @brief Attempts to merge 4 NxN candts to a 2Nx2N candt, either as a single
entity or with partitions, for the high speed preset
*
* @param[in,out] hme_merge_prms_t: Params for CU merge
*
* @return MERGE_RESULT_T type result of merge (CU_MERGED/CU_SPLIT)
********************************************************************************
*/
CU_MERGE_RESULT_T hme_try_merge_high_speed(
me_ctxt_t *ps_thrd_ctxt,
me_frm_ctxt_t *ps_ctxt,
ipe_l0_ctb_analyse_for_me_t *ps_cur_ipe_ctb,
hme_subpel_prms_t *ps_subpel_prms,
hme_merge_prms_t *ps_merge_prms,
inter_pu_results_t *ps_pu_results,
pu_result_t *ps_pu_result)
{
search_results_t *ps_results_tl, *ps_results_tr;
search_results_t *ps_results_bl, *ps_results_br;
S32 i;
S32 i4_search_idx;
S32 i4_cost_parent;
S32 intra_cu_size;
ULWORD64 au8_final_src_sigmaX[17], au8_final_src_sigmaXSquared[17];
search_results_t *ps_results_merge = ps_merge_prms->ps_results_merge;
wgt_pred_ctxt_t *ps_wt_inp_prms = &ps_ctxt->s_wt_pred;
S32 i4_part_mask = ENABLE_ALL_PARTS - ENABLE_NxN;
S32 is_vert = 0, i4_best_part_type = -1;
S32 i4_intra_parts = 0; /* Keeps track of intra percentage before merge */
S32 i4_cost_children = 0;
S32 i4_frm_qstep = ps_ctxt->frm_qstep;
S32 i4_num_merge_cands_evaluated = 0;
U08 u1_x_off = ps_results_merge->u1_x_off;
U08 u1_y_off = ps_results_merge->u1_y_off;
S32 i4_32x32_id = (u1_y_off >> 4) + (u1_x_off >> 5);
ihevce_cmn_opt_func_t *ps_cmn_utils_optimised_function_list =
ps_thrd_ctxt->ps_cmn_utils_optimised_function_list;
ihevce_me_optimised_function_list_t *ps_me_optimised_function_list =
((ihevce_me_optimised_function_list_t *)ps_thrd_ctxt->pv_me_optimised_function_list);
ps_results_tl = ps_merge_prms->ps_results_tl;
ps_results_tr = ps_merge_prms->ps_results_tr;
ps_results_bl = ps_merge_prms->ps_results_bl;
ps_results_br = ps_merge_prms->ps_results_br;
if(ps_merge_prms->e_quality_preset == ME_XTREME_SPEED)
{
i4_part_mask &= ~ENABLE_AMP;
}
if(ps_merge_prms->e_quality_preset == ME_XTREME_SPEED_25)
{
i4_part_mask &= ~ENABLE_AMP;
i4_part_mask &= ~ENABLE_SMP;
}
ps_merge_prms->i4_num_pred_dir_actual = 0;
/*************************************************************************/
/* The logic for High speed CU merge goes as follows: */
/* */
/* 1. Early exit with CU_SPLIT if sum of best partitions of children CUs */
/* exceed 7 */
/* 2. Early exit with CU_MERGE if mvs of best partitions of children CUs */
/* are identical */
/* 3. Find the all unique mvs of best partitions of children CUs and */
/* evaluate partial SATDs (all 17 partitions) for each unique mv. If */
/* best parent cost is lower than sum of the best children costs */
/* return CU_MERGE after seeding the best results else return CU_SPLIT*/
/* */
/*************************************************************************/
/* Count the number of best partitions in child CUs, early exit if > 7 */
if((ps_merge_prms->e_quality_preset != ME_PRISTINE_QUALITY) ||
(CU_32x32 == ps_results_merge->e_cu_size))
{
S32 num_parts_in_32x32 = 0;
WORD32 i4_part_type;
if(ps_results_tl->u1_split_flag)
{
num_parts_in_32x32 += 4;
#define COST_INTERCHANGE 0
i4_cost_children = ps_merge_prms->ps_8x8_cu_results[0].ps_best_results->i4_tot_cost +
ps_merge_prms->ps_8x8_cu_results[1].ps_best_results->i4_tot_cost +
ps_merge_prms->ps_8x8_cu_results[2].ps_best_results->i4_tot_cost +
ps_merge_prms->ps_8x8_cu_results[3].ps_best_results->i4_tot_cost;
}
else
{
i4_part_type = ps_results_tl->ps_cu_results->ps_best_results[0].u1_part_type;
num_parts_in_32x32 += gau1_num_parts_in_part_type[i4_part_type];
i4_cost_children = ps_results_tl->ps_cu_results->ps_best_results[0].i4_tot_cost;
}
if(ps_results_tr->u1_split_flag)
{
num_parts_in_32x32 += 4;
i4_cost_children += ps_merge_prms->ps_8x8_cu_results[4].ps_best_results->i4_tot_cost +
ps_merge_prms->ps_8x8_cu_results[5].ps_best_results->i4_tot_cost +
ps_merge_prms->ps_8x8_cu_results[6].ps_best_results->i4_tot_cost +
ps_merge_prms->ps_8x8_cu_results[7].ps_best_results->i4_tot_cost;
}
else
{
i4_part_type = ps_results_tr->ps_cu_results->ps_best_results[0].u1_part_type;
num_parts_in_32x32 += gau1_num_parts_in_part_type[i4_part_type];
i4_cost_children += ps_results_tr->ps_cu_results->ps_best_results[0].i4_tot_cost;
}
if(ps_results_bl->u1_split_flag)
{
num_parts_in_32x32 += 4;
i4_cost_children += ps_merge_prms->ps_8x8_cu_results[8].ps_best_results->i4_tot_cost +
ps_merge_prms->ps_8x8_cu_results[9].ps_best_results->i4_tot_cost +
ps_merge_prms->ps_8x8_cu_results[10].ps_best_results->i4_tot_cost +
ps_merge_prms->ps_8x8_cu_results[11].ps_best_results->i4_tot_cost;
}
else
{
i4_part_type = ps_results_bl->ps_cu_results->ps_best_results[0].u1_part_type;
num_parts_in_32x32 += gau1_num_parts_in_part_type[i4_part_type];
i4_cost_children += ps_results_bl->ps_cu_results->ps_best_results[0].i4_tot_cost;
}
if(ps_results_br->u1_split_flag)
{
num_parts_in_32x32 += 4;
i4_cost_children += ps_merge_prms->ps_8x8_cu_results[12].ps_best_results->i4_tot_cost +
ps_merge_prms->ps_8x8_cu_results[13].ps_best_results->i4_tot_cost +
ps_merge_prms->ps_8x8_cu_results[14].ps_best_results->i4_tot_cost +
ps_merge_prms->ps_8x8_cu_results[15].ps_best_results->i4_tot_cost;
}
else
{
i4_part_type = ps_results_br->ps_cu_results->ps_best_results[0].u1_part_type;
num_parts_in_32x32 += gau1_num_parts_in_part_type[i4_part_type];
i4_cost_children += ps_results_br->ps_cu_results->ps_best_results[0].i4_tot_cost;
}
if((num_parts_in_32x32 > 7) && (ps_merge_prms->e_quality_preset != ME_PRISTINE_QUALITY))
{
return CU_SPLIT;
}
if((num_parts_in_32x32 > MAX_NUM_CONSTITUENT_MVS_TO_ENABLE_32MERGE_IN_XS25) &&
(ps_merge_prms->e_quality_preset == ME_XTREME_SPEED_25))
{
return CU_SPLIT;
}
}
/* Accumulate intra percentage before merge for early CU_SPLIT decision */
/* Note : Each intra part represent a NxN unit of the children CUs */
/* This is essentially 1/16th of the CUsize under consideration for merge */
if(ME_PRISTINE_QUALITY == ps_merge_prms->e_quality_preset)
{
if(CU_64x64 == ps_results_merge->e_cu_size)
{
i4_intra_parts =
(!ps_merge_prms->ps_cluster_info->ps_cu_tree_root->u1_inter_eval_enable)
? 16
: ps_merge_prms->ps_cluster_info->ps_cu_tree_root->u1_intra_eval_enable;
}
else
{
switch((ps_results_merge->u1_x_off >> 5) + ((ps_results_merge->u1_y_off >> 4)))
{
case 0:
{
i4_intra_parts = (!ps_merge_prms->ps_cluster_info->ps_cu_tree_root->ps_child_node_tl
->u1_inter_eval_enable)
? 16
: (ps_merge_prms->ps_cluster_info->ps_cu_tree_root
->ps_child_node_tl->u1_intra_eval_enable);
break;
}
case 1:
{
i4_intra_parts = (!ps_merge_prms->ps_cluster_info->ps_cu_tree_root->ps_child_node_tr
->u1_inter_eval_enable)
? 16
: (ps_merge_prms->ps_cluster_info->ps_cu_tree_root
->ps_child_node_tr->u1_intra_eval_enable);
break;
}
case 2:
{
i4_intra_parts = (!ps_merge_prms->ps_cluster_info->ps_cu_tree_root->ps_child_node_bl
->u1_inter_eval_enable)
? 16
: (ps_merge_prms->ps_cluster_info->ps_cu_tree_root
->ps_child_node_bl->u1_intra_eval_enable);
break;
}
case 3:
{
i4_intra_parts = (!ps_merge_prms->ps_cluster_info->ps_cu_tree_root->ps_child_node_br
->u1_inter_eval_enable)
? 16
: (ps_merge_prms->ps_cluster_info->ps_cu_tree_root
->ps_child_node_br->u1_intra_eval_enable);
break;
}
}
}
}
else
{
for(i = 0; i < 4; i++)
{
search_results_t *ps_results =
(i == 0) ? ps_results_tl
: ((i == 1) ? ps_results_tr : ((i == 2) ? ps_results_bl : ps_results_br));
part_type_results_t *ps_best_res = &ps_results->ps_cu_results->ps_best_results[0];
if(ps_results->u1_split_flag)
{
U08 u1_x_off = ps_results->u1_x_off;
U08 u1_y_off = ps_results->u1_y_off;
U08 u1_8x8_zscan_id = gau1_ctb_raster_to_zscan[(u1_x_off >> 2) + (u1_y_off << 2)] >>
2;
/* Special case to handle 8x8 CUs when 16x16 is split */
ASSERT(ps_results->e_cu_size == CU_16x16);
ps_best_res = &ps_ctxt->as_cu8x8_results[u1_8x8_zscan_id].ps_best_results[0];
if(ps_best_res->as_pu_results[0].pu.b1_intra_flag)
i4_intra_parts += 1;
ps_best_res = &ps_ctxt->as_cu8x8_results[u1_8x8_zscan_id + 1].ps_best_results[0];
if(ps_best_res->as_pu_results[0].pu.b1_intra_flag)
i4_intra_parts += 1;
ps_best_res = &ps_ctxt->as_cu8x8_results[u1_8x8_zscan_id + 2].ps_best_results[0];
if(ps_best_res->as_pu_results[0].pu.b1_intra_flag)
i4_intra_parts += 1;
ps_best_res = &ps_ctxt->as_cu8x8_results[u1_8x8_zscan_id + 3].ps_best_results[0];
if(ps_best_res->as_pu_results[0].pu.b1_intra_flag)
i4_intra_parts += 1;
}
else if(ps_best_res[0].as_pu_results[0].pu.b1_intra_flag)
{
i4_intra_parts += 4;
}
}
}
/* Determine the max intra CU size indicated by IPE */
intra_cu_size = CU_64x64;
if(ps_cur_ipe_ctb->u1_split_flag)
{
intra_cu_size = CU_32x32;
if(ps_cur_ipe_ctb->as_intra32_analyse[i4_32x32_id].b1_split_flag)
{
intra_cu_size = CU_16x16;
}
}
if(((i4_intra_parts > CU_MERGE_MAX_INTRA_PARTS) &&
(intra_cu_size < ps_results_merge->e_cu_size) &&
(ME_PRISTINE_QUALITY != ps_merge_prms->e_quality_preset)) ||
(i4_intra_parts == 16))
{
S32 i4_merge_outcome;
i4_merge_outcome = (CU_32x32 == ps_results_merge->e_cu_size)
? (!ps_cur_ipe_ctb->as_intra32_analyse[i4_32x32_id].b1_split_flag &&
ps_cur_ipe_ctb->as_intra32_analyse[i4_32x32_id].b1_valid_cu)
: (!ps_cur_ipe_ctb->u1_split_flag);
i4_merge_outcome = i4_merge_outcome ||
(ME_PRISTINE_QUALITY == ps_merge_prms->e_quality_preset);
i4_merge_outcome = i4_merge_outcome &&
!(ps_subpel_prms->u1_is_cu_noisy && DISABLE_INTRA_WHEN_NOISY);
if(i4_merge_outcome)
{
inter_cu_results_t *ps_cu_results = ps_results_merge->ps_cu_results;
part_type_results_t *ps_best_result = ps_cu_results->ps_best_results;
pu_t *ps_pu = &ps_best_result->as_pu_results->pu;
ps_cu_results->u1_num_best_results = 1;
ps_cu_results->u1_cu_size = ps_results_merge->e_cu_size;
ps_cu_results->u1_x_off = u1_x_off;
ps_cu_results->u1_y_off = u1_y_off;
ps_best_result->u1_part_type = PRT_2Nx2N;
ps_best_result->ai4_tu_split_flag[0] = 0;
ps_best_result->ai4_tu_split_flag[1] = 0;
ps_best_result->ai4_tu_split_flag[2] = 0;
ps_best_result->ai4_tu_split_flag[3] = 0;
ps_best_result->i4_tot_cost =
(CU_64x64 == ps_results_merge->e_cu_size)
? ps_cur_ipe_ctb->i4_best64x64_intra_cost
: ps_cur_ipe_ctb->ai4_best32x32_intra_cost[i4_32x32_id];
ps_pu->b1_intra_flag = 1;
ps_pu->b4_pos_x = u1_x_off >> 2;
ps_pu->b4_pos_y = u1_y_off >> 2;
ps_pu->b4_wd = (1 << (ps_results_merge->e_cu_size + 1)) - 1;
ps_pu->b4_ht = ps_pu->b4_wd;
ps_pu->mv.i1_l0_ref_idx = -1;
ps_pu->mv.i1_l1_ref_idx = -1;
ps_pu->mv.s_l0_mv.i2_mvx = INTRA_MV;
ps_pu->mv.s_l0_mv.i2_mvy = INTRA_MV;
ps_pu->mv.s_l1_mv.i2_mvx = INTRA_MV;
ps_pu->mv.s_l1_mv.i2_mvy = INTRA_MV;
return CU_MERGED;
}
else
{
return CU_SPLIT;
}
}
if(i4_intra_parts)
{
i4_part_mask = ENABLE_2Nx2N;
}
ps_results_merge->u1_num_active_ref = (ps_ctxt->s_frm_prms.bidir_enabled) ? 2 : 1;
hme_reset_search_results(ps_results_merge, i4_part_mask, MV_RES_QPEL);
ps_results_merge->u1_num_active_ref = ps_merge_prms->i4_num_ref;
ps_merge_prms->i4_num_pred_dir_actual = 0;
if(ps_subpel_prms->u1_is_cu_noisy && ps_merge_prms->ps_inter_ctb_prms->i4_alpha_stim_multiplier)
{
S32 ai4_valid_part_ids[TOT_NUM_PARTS + 1];
S32 i4_num_valid_parts;
S32 i4_sigma_array_offset;
i4_num_valid_parts = hme_create_valid_part_ids(i4_part_mask, ai4_valid_part_ids);
/*********************************************************************************************************************************************/
/* i4_sigma_array_offset : takes care of pointing to the appropriate 4x4 block's sigmaX and sigmaX-squared value in a CTB out of 256 values */
/* Logic is x/4 + ((y/4) x 16) : every 4 pixel increase in x equals one 4x4 block increment, every 4 pixel increase in y equals 16 4x4 block */
/* increment as there will be 256 4x4 blocks in a CTB */
/*********************************************************************************************************************************************/
i4_sigma_array_offset = (ps_merge_prms->ps_results_merge->u1_x_off / 4) +
(ps_merge_prms->ps_results_merge->u1_y_off * 4);
for(i = 0; i < i4_num_valid_parts; i++)
{
S32 i4_part_id = ai4_valid_part_ids[i];
hme_compute_final_sigma_of_pu_from_base_blocks(
ps_ctxt->au4_4x4_src_sigmaX + i4_sigma_array_offset,
ps_ctxt->au4_4x4_src_sigmaXSquared + i4_sigma_array_offset,
au8_final_src_sigmaX,
au8_final_src_sigmaXSquared,
(CU_32x32 == ps_results_merge->e_cu_size) ? 32 : 64,
4,
i4_part_id,
16);
}
ps_merge_prms->ps_inter_ctb_prms->pu8_part_src_sigmaX = au8_final_src_sigmaX;
ps_merge_prms->ps_inter_ctb_prms->pu8_part_src_sigmaXSquared = au8_final_src_sigmaXSquared;
}
/*************************************************************************/
/* Loop through all ref idx and pick the merge candts and refine based */
/* on the active partitions. At this stage num ref will be 1 or 2 */
/*************************************************************************/
for(i4_search_idx = 0; i4_search_idx < ps_merge_prms->i4_num_ref; i4_search_idx++)
{
S32 i4_cands;
U08 u1_pred_dir = 0;
if((2 == ps_merge_prms->i4_num_ref) || (!ps_ctxt->s_frm_prms.bidir_enabled))
{
u1_pred_dir = i4_search_idx;
}
else if(ps_ctxt->s_frm_prms.u1_num_active_ref_l0 == 0)
{
u1_pred_dir = 1;
}
else if(ps_ctxt->s_frm_prms.u1_num_active_ref_l1 == 0)
{
u1_pred_dir = 0;
}
else
{
ASSERT(0);
}
/* call the function to pick and evaluate the merge candts, given */
/* a ref id and a part mask. */
i4_cands = hme_pick_eval_merge_candts(
ps_merge_prms,
ps_subpel_prms,
u1_pred_dir,
i4_best_part_type,
is_vert,
ps_wt_inp_prms,
i4_frm_qstep,
ps_cmn_utils_optimised_function_list,
ps_me_optimised_function_list);
if(i4_cands)
{
ps_merge_prms->au1_pred_dir_searched[ps_merge_prms->i4_num_pred_dir_actual] =
u1_pred_dir;
ps_merge_prms->i4_num_pred_dir_actual++;
}
i4_num_merge_cands_evaluated += i4_cands;
}
/* Call the decide_part_types function here */
/* Populate the new PU struct with the results post subpel refinement*/
if(i4_num_merge_cands_evaluated)
{
inter_cu_results_t *ps_cu_results = ps_results_merge->ps_cu_results;
hme_reset_wkg_mem(&ps_ctxt->s_buf_mgr);
ps_merge_prms->ps_inter_ctb_prms->i4_ctb_x_off = ps_merge_prms->i4_ctb_x_off;
ps_merge_prms->ps_inter_ctb_prms->i4_ctb_y_off = ps_merge_prms->i4_ctb_y_off;
hme_populate_pus(
ps_thrd_ctxt,
ps_ctxt,
ps_subpel_prms,
ps_results_merge,
ps_cu_results,
ps_pu_results,
ps_pu_result,
ps_merge_prms->ps_inter_ctb_prms,
&ps_ctxt->s_wt_pred,
ps_merge_prms->ps_layer_ctxt,
ps_merge_prms->au1_pred_dir_searched,
ps_merge_prms->i4_num_pred_dir_actual);
ps_cu_results->i4_inp_offset = (ps_cu_results->u1_x_off) + (ps_cu_results->u1_y_off * 64);
hme_decide_part_types(
ps_cu_results,
ps_pu_results,
ps_merge_prms->ps_inter_ctb_prms,
ps_ctxt,
ps_cmn_utils_optimised_function_list,
ps_me_optimised_function_list
);
/*****************************************************************/
/* INSERT INTRA RESULTS AT 32x32/64x64 LEVEL. */
/*****************************************************************/
#if DISABLE_INTRA_IN_BPICS
if(1 != ((ME_XTREME_SPEED_25 == ps_merge_prms->e_quality_preset) &&
(ps_ctxt->s_frm_prms.i4_temporal_layer_id > TEMPORAL_LAYER_DISABLE)))
#endif
{
if(!(DISABLE_INTRA_WHEN_NOISY && ps_merge_prms->ps_inter_ctb_prms->u1_is_cu_noisy))
{
hme_insert_intra_nodes_post_bipred(
ps_cu_results, ps_cur_ipe_ctb, ps_ctxt->frm_qstep);
}
}
}
else
{
return CU_SPLIT;
}
/* We check the best result of ref idx 0 and compare for parent vs child */
if((ps_merge_prms->e_quality_preset != ME_PRISTINE_QUALITY) ||
(CU_32x32 == ps_results_merge->e_cu_size))
{
i4_cost_parent = ps_results_merge->ps_cu_results->ps_best_results[0].i4_tot_cost;
/*********************************************************************/
/* Add the cost of signaling the CU tree bits. */
/* Assuming parent is not split, then we signal 1 bit for this parent*/
/* CU. If split, then 1 bit for parent CU + 4 bits for each child CU */
/* So, 4*lambda is extra for children cost. :Lokesh */
/*********************************************************************/
{
pred_ctxt_t *ps_pred_ctxt = &ps_results_merge->as_pred_ctxt[0];
i4_cost_children += ((4 * ps_pred_ctxt->lambda) >> (ps_pred_ctxt->lambda_q_shift));
}
if(i4_cost_parent < i4_cost_children)
{
return CU_MERGED;
}
return CU_SPLIT;
}
else
{
return CU_MERGED;
}
}
/* Stores one search result into an mv bank slot: both mv components of the   */
/* source node, right shifted by rshift, go to the destination mv, and the     */
/* node's ref idx goes to the location given by the ref idx pointer.           */
/* Expands to a brace block; the node and mv arguments are evaluated more than */
/* once, so they must be free of side effects.                                 */
#define COPY_SEARCH_RESULT(ps_dst_mv, pi1_dst_ref_idx, ps_src_node, rshift)                        \
    {                                                                                              \
        (ps_dst_mv)->i2_mv_x = ((ps_src_node)->s_mv.i2_mvx) >> (rshift);                           \
        (ps_dst_mv)->i2_mv_y = ((ps_src_node)->s_mv.i2_mvy) >> (rshift);                           \
        (pi1_dst_ref_idx)[0] = (ps_src_node)->i1_ref_idx;                                          \
    }
/**
********************************************************************************
* @fn hme_update_mv_bank_noencode(search_results_t *ps_search_results,
* layer_mv_t *ps_layer_mv,
* S32 i4_search_blk_x,
* S32 i4_search_blk_y,
* mvbank_update_prms_t *ps_prms)
*
* @brief Updates the mv bank in case there is no further encoding to be done.
* When the bank blk size equals the search blk size, the 2Nx2N results
* of every ref are copied one to one. Otherwise (8x8 search, 4x4 bank)
* each of the four 4x4 blks gets its own NxN results plus the 8x8
* (2Nx2N) result.
*
* @param[in] ps_search_results: contains results for the block just searched
*
* @param[in,out] ps_layer_mv : Has pointer to mv bank amongst other things
*
* @param[in] i4_search_blk_x : col num of blk being searched
*
* @param[in] i4_search_blk_y : row num of blk being searched
*
* @param[in] ps_prms : contains the parameters that govern how the update is
* done (shift, search blk size, number of refs)
*
* @return None
********************************************************************************
*/
void hme_update_mv_bank_noencode(
    search_results_t *ps_search_results,
    layer_mv_t *ps_layer_mv,
    S32 i4_search_blk_x,
    S32 i4_search_blk_y,
    mvbank_update_prms_t *ps_prms)
{
    /* Write cursors and 4x4 source nodes, indexed TL, TR, BL, BR */
    hme_mv_t *aps_mv_dst[4];
    S08 *api1_ref_idx_dst[4];
    search_node_t *aps_node_4x4[4];
    search_node_t *ps_node_8x8;
    S32 i4_bank_offset;
    S32 i4_ref, i4_res, i4_quad;

    /* Element offset of this blk in the mv bank and in the ref idx buffer */
    i4_bank_offset = (i4_search_blk_x << ps_prms->i4_shift) +
                     ((i4_search_blk_y << ps_prms->i4_shift) * ps_layer_mv->i4_num_blks_per_row);
    i4_bank_offset *= ps_layer_mv->i4_num_mvs_per_blk;

    /*************************************************************************/
    /* Bank blk size equals the search blk size: the 2Nx2N results of each   */
    /* ref are copied back to back, i4_num_mvs_per_ref entries per ref.      */
    /*************************************************************************/
    if(ps_layer_mv->e_blk_size == ps_prms->e_search_blk_size)
    {
        hme_mv_t *ps_mv_dst = ps_layer_mv->ps_mv + i4_bank_offset;
        S08 *pi1_ref_idx_dst = ps_layer_mv->pi1_ref_idx + i4_bank_offset;

        for(i4_ref = 0; i4_ref < (S32)ps_prms->i4_num_ref; i4_ref++)
        {
            search_node_t *ps_node = ps_search_results->aps_part_results[i4_ref][PART_ID_2Nx2N];

            for(i4_res = 0; i4_res < ps_layer_mv->i4_num_mvs_per_ref; i4_res++)
            {
                COPY_SEARCH_RESULT(ps_mv_dst, pi1_ref_idx_dst, ps_node, 0);
                ps_mv_dst++;
                pi1_ref_idx_dst++;
                ps_node++;
            }
        }
        return;
    }

    /*************************************************************************/
    /* Remaining case: 8x8 search blk feeding a 4x4 bank. This needs the NxN */
    /* partitions to have been searched.                                     */
    /*************************************************************************/
    ASSERT(ps_layer_mv->e_blk_size == BLK_4x4);
    ASSERT(ps_prms->e_search_blk_size == BLK_8x8);
    ASSERT((ps_search_results->i4_part_mask & (ENABLE_NxN)) == (ENABLE_NxN));

    /* Per ref, a 4x4 blk stores its 4x4 results and one 8x8 result */
    ASSERT(ps_layer_mv->i4_num_mvs_per_ref <= ps_search_results->u1_num_results_per_part + 1);

    /* TR is one blk to the right of TL, BL is one row of mvs below TL */
    aps_mv_dst[0] = ps_layer_mv->ps_mv + i4_bank_offset;
    aps_mv_dst[1] = aps_mv_dst[0] + ps_layer_mv->i4_num_mvs_per_blk;
    aps_mv_dst[2] = aps_mv_dst[0] + (ps_layer_mv->i4_num_mvs_per_row);
    aps_mv_dst[3] = aps_mv_dst[2] + (ps_layer_mv->i4_num_mvs_per_blk);

    api1_ref_idx_dst[0] = ps_layer_mv->pi1_ref_idx + i4_bank_offset;
    api1_ref_idx_dst[1] = api1_ref_idx_dst[0] + ps_layer_mv->i4_num_mvs_per_blk;
    api1_ref_idx_dst[2] = api1_ref_idx_dst[0] + (ps_layer_mv->i4_num_mvs_per_row);
    api1_ref_idx_dst[3] = api1_ref_idx_dst[2] + (ps_layer_mv->i4_num_mvs_per_blk);

    for(i4_ref = 0; i4_ref < (S32)ps_search_results->u1_num_active_ref; i4_ref++)
    {
        ps_node_8x8 = ps_search_results->aps_part_results[i4_ref][PART_ID_2Nx2N];
        aps_node_4x4[0] = ps_search_results->aps_part_results[i4_ref][PART_ID_NxN_TL];
        aps_node_4x4[1] = ps_search_results->aps_part_results[i4_ref][PART_ID_NxN_TR];
        aps_node_4x4[2] = ps_search_results->aps_part_results[i4_ref][PART_ID_NxN_BL];
        aps_node_4x4[3] = ps_search_results->aps_part_results[i4_ref][PART_ID_NxN_BR];

        /* Slot 0 of every 4x4 blk: its first 4x4 result */
        for(i4_quad = 0; i4_quad < 4; i4_quad++)
        {
            COPY_SEARCH_RESULT(
                aps_mv_dst[i4_quad], api1_ref_idx_dst[i4_quad], aps_node_4x4[i4_quad], 0);
            aps_mv_dst[i4_quad]++;
            api1_ref_idx_dst[i4_quad]++;
            aps_node_4x4[i4_quad]++;
        }

        /* Slot 1 of every 4x4 blk: the first 8x8 result, shared by all four */
        if(ps_layer_mv->i4_num_mvs_per_ref > 1)
        {
            for(i4_quad = 0; i4_quad < 4; i4_quad++)
            {
                COPY_SEARCH_RESULT(
                    aps_mv_dst[i4_quad], api1_ref_idx_dst[i4_quad], ps_node_8x8, 0);
                aps_mv_dst[i4_quad]++;
                api1_ref_idx_dst[i4_quad]++;
            }
        }

        /* Slots 2 onwards: the remaining 4x4 results in order */
        for(i4_res = 2; i4_res < ps_layer_mv->i4_num_mvs_per_ref; i4_res++)
        {
            for(i4_quad = 0; i4_quad < 4; i4_quad++)
            {
                COPY_SEARCH_RESULT(
                    aps_mv_dst[i4_quad], api1_ref_idx_dst[i4_quad], aps_node_4x4[i4_quad], 0);
                aps_mv_dst[i4_quad]++;
                api1_ref_idx_dst[i4_quad]++;
                aps_node_4x4[i4_quad]++;
            }
        }
    }
}
/**
********************************************************************************
* @fn hme_update_mv_bank_encode(search_results_t *ps_search_results,
* layer_mv_t *ps_layer_mv,
* S32 i4_search_blk_x,
* S32 i4_search_blk_y,
* mvbank_update_prms_t *ps_prms,
* U08 *pu1_pred_dir_searched,
* S32 i4_num_act_ref_l0)
*
* @brief Updates an 8x8 granular mv bank from the results of a 16x16 search
* blk. Each of the four 8x8 blks (TL, TR, BL, BR) receives the mv and
* ref idx of the partition, within the best partition type, that this
* function maps onto that quadrant. Results whose mvx equals INTRA_MV
* are skipped in favour of the next result of the same partition.
*
* @param[in] ps_search_results: contains results for the block just searched
*
* @param[in,out] ps_layer_mv : Has pointer to mv bank amongst other things
*
* @param[in] i4_search_blk_x : col num of blk being searched
*
* @param[in] i4_search_blk_y : row num of blk being searched
*
* @param[in] ps_prms : update parameters (shift, search blk size, number of
* refs, number of results to store)
*
* @param[in] pu1_pred_dir_searched : for the i-th ref iterated, the first index
* into aps_part_results to read from
*
* @param[in] i4_num_act_ref_l0 : not used by this function
*
* @return None
********************************************************************************
*/
void hme_update_mv_bank_encode(
    search_results_t *ps_search_results,
    layer_mv_t *ps_layer_mv,
    S32 i4_search_blk_x,
    S32 i4_search_blk_y,
    mvbank_update_prms_t *ps_prms,
    U08 *pu1_pred_dir_searched,
    S32 i4_num_act_ref_l0)
{
    /* Write cursors and source partition ids, indexed TL, TR, BL, BR */
    hme_mv_t *aps_mv[4];
    S08 *api1_ref_idx[4];
    WORD32 ai4_quad_part_id[4];
    S32 i4_blk_x, i4_blk_y, i4_offset;
    S32 i, j, i4_quad, i4_res, i4_num_res, num_parts;
    WORD32 i4_part_id;
    WORD32 i4_part_type = ps_search_results->ps_cu_results->ps_best_results[0].u1_part_type;

    (void)i4_num_act_ref_l0;

    i4_blk_x = i4_search_blk_x << ps_prms->i4_shift;
    i4_blk_y = i4_search_blk_y << ps_prms->i4_shift;
    i4_offset = i4_blk_x + i4_blk_y * ps_layer_mv->i4_num_blks_per_row;
    i4_offset *= ps_layer_mv->i4_num_mvs_per_blk;

    /* This update only handles a 16x16 search blk feeding an 8x8 bank */
    ASSERT(ps_layer_mv->e_blk_size == BLK_8x8);
    ASSERT(ps_prms->e_search_blk_size == BLK_16x16);

    /* The bank must not expect more mvs per ref than there are best results */
    ASSERT(ps_layer_mv->i4_num_mvs_per_ref <= ps_search_results->u1_num_best_results);

    /* TR is one blk to the right of TL, BL is one row of mvs below TL */
    aps_mv[0] = ps_layer_mv->ps_mv + i4_offset;
    aps_mv[1] = aps_mv[0] + ps_layer_mv->i4_num_mvs_per_blk;
    aps_mv[2] = aps_mv[0] + (ps_layer_mv->i4_num_mvs_per_row);
    aps_mv[3] = aps_mv[2] + (ps_layer_mv->i4_num_mvs_per_blk);

    api1_ref_idx[0] = ps_layer_mv->pi1_ref_idx + i4_offset;
    api1_ref_idx[1] = api1_ref_idx[0] + ps_layer_mv->i4_num_mvs_per_blk;
    api1_ref_idx[2] = api1_ref_idx[0] + (ps_layer_mv->i4_num_mvs_per_row);
    api1_ref_idx[3] = api1_ref_idx[2] + (ps_layer_mv->i4_num_mvs_per_blk);

    /* A split 16x16 CU with NxN enabled is stored from the NxN partitions */
    if((ps_search_results->e_cu_size == CU_16x16) && (ps_search_results->u1_split_flag) &&
       (ps_search_results->i4_part_mask & ENABLE_NxN))
    {
        i4_part_type = PRT_NxN;
    }

    for(i = 0; i < ps_prms->i4_num_ref; i++)
    {
        /* NOTE(review): every j iteration stores result 0 (and result 1 when */
        /* i4_num_results_to_store > 1) again - confirm this is intended when */
        /* i4_num_mvs_per_ref > 1.                                            */
        for(j = 0; j < ps_layer_mv->i4_num_mvs_per_ref; j++)
        {
            i4_part_id = ge_part_type_to_part_id[i4_part_type][0];
            num_parts = gau1_num_parts_in_part_type[i4_part_type];

            /* Decide which partition supplies each 8x8 quadrant */
            ai4_quad_part_id[0] = i4_part_id;
            if(num_parts == 1)
            {
                ai4_quad_part_id[1] = i4_part_id;
                ai4_quad_part_id[2] = i4_part_id;
                ai4_quad_part_id[3] = i4_part_id;
            }
            else if(num_parts == 2)
            {
                /* For vertically oriented partitions, tl, bl pt to same result   */
                /* For horizontally oriented partition, tl, tr pt to same result  */
                /* This means for AMP, 2 of the 8x8 blks in mv bank have ambiguous */
                /* result, e.g. for 4x16L. Here left 2 8x8 have the 4x16L partition */
                /* and right 2 8x8 have 12x16R partition                          */
                if(gau1_is_vert_part[i4_part_type])
                {
                    ai4_quad_part_id[1] = i4_part_id + 1;
                    ai4_quad_part_id[2] = i4_part_id;
                }
                else
                {
                    ai4_quad_part_id[1] = i4_part_id;
                    ai4_quad_part_id[2] = i4_part_id + 1;
                }
                ai4_quad_part_id[3] = i4_part_id + 1;
            }
            else
            {
                /* 4 unique results */
                ai4_quad_part_id[1] = i4_part_id + 1;
                ai4_quad_part_id[2] = i4_part_id + 2;
                ai4_quad_part_id[3] = i4_part_id + 3;
            }

            /* Result 0 is always stored, result 1 only when more than one */
            /* result is to be stored.                                     */
            i4_num_res = (ps_prms->i4_num_results_to_store > 1) ? 2 : 1;

            for(i4_res = 0; i4_res < i4_num_res; i4_res++)
            {
                for(i4_quad = 0; i4_quad < 4; i4_quad++)
                {
                    search_node_t *ps_search_node =
                        ps_search_results->aps_part_results[pu1_pred_dir_searched[i]]
                                                           [ai4_quad_part_id[i4_quad]] +
                        i4_res;

                    /* Step over an intra result to the next result */
                    if(ps_search_node->s_mv.i2_mvx == INTRA_MV)
                    {
                        ps_search_node++;
                    }

                    COPY_SEARCH_RESULT(
                        aps_mv[i4_quad], api1_ref_idx[i4_quad], ps_search_node, 0);
                    aps_mv[i4_quad]++;
                    api1_ref_idx[i4_quad]++;
                }
            }
        }
    }
}
/**
********************************************************************************
* @fn hme_update_mv_bank_in_l1_me(search_results_t *ps_search_results,
* layer_mv_t *ps_layer_mv,
* S32 i4_search_blk_x,
* S32 i4_search_blk_y,
* mvbank_update_prms_t *ps_prms)
*
* @brief Updates the mv bank with the results of the blk just searched, grouped
* by prediction direction. Results of refs flagged in pu1_is_past are
* stored first (L0); results of the other refs (L1) start
* i4_num_active_ref_l0 * i4_num_mvs_per_ref entries later. Within a
* direction the order is the one produced by
* hme_add_new_node_to_a_sorted_array().
*
* @param[in] ps_search_results: contains results for the block just searched
*
* @param[in,out] ps_layer_mv : Has pointer to mv bank amongst other things
*
* @param[in] i4_search_blk_x : col num of blk being searched
*
* @param[in] i4_search_blk_y : row num of blk being searched
*
* @param[in] ps_prms : contains the parameters that govern how the update is
* done (shift, search blk size, number of refs per direction)
*
* @return None
********************************************************************************
*/
void hme_update_mv_bank_in_l1_me(
    search_results_t *ps_search_results,
    layer_mv_t *ps_layer_mv,
    S32 i4_search_blk_x,
    S32 i4_search_blk_y,
    mvbank_update_prms_t *ps_prms)
{
    hme_mv_t *ps_mv;
    S08 *pi1_ref_idx;
    S32 i4_blk_x, i4_blk_y, i4_offset;
    S32 i4_l1_offset;
    S32 i4_j, i4_ref_id;
    search_node_t *ps_search_node;
    search_node_t *ps_search_node_8x8, *ps_search_node_4x4;

    i4_blk_x = i4_search_blk_x << ps_prms->i4_shift;
    i4_blk_y = i4_search_blk_y << ps_prms->i4_shift;
    i4_offset = i4_blk_x + i4_blk_y * ps_layer_mv->i4_num_blks_per_row;
    i4_offset *= ps_layer_mv->i4_num_mvs_per_blk;

    /* Identify the correct offset in the mvbank and the reference id buf */
    ps_mv = ps_layer_mv->ps_mv + i4_offset;
    pi1_ref_idx = ps_layer_mv->pi1_ref_idx + i4_offset;

    /* Within a blk, the L1 entries follow the slots of all active L0 refs */
    i4_l1_offset = ps_prms->i4_num_active_ref_l0 * ps_layer_mv->i4_num_mvs_per_ref;

    /*************************************************************************/
    /* Supposing we store the mvs in the same blk size as we searched (e.g. */
    /* we searched 8x8 blks and store results for 8x8 blks), then the 2Nx2N */
    /* results of all refs are sorted per direction and stored directly.    */
    /*************************************************************************/
    if(ps_layer_mv->e_blk_size == ps_prms->e_search_blk_size)
    {
        /* [0] collects L0 nodes, [1] collects L1 nodes */
        search_node_t *aps_result_nodes_sorted[2][MAX_NUM_REF * 2];
        hme_mv_t *ps_mv_l0_root = ps_mv;
        hme_mv_t *ps_mv_l1_root = ps_mv + i4_l1_offset;
        S08 *pi1_ref_idx_l0_root = pi1_ref_idx;
        S08 *pi1_ref_idx_l1_root = pi1_ref_idx + i4_l1_offset;
        U32 u4_num_l0_results_updated = 0;
        U32 u4_num_l1_results_updated = 0;

        for(i4_ref_id = 0; i4_ref_id < (S32)ps_prms->i4_num_ref; i4_ref_id++)
        {
            U32 *pu4_num_results_updated;
            search_node_t **pps_result_nodes;
            /* 0 (L0) when the ref is flagged as past, 1 (L1) otherwise */
            U08 u1_pred_dir_of_cur_ref = !ps_search_results->pu1_is_past[i4_ref_id];

            if(u1_pred_dir_of_cur_ref)
            {
                pu4_num_results_updated = &u4_num_l1_results_updated;
                pps_result_nodes = &aps_result_nodes_sorted[1][0];
            }
            else
            {
                pu4_num_results_updated = &u4_num_l0_results_updated;
                pps_result_nodes = &aps_result_nodes_sorted[0][0];
            }

            ps_search_node = ps_search_results->aps_part_results[i4_ref_id][PART_ID_2Nx2N];

            for(i4_j = 0; i4_j < ps_layer_mv->i4_num_mvs_per_ref; i4_j++)
            {
                hme_add_new_node_to_a_sorted_array(
                    &ps_search_node[i4_j], pps_result_nodes, NULL, *pu4_num_results_updated, 0);

                ASSERT(ps_search_node[i4_j].i1_ref_idx == i4_ref_id);
                (*pu4_num_results_updated)++;
            }
        }

        for(i4_j = 0; i4_j < (S32)u4_num_l0_results_updated; i4_j++)
        {
            COPY_SEARCH_RESULT(
                &ps_mv_l0_root[i4_j],
                &pi1_ref_idx_l0_root[i4_j],
                aps_result_nodes_sorted[0][i4_j],
                0);
        }

        for(i4_j = 0; i4_j < (S32)u4_num_l1_results_updated; i4_j++)
        {
            COPY_SEARCH_RESULT(
                &ps_mv_l1_root[i4_j],
                &pi1_ref_idx_l1_root[i4_j],
                aps_result_nodes_sorted[1][i4_j],
                0);
        }

        return;
    }

    /*************************************************************************/
    /* Case where search blk size is 8x8, but we update 4x4 results. In this */
    /* case, we need to have NxN partitions enabled in search.               */
    /* Each 4x4 blk is updated from its own NxN partition together with the  */
    /* 8x8 (2Nx2N) results, all sorted into one list per direction.          */
    /*************************************************************************/
    ASSERT(ps_layer_mv->e_blk_size == BLK_4x4);
    ASSERT(ps_prms->e_search_blk_size == BLK_8x8);
    ASSERT((ps_search_results->i4_part_mask & (ENABLE_NxN)) == (ENABLE_NxN));

    /*************************************************************************/
    /* For every 4x4 blk we store corresponding 4x4 results and 1 8x8 result */
    /* hence the below check.                                                */
    /*************************************************************************/
    ASSERT(ps_layer_mv->i4_num_mvs_per_ref <= ps_search_results->u1_num_results_per_part + 1);

    {
        /* [0] holds the L0 list and its cost shifts, [1] the L1 ones */
        search_node_t *aps_result_nodes_sorted[2][MAX_NUM_REF * 4];
        U08 au1_cost_shifts_for_sorted_node[2][MAX_NUM_REF * 4];
        S32 i;

        /* i walks the 4x4 blks in the order TL, TR, BL, BR */
        for(i = 0; i < 4; i++)
        {
            /* Right blks lie one blk further in the bank, bottom blks one */
            /* row of mvs further.                                         */
            S32 i4_quad_offset = ((i & 1) ? ps_layer_mv->i4_num_mvs_per_blk : 0) +
                                 ((i & 2) ? ps_layer_mv->i4_num_mvs_per_row : 0);
            hme_mv_t *ps_mv_l0_root = ps_mv + i4_quad_offset;
            hme_mv_t *ps_mv_l1_root = ps_mv_l0_root + i4_l1_offset;
            S08 *pi1_ref_idx_l0_root = pi1_ref_idx + i4_quad_offset;
            S08 *pi1_ref_idx_l1_root = pi1_ref_idx_l0_root + i4_l1_offset;

            /* Each 4x4 blk starts from empty lists, so that nodes of a */
            /* previously processed 4x4 blk cannot end up in its bank.  */
            U32 u4_num_l0_results_updated = 0;
            U32 u4_num_l1_results_updated = 0;

            for(i4_ref_id = 0; i4_ref_id < ps_search_results->u1_num_active_ref; i4_ref_id++)
            {
                U32 *pu4_num_results_updated;
                search_node_t **pps_result_nodes;
                U08 *pu1_cost_shifts_for_sorted_node;
                /* 0 (L0) when the ref is flagged as past, 1 (L1) otherwise */
                U08 u1_pred_dir_of_cur_ref = !ps_search_results->pu1_is_past[i4_ref_id];

                if(u1_pred_dir_of_cur_ref)
                {
                    pu4_num_results_updated = &u4_num_l1_results_updated;
                    pps_result_nodes = &aps_result_nodes_sorted[1][0];
                    pu1_cost_shifts_for_sorted_node = &au1_cost_shifts_for_sorted_node[1][0];
                }
                else
                {
                    /* The L0 list keeps its own cost shifts in row 0 */
                    pu4_num_results_updated = &u4_num_l0_results_updated;
                    pps_result_nodes = &aps_result_nodes_sorted[0][0];
                    pu1_cost_shifts_for_sorted_node = &au1_cost_shifts_for_sorted_node[0][0];
                }

                ps_search_node_8x8 = ps_search_results->aps_part_results[i4_ref_id][PART_ID_2Nx2N];
                ps_search_node_4x4 =
                    ps_search_results->aps_part_results[i4_ref_id][PART_ID_NxN_TL + i];

                for(i4_j = 0; i4_j < ps_layer_mv->i4_num_mvs_per_ref; i4_j++)
                {
                    /* 4x4 nodes are inserted with 0 and 8x8 nodes with 2 as  */
                    /* the last argument - presumably a cost shift putting    */
                    /* both on a comparable scale; confirm against            */
                    /* hme_add_new_node_to_a_sorted_array().                  */
                    hme_add_new_node_to_a_sorted_array(
                        &ps_search_node_4x4[i4_j],
                        pps_result_nodes,
                        pu1_cost_shifts_for_sorted_node,
                        *pu4_num_results_updated,
                        0);
                    (*pu4_num_results_updated)++;

                    hme_add_new_node_to_a_sorted_array(
                        &ps_search_node_8x8[i4_j],
                        pps_result_nodes,
                        pu1_cost_shifts_for_sorted_node,
                        *pu4_num_results_updated,
                        2);
                    (*pu4_num_results_updated)++;
                }
            }

            /* Store no more entries than the bank holds per direction */
            u4_num_l0_results_updated =
                MIN((S32)u4_num_l0_results_updated,
                    ps_prms->i4_num_active_ref_l0 * ps_layer_mv->i4_num_mvs_per_ref);
            u4_num_l1_results_updated =
                MIN((S32)u4_num_l1_results_updated,
                    ps_prms->i4_num_active_ref_l1 * ps_layer_mv->i4_num_mvs_per_ref);

            for(i4_j = 0; i4_j < (S32)u4_num_l0_results_updated; i4_j++)
            {
                COPY_SEARCH_RESULT(
                    &ps_mv_l0_root[i4_j],
                    &pi1_ref_idx_l0_root[i4_j],
                    aps_result_nodes_sorted[0][i4_j],
                    0);
            }

            for(i4_j = 0; i4_j < (S32)u4_num_l1_results_updated; i4_j++)
            {
                COPY_SEARCH_RESULT(
                    &ps_mv_l1_root[i4_j],
                    &pi1_ref_idx_l1_root[i4_j],
                    aps_result_nodes_sorted[1][i4_j],
                    0);
            }
        }
    }
}
/**
******************************************************************************
* @brief Scales a motion vector component projected from a different layer of
* the same picture (so no ref id related delta poc scaling is required).
* The result is (mv * dim_cur) / dim_prev, where half of dim_prev,
* carrying the sign reported by SIGN(mv), is added before dividing.
* The mv and dim_prev arguments are evaluated more than once.
******************************************************************************
*/
#define SCALE_MV_COMP_RES(mv_prev, dim_cur, dim_prev)                                              \
    (((mv_prev) * (dim_cur) + ((SIGN((mv_prev)) * (dim_prev)) >> 1)) / (dim_prev))
/**
********************************************************************************
*  @fn     hme_project_coloc_candt(search_node_t *ps_search_node,
*                                   layer_ctxt_t *ps_curr_layer,
*                                   layer_ctxt_t *ps_coarse_layer,
*                                   S32 i4_pos_x,
*                                   S32 i4_pos_y,
*                                   S08 i1_ref_id,
*                                   S32 i4_result_id)
*
*  @brief  Projects, from a coarser layer, the candidate stored at the
*          "colocated" position of the picture. The position and the motion
*          vector are rescaled using the width / height ratio of the two
*          layers, so the ratio does not have to be dyadic.
*
*  @param[out] ps_search_node : receives the projected mv, the ref idx and
*                               has u1_subpel_done cleared
*
*  @param[in]  ps_curr_layer : current layer context
*
*  @param[in]  ps_coarse_layer : coarser layer context
*
*  @param[in]  i4_pos_x : x position where mv is required (w.r.t. curr layer)
*
*  @param[in]  i4_pos_y : y position where mv is required (w.r.t. curr layer)
*
*  @param[in]  i1_ref_id : reference id for which the candidate is required
*
*  @param[in]  i4_result_id : result id for which the candidate is required
*                             (0 : best result, 1 : next best)
*
*  @return None
********************************************************************************
*/
void hme_project_coloc_candt(
    search_node_t *ps_search_node,
    layer_ctxt_t *ps_curr_layer,
    layer_ctxt_t *ps_coarse_layer,
    S32 i4_pos_x,
    S32 i4_pos_y,
    S08 i1_ref_id,
    S32 i4_result_id)
{
    layer_mv_t *ps_bank = ps_coarse_layer->ps_layer_mvbank;
    hme_mv_t *ps_src_mv;
    S08 *pi1_src_ref_idx;
    S32 col, row, i4_bank_idx;

    /* Dimensions of the current and of the coarser layer */
    S32 cur_wd = ps_curr_layer->i4_wd;
    S32 cur_ht = ps_curr_layer->i4_ht;
    S32 coarse_wd = ps_coarse_layer->i4_wd;
    S32 coarse_ht = ps_coarse_layer->i4_ht;

    /* Block width used by the mv bank of the coarser layer */
    S32 coarse_blk_wd = (S32)gau1_blk_size_to_wd[ps_bank->e_blk_size];

    /* Safety check to avoid uninitialized access across temporal layers */
    i4_pos_x = CLIP3(i4_pos_x, 0, (cur_wd - coarse_blk_wd));
    i4_pos_y = CLIP3(i4_pos_y, 0, (cur_ht - coarse_blk_wd));

    /* Map the position to a block column / row of the coarser layer */
    /* TODO: convert these to scale factors at pic level */
    col = (i4_pos_x * coarse_wd) / (cur_wd * coarse_blk_wd);
    row = (i4_pos_y * coarse_ht) / (cur_ht * coarse_blk_wd);

    /* Entry of the bank: block, then reference, then result rank */
    i4_bank_idx = (col * ps_bank->i4_num_mvs_per_blk);
    i4_bank_idx += (ps_bank->i4_num_mvs_per_row * row);
    i4_bank_idx += (i1_ref_id * ps_bank->i4_num_mvs_per_ref);
    i4_bank_idx += i4_result_id;

    ps_src_mv = ps_bank->ps_mv + i4_bank_idx;
    pi1_src_ref_idx = ps_bank->pi1_ref_idx + i4_bank_idx;

    ps_search_node->u1_subpel_done = 0;
    ps_search_node->i1_ref_idx = *pi1_src_ref_idx;
    ps_search_node->s_mv.i2_mvx = SCALE_MV_COMP_RES(ps_src_mv->i2_mv_x, cur_wd, coarse_wd);
    ps_search_node->s_mv.i2_mvy = SCALE_MV_COMP_RES(ps_src_mv->i2_mv_y, cur_ht, coarse_ht);

    /* Entries without a usable result fall back to a zero mv on i1_ref_id */
    if(!((ps_search_node->i1_ref_idx >= 0) && (ps_search_node->s_mv.i2_mvx != INTRA_MV)))
    {
        ps_search_node->i1_ref_idx = i1_ref_id;
        ps_search_node->s_mv.i2_mvx = 0;
        ps_search_node->s_mv.i2_mvy = 0;
    }
}
/**
********************************************************************************
*  @fn     hme_project_coloc_candt_dyadic(search_node_t *ps_search_node,
*                                          layer_ctxt_t *ps_curr_layer,
*                                          layer_ctxt_t *ps_coarse_layer,
*                                          S32 i4_pos_x,
*                                          S32 i4_pos_y,
*                                          S08 i1_ref_id,
*                                          S32 i4_result_id)
*
*  @brief  Projects, from a coarser layer, the candidate stored at the
*          "colocated" position of the picture when the layer ratio is
*          dyadic: the position is mapped to a coarse block with a shift and
*          the stored mv is doubled.
*
*  @param[out] ps_search_node : receives the projected mv and ref idx
*
*  @param[in]  ps_curr_layer : current layer context
*
*  @param[in]  ps_coarse_layer : coarser layer context
*
*  @param[in]  i4_pos_x : x position where mv is required (w.r.t. curr layer)
*
*  @param[in]  i4_pos_y : y position where mv is required (w.r.t. curr layer)
*
*  @param[in]  i1_ref_id : reference id for which the candidate is required
*
*  @param[in]  i4_result_id : result id for which the candidate is required
*                             (0 : best result, 1 : next best)
*
*  @return None
********************************************************************************
*/
void hme_project_coloc_candt_dyadic(
    search_node_t *ps_search_node,
    layer_ctxt_t *ps_curr_layer,
    layer_ctxt_t *ps_coarse_layer,
    S32 i4_pos_x,
    S32 i4_pos_y,
    S08 i1_ref_id,
    S32 i4_result_id)
{
    S32 wd_c, ht_c;
    S32 blksize_p, blk_x, blk_y, i4_offset;
    layer_mv_t *ps_layer_mvbank;
    hme_mv_t *ps_mv;
    S08 *pi1_ref_idx;

    /* Width and ht of the current layer */
    wd_c = ps_curr_layer->i4_wd;
    ht_c = ps_curr_layer->i4_ht;

    ps_layer_mvbank = ps_coarse_layer->ps_layer_mvbank;

    /* Despite its name, blksize_p holds a shift count taken from */
    /* gau1_blk_size_to_wd_shift, not a block width in pixels     */
    blksize_p = (S32)gau1_blk_size_to_wd_shift[ps_layer_mvbank->e_blk_size];

    /* ASSERT for valid sizes */
    ASSERT((blksize_p == 3) || (blksize_p == 4) || (blksize_p == 5));

    /* Safety check to avoid uninitialized access across temporal layers */
    /* NOTE(review): the upper bound subtracts the shift count rather than */
    /* a block size in pixels - confirm this is intended                   */
    i4_pos_x = CLIP3(i4_pos_x, 0, (wd_c - blksize_p));
    i4_pos_y = CLIP3(i4_pos_y, 0, (ht_c - blksize_p));

    /* Project the positions to prev layer */
    /* TODO: convert these to scale factors at pic level */
    blk_x = i4_pos_x >> blksize_p;
    blk_y = i4_pos_y >> blksize_p;

    /* Pick up the mvs from the location */
    i4_offset = (blk_x * ps_layer_mvbank->i4_num_mvs_per_blk);
    i4_offset += (ps_layer_mvbank->i4_num_mvs_per_row * blk_y);

    ps_mv = ps_layer_mvbank->ps_mv + i4_offset;
    pi1_ref_idx = ps_layer_mvbank->pi1_ref_idx + i4_offset;

    ps_mv += (i1_ref_id * ps_layer_mvbank->i4_num_mvs_per_ref);
    pi1_ref_idx += (i1_ref_id * ps_layer_mvbank->i4_num_mvs_per_ref);

    /* Doubling is done with a multiplication: left shifting a negative */
    /* mv component is undefined behaviour in C                         */
    ps_search_node->s_mv.i2_mvx = ps_mv[i4_result_id].i2_mv_x * 2;
    ps_search_node->s_mv.i2_mvy = ps_mv[i4_result_id].i2_mv_y * 2;
    ps_search_node->i1_ref_idx = pi1_ref_idx[i4_result_id];

    /* Entries without a usable result fall back to a zero mv on i1_ref_id */
    if((ps_search_node->i1_ref_idx < 0) || (ps_search_node->s_mv.i2_mvx == INTRA_MV))
    {
        ps_search_node->i1_ref_idx = i1_ref_id;
        ps_search_node->s_mv.i2_mvx = 0;
        ps_search_node->s_mv.i2_mvy = 0;
    }
}
/**
********************************************************************************
*  @fn     hme_project_coloc_candt_dyadic_implicit(search_node_t *ps_search_node,
*                                                   layer_ctxt_t *ps_curr_layer,
*                                                   layer_ctxt_t *ps_coarse_layer,
*                                                   S32 i4_pos_x,
*                                                   S32 i4_pos_y,
*                                                   S32 i4_num_act_ref_l0,
*                                                   U08 u1_pred_dir,
*                                                   U08 u1_default_ref_id,
*                                                   S32 i4_result_id)
*
*  @brief  Dyadic colocated projection where the entry of the coarse mv bank
*          is selected by prediction direction instead of by an explicit
*          reference id: for u1_pred_dir == 1 the read position is advanced by
*          i4_num_act_ref_l0 * i4_num_mvs_per_ref entries, otherwise the
*          first entries of the block are used.
*
*  @param[out] ps_search_node : receives the projected mv and ref idx
*
*  @param[in]  ps_curr_layer : current layer context
*
*  @param[in]  ps_coarse_layer : coarser layer context
*
*  @param[in]  i4_pos_x : x position where mv is required (w.r.t. curr layer)
*
*  @param[in]  i4_pos_y : y position where mv is required (w.r.t. curr layer)
*
*  @param[in]  i4_num_act_ref_l0 : number of reference slots skipped when
*                                  u1_pred_dir is 1
*
*  @param[in]  u1_pred_dir : prediction direction selector (1 skips the first
*                            i4_num_act_ref_l0 references)
*
*  @param[in]  u1_default_ref_id : ref idx written when the stored entry has
*                                  no usable result
*
*  @param[in]  i4_result_id : result id for which the candidate is required
*
*  @return None
********************************************************************************
*/
void hme_project_coloc_candt_dyadic_implicit(
    search_node_t *ps_search_node,
    layer_ctxt_t *ps_curr_layer,
    layer_ctxt_t *ps_coarse_layer,
    S32 i4_pos_x,
    S32 i4_pos_y,
    S32 i4_num_act_ref_l0,
    U08 u1_pred_dir,
    U08 u1_default_ref_id,
    S32 i4_result_id)
{
    S32 wd_c, ht_c;
    S32 blksize_p, blk_x, blk_y, i4_offset;
    layer_mv_t *ps_layer_mvbank;
    hme_mv_t *ps_mv;
    S08 *pi1_ref_idx;

    /* Width and ht of the current layer */
    wd_c = ps_curr_layer->i4_wd;
    ht_c = ps_curr_layer->i4_ht;

    ps_layer_mvbank = ps_coarse_layer->ps_layer_mvbank;

    /* Despite its name, blksize_p holds a shift count taken from */
    /* gau1_blk_size_to_wd_shift, not a block width in pixels     */
    blksize_p = (S32)gau1_blk_size_to_wd_shift[ps_layer_mvbank->e_blk_size];

    /* ASSERT for valid sizes */
    ASSERT((blksize_p == 3) || (blksize_p == 4) || (blksize_p == 5));

    /* Safety check to avoid uninitialized access across temporal layers */
    /* NOTE(review): the upper bound subtracts the shift count rather than */
    /* a block size in pixels - confirm this is intended                   */
    i4_pos_x = CLIP3(i4_pos_x, 0, (wd_c - blksize_p));
    i4_pos_y = CLIP3(i4_pos_y, 0, (ht_c - blksize_p));

    /* Project the positions to prev layer */
    /* TODO: convert these to scale factors at pic level */
    blk_x = i4_pos_x >> blksize_p;
    blk_y = i4_pos_y >> blksize_p;

    /* Pick up the mvs from the location */
    i4_offset = (blk_x * ps_layer_mvbank->i4_num_mvs_per_blk);
    i4_offset += (ps_layer_mvbank->i4_num_mvs_per_row * blk_y);

    ps_mv = ps_layer_mvbank->ps_mv + i4_offset;
    pi1_ref_idx = ps_layer_mvbank->pi1_ref_idx + i4_offset;

    if(u1_pred_dir == 1)
    {
        ps_mv += (i4_num_act_ref_l0 * ps_layer_mvbank->i4_num_mvs_per_ref);
        pi1_ref_idx += (i4_num_act_ref_l0 * ps_layer_mvbank->i4_num_mvs_per_ref);
    }

    /* Doubling is done with a multiplication: left shifting a negative */
    /* mv component is undefined behaviour in C                         */
    ps_search_node->s_mv.i2_mvx = ps_mv[i4_result_id].i2_mv_x * 2;
    ps_search_node->s_mv.i2_mvy = ps_mv[i4_result_id].i2_mv_y * 2;
    ps_search_node->i1_ref_idx = pi1_ref_idx[i4_result_id];

    /* Entries without a usable result fall back to a zero mv on the */
    /* caller supplied default reference                             */
    if((ps_search_node->i1_ref_idx < 0) || (ps_search_node->s_mv.i2_mvx == INTRA_MV))
    {
        ps_search_node->i1_ref_idx = u1_default_ref_id;
        ps_search_node->s_mv.i2_mvx = 0;
        ps_search_node->s_mv.i2_mvy = 0;
    }
}
/**
******************************************************************************
* @brief Writes into prm1 the four range limits of prm2 scaled up by
* 2^shift. prm1 and prm2 are structure lvalues with the members i2_min_x,
* i2_max_x, i2_min_y and i2_max_y.
*
* The scaling is expressed as a multiplication by (1 << shift) because the
* minimum limits may be negative and left shifting a negative value is
* undefined behaviour in C. All arguments are parenthesised so that
* expressions such as *ps_range or a + b can be passed safely. Each argument
* is expanded several times: do not pass expressions with side effects.
******************************************************************************
*/
#define SCALE_RANGE_PRMS(prm1, prm2, shift)                                    \
    {                                                                          \
        (prm1).i2_min_x = (prm2).i2_min_x * (1 << (shift));                    \
        (prm1).i2_max_x = (prm2).i2_max_x * (1 << (shift));                    \
        (prm1).i2_min_y = (prm2).i2_min_y * (1 << (shift));                    \
        (prm1).i2_max_y = (prm2).i2_max_y * (1 << (shift));                    \
    }

/**
******************************************************************************
* @brief Same as SCALE_RANGE_PRMS, for arguments that are pointers to the
* range structures.
******************************************************************************
*/
#define SCALE_RANGE_PRMS_POINTERS(prm1, prm2, shift)                           \
    {                                                                          \
        (prm1)->i2_min_x = (prm2)->i2_min_x * (1 << (shift));                  \
        (prm1)->i2_max_x = (prm2)->i2_max_x * (1 << (shift));                  \
        (prm1)->i2_min_y = (prm2)->i2_min_y * (1 << (shift));                  \
        (prm1)->i2_max_y = (prm2)->i2_max_y * (1 << (shift));                  \
    }
/**
********************************************************************************
*  @fn     void hme_refine_frm_init(layer_ctxt_t *ps_curr_layer,
*                                   refine_prms_t *ps_refine_prms,
*                                   layer_ctxt_t *ps_coarse_layer)
*
*  @brief  Frame init of a refinement layer in ME: chooses the block size and
*          the number of references of the mv bank of the current layer and
*          initialises that bank through hme_init_mv_bank()
*
*  @param[in,out] ps_curr_layer : layer whose mv bank is initialised
*
*  @param[in] ps_refine_prms : refinement layer prms
*
*  @param[in] ps_coarse_layer : coarser layer; the number of references of
*                               its mv bank caps the number used here
*
*  @return None
********************************************************************************
*/
void hme_refine_frm_init(
    layer_ctxt_t *ps_curr_layer, refine_prms_t *ps_refine_prms, layer_ctxt_t *ps_coarse_layer)
{
    S32 i4_coarse_num_ref = ps_coarse_layer->ps_layer_mvbank->i4_num_ref;

    /* Explicit mode keeps every reference of the coarser layer, */
    /* otherwise two references are requested                    */
    S32 i4_bank_num_ref = ps_refine_prms->explicit_ref ? i4_coarse_num_ref : 2;

    /* Results are kept at 8x8 granularity unless 4x4 partitions are enabled */
    BLK_SIZE_T e_bank_blk_size = ps_refine_prms->i4_enable_4x4_part ? BLK_4x4 : BLK_8x8;

    /* Never ask for more references than the coarser layer holds */
    i4_bank_num_ref = MIN(i4_bank_num_ref, i4_coarse_num_ref);

    hme_init_mv_bank(
        ps_curr_layer,
        e_bank_blk_size,
        i4_bank_num_ref,
        ps_refine_prms->i4_num_mvbank_results,
        !(ps_refine_prms->i4_layer_id > 0));
}
#if 1 //ENABLE_CU_RECURSION || TEST_AND_EVALUATE_CU_RECURSION
/**
********************************************************************************
*  @fn   void hme_init_clusters_16x16
*               (
*                   cluster_16x16_blk_t *ps_cluster_blk_16x16,
*                   S32 bidir_enabled
*               )
*
*  @brief  Resets the cluster bookkeeping of a 16x16 block before clustering
*
*  @param[in/out]  ps_cluster_blk_16x16: pointer to structure containing clusters
*                                        of 16x16 block
*
*  @param[in]  bidir_enabled: selects the "_B" variant of the maximum distance
*                             from centroid when non-zero
*
*  @return None
********************************************************************************
*/
static __inline void
    hme_init_clusters_16x16(cluster_16x16_blk_t *ps_cluster_blk_16x16, S32 bidir_enabled)
{
    S32 idx;
    S32 max_dist;

    /* Block level counters */
    ps_cluster_blk_16x16->num_clusters = 0;
    ps_cluster_blk_16x16->intra_mv_area = 0;
    ps_cluster_blk_16x16->best_inter_cost = 0;

    /* Every cluster slot starts invalid, empty and with the default spread */
    max_dist = bidir_enabled ? MAX_DISTANCE_FROM_CENTROID_16x16_B : MAX_DISTANCE_FROM_CENTROID_16x16;

    idx = 0;
    while(idx < MAX_NUM_CLUSTERS_16x16)
    {
        ps_cluster_blk_16x16->as_cluster_data[idx].uni_mv_pixel_area = 0;
        ps_cluster_blk_16x16->as_cluster_data[idx].bi_mv_pixel_area = 0;
        ps_cluster_blk_16x16->as_cluster_data[idx].is_valid_cluster = 0;
        ps_cluster_blk_16x16->as_cluster_data[idx].max_dist_from_centroid = max_dist;
        idx++;
    }

    /* Per reference cluster counts */
    idx = 0;
    while(idx < MAX_NUM_REF)
    {
        ps_cluster_blk_16x16->au1_num_clusters[idx] = 0;
        idx++;
    }
}
/**
********************************************************************************
*  @fn   void hme_init_clusters_32x32
*               (
*                   cluster_32x32_blk_t *ps_cluster_blk_32x32,
*                   S32 bidir_enabled
*               )
*
*  @brief  Resets the cluster bookkeeping of a 32x32 block before clustering
*
*  @param[in/out]  ps_cluster_blk_32x32: pointer to structure containing clusters
*                                        of 32x32 block
*
*  @param[in]  bidir_enabled: selects the "_B" variant of the maximum distance
*                             from centroid when non-zero
*
*  @return None
********************************************************************************
*/
static __inline void
    hme_init_clusters_32x32(cluster_32x32_blk_t *ps_cluster_blk_32x32, S32 bidir_enabled)
{
    S32 idx;
    S32 max_dist;

    /* Block level counters; -1 marks "no reference chosen yet" */
    ps_cluster_blk_32x32->num_clusters = 0;
    ps_cluster_blk_32x32->num_clusters_with_weak_sdi_density = 0;
    ps_cluster_blk_32x32->intra_mv_area = 0;
    ps_cluster_blk_32x32->best_inter_cost = 0;
    ps_cluster_blk_32x32->best_uni_ref = -1;
    ps_cluster_blk_32x32->best_alt_ref = -1;

    /* Every cluster slot starts invalid, empty and with the default spread */
    max_dist = bidir_enabled ? MAX_DISTANCE_FROM_CENTROID_32x32_B : MAX_DISTANCE_FROM_CENTROID_32x32;

    idx = 0;
    while(idx < MAX_NUM_CLUSTERS_32x32)
    {
        ps_cluster_blk_32x32->as_cluster_data[idx].uni_mv_pixel_area = 0;
        ps_cluster_blk_32x32->as_cluster_data[idx].bi_mv_pixel_area = 0;
        ps_cluster_blk_32x32->as_cluster_data[idx].is_valid_cluster = 0;
        ps_cluster_blk_32x32->as_cluster_data[idx].max_dist_from_centroid = max_dist;
        idx++;
    }

    /* Per reference cluster counts */
    idx = 0;
    while(idx < MAX_NUM_REF)
    {
        ps_cluster_blk_32x32->au1_num_clusters[idx] = 0;
        idx++;
    }
}
/**
********************************************************************************
*  @fn   void hme_init_clusters_64x64
*               (
*                   cluster_64x64_blk_t *ps_cluster_blk_64x64,
*                   S32 bidir_enabled
*               )
*
*  @brief  Resets the cluster bookkeeping of a 64x64 block before clustering
*
*  @param[in/out]  ps_cluster_blk_64x64: pointer to structure containing clusters
*                                        of 64x64 block
*
*  @param[in]  bidir_enabled: selects the "_B" variant of the maximum distance
*                             from centroid when non-zero
*
*  @return None
********************************************************************************
*/
static __inline void
    hme_init_clusters_64x64(cluster_64x64_blk_t *ps_cluster_blk_64x64, S32 bidir_enabled)
{
    S32 idx;
    S32 max_dist;

    /* Block level counters; -1 marks "no reference chosen yet" */
    ps_cluster_blk_64x64->num_clusters = 0;
    ps_cluster_blk_64x64->intra_mv_area = 0;
    ps_cluster_blk_64x64->best_inter_cost = 0;
    ps_cluster_blk_64x64->best_uni_ref = -1;
    ps_cluster_blk_64x64->best_alt_ref = -1;

    /* Every cluster slot starts invalid, empty and with the default spread */
    max_dist = bidir_enabled ? MAX_DISTANCE_FROM_CENTROID_64x64_B : MAX_DISTANCE_FROM_CENTROID_64x64;

    idx = 0;
    while(idx < MAX_NUM_CLUSTERS_64x64)
    {
        ps_cluster_blk_64x64->as_cluster_data[idx].uni_mv_pixel_area = 0;
        ps_cluster_blk_64x64->as_cluster_data[idx].bi_mv_pixel_area = 0;
        ps_cluster_blk_64x64->as_cluster_data[idx].is_valid_cluster = 0;
        ps_cluster_blk_64x64->as_cluster_data[idx].max_dist_from_centroid = max_dist;
        idx++;
    }

    /* Per reference cluster counts */
    idx = 0;
    while(idx < MAX_NUM_REF)
    {
        ps_cluster_blk_64x64->au1_num_clusters[idx] = 0;
        idx++;
    }
}
/**
********************************************************************************
*  @fn   void hme_sort_and_assign_top_ref_ids_areawise
*               (
*                   ctb_cluster_info_t *ps_ctb_cluster_info,
*                   S32 bidir_enabled,
*                   S32 block_width,
*                   CU_POS_T e_cu_pos
*               )
*
*  @brief  Accumulates, per reference id, the uni and bi pixel areas of the
*          valid clusters of a 32x32 or 64x64 block and derives best_uni_ref
*          (largest uni area) and best_alt_ref (largest bi area on a reference
*          other than best_uni_ref, or -1) from them. Nothing is written when
*          the block has no clusters or is not flagged valid.
*
*  @param[in/out]  ps_ctb_cluster_info: structure that points to ctb data
*
*  @param[in]  bidir_enabled: flag that indicates whether or not bi-pred is
*                             enabled
*
*  @param[in]  block_width: width of the block in pels; 32 selects the 32x32
*                           block at e_cu_pos, any other value the 64x64 block
*
*  @param[in]  e_cu_pos: position of the block within the CTB
*
*  @return None
********************************************************************************
*/
void hme_sort_and_assign_top_ref_ids_areawise(
    ctb_cluster_info_t *ps_ctb_cluster_info, S32 bidir_enabled, S32 block_width, CU_POS_T e_cu_pos)
{
    cluster_32x32_blk_t *ps_32x32 = NULL;
    cluster_64x64_blk_t *ps_64x64 = NULL;
    cluster_data_t *ps_cluster;

    S32 ai4_uni_area[MAX_NUM_REF];
    S32 ai4_bi_area[MAX_NUM_REF];
    S32 ai4_ref_seen[MAX_NUM_REF];
    S32 ai4_ref_id[MAX_NUM_REF];
    S32 ai4_ref_id_uni[MAX_NUM_REF];

    S32 best_uni_ref = -1, best_alt_ref = -1;
    S32 num_ref = 0;
    S32 num_clusters;
    S32 num_valid_seen = 0;
    S32 blk_is_valid;
    S32 k;

    /* Select the block whose clusters are analysed */
    if(32 == block_width)
    {
        ps_32x32 = &ps_ctb_cluster_info->ps_32x32_blk[e_cu_pos];
        ps_cluster = &ps_32x32->as_cluster_data[0];
        num_clusters = ps_32x32->num_clusters;
        blk_is_valid = ((ps_ctb_cluster_info->blk_32x32_mask & (1 << e_cu_pos)) != 0);
    }
    else
    {
        ps_64x64 = ps_ctb_cluster_info->ps_64x64_blk;
        ps_cluster = &ps_64x64->as_cluster_data[0];
        num_clusters = ps_64x64->num_clusters;
        /* The 64x64 block is usable only when all four 32x32 bits are set */
        blk_is_valid = (ps_ctb_cluster_info->blk_32x32_mask == 0xf);
    }

#if !ENABLE_4CTB_EVALUATION
    if((num_clusters > MAX_NUM_CLUSTERS_IN_VALID_32x32_BLK))
    {
        return;
    }
#endif

    if((0 == num_clusters) || !blk_is_valid)
    {
        return;
    }

    for(k = 0; k < MAX_NUM_REF; k++)
    {
        ai4_uni_area[k] = 0;
        ai4_bi_area[k] = 0;
        ai4_ref_seen[k] = 0;
        ai4_ref_id[k] = -1;
        ai4_ref_id_uni[k] = -1;
    }

    /* Walk the slots until num_clusters valid clusters have been folded */
    /* into the per reference totals; invalid slots are stepped over     */
    while(num_valid_seen < num_clusters)
    {
        if(ps_cluster->is_valid_cluster)
        {
            S32 ref_id = ps_cluster->ref_id;

            num_valid_seen++;

            ai4_uni_area[ref_id] += ps_cluster->uni_mv_pixel_area;
            ai4_bi_area[ref_id] += ps_cluster->bi_mv_pixel_area;

            if(!ai4_ref_seen[ref_id])
            {
                ai4_ref_seen[ref_id] = 1;
                ai4_ref_id[ref_id] = ref_id;
                ai4_ref_id_uni[ref_id] = ref_id;
                num_ref++;
            }
        }

        ps_cluster++;
    }

    /* Bring the largest uni area, and its ref id, to index 0 */
    for(k = 1; k < MAX_NUM_REF; k++)
    {
        if(ai4_uni_area[k] > ai4_uni_area[0])
        {
            SWAP_HME(ai4_uni_area[k], ai4_uni_area[0], S32);
            SWAP_HME(ai4_ref_id_uni[k], ai4_ref_id_uni[0], S32);
        }
    }

    best_uni_ref = ai4_ref_id_uni[0];

    if(bidir_enabled)
    {
        /* Bring the largest bi area, and its ref id, to index 0 */
        for(k = 1; k < MAX_NUM_REF; k++)
        {
            if(ai4_bi_area[k] > ai4_bi_area[0])
            {
                SWAP_HME(ai4_bi_area[k], ai4_bi_area[0], S32);
                SWAP_HME(ai4_ref_id[k], ai4_ref_id[0], S32);
            }
        }

        if(0 == ai4_bi_area[0])
        {
            /* No bi area on any reference */
            best_alt_ref = -1;
        }
        else if(best_uni_ref != ai4_ref_id[0])
        {
            best_alt_ref = ai4_ref_id[0];
        }
        else
        {
            /* Top bi reference coincides with best_uni_ref: use the runner-up */
            for(k = 2; k < MAX_NUM_REF; k++)
            {
                if(ai4_bi_area[k] > ai4_bi_area[1])
                {
                    SWAP_HME(ai4_bi_area[k], ai4_bi_area[1], S32);
                    SWAP_HME(ai4_ref_id[k], ai4_ref_id[1], S32);
                }
            }

            best_alt_ref = ai4_ref_id[1];
        }
    }

    if(32 == block_width)
    {
        SET_VALUES_FOR_TOP_REF_IDS(ps_32x32, best_uni_ref, best_alt_ref, num_ref);
    }
    else
    {
        SET_VALUES_FOR_TOP_REF_IDS(ps_64x64, best_uni_ref, best_alt_ref, num_ref);
    }
}
/**
********************************************************************************
*  @fn   void hme_find_top_ref_ids
*               (
*                   ctb_cluster_info_t *ps_ctb_cluster_info,
*                   S32 bidir_enabled,
*                   S32 block_width
*               )
*
*  @brief  Finds best_uni_ref and best_alt_ref for the four 32x32 blocks of
*          the CTB (block_width 32) or for its 64x64 block (block_width 64).
*          Any other width is ignored.
*
*  @param[in/out]  ps_ctb_cluster_info: structure that points to ctb data
*
*  @param[in]  bidir_enabled: forwarded to the area wise sorting routine
*
*  @param[in]  block_width: 32 or 64
*
*  @return None
********************************************************************************
*/
void hme_find_top_ref_ids(
    ctb_cluster_info_t *ps_ctb_cluster_info, S32 bidir_enabled, S32 block_width)
{
    switch(block_width)
    {
    case 32:
    {
        S32 blk_idx = 0;

        /* One pass per 32x32 quadrant of the CTB */
        while(blk_idx < 4)
        {
            hme_sort_and_assign_top_ref_ids_areawise(
                ps_ctb_cluster_info, bidir_enabled, block_width, (CU_POS_T)blk_idx);
            blk_idx++;
        }
        break;
    }
    case 64:
    {
        hme_sort_and_assign_top_ref_ids_areawise(
            ps_ctb_cluster_info, bidir_enabled, block_width, POS_NA);
        break;
    }
    default:
    {
        break;
    }
    }
}
/**
********************************************************************************
*  @fn   void hme_boot_out_outlier
*               (
*                   ctb_cluster_info_t *ps_ctb_cluster_info,
*                   S32 blk_width
*               )
*
*  @brief  Removes outlier clusters before CU tree population. Only blocks
*          holding more than MAX_NUM_CLUSTERS_IN_ONE_REF_IDX clusters are
*          handed to BUMP_OUTLIER_CLUSTERS.
*
*  @param[in/out]  ps_ctb_cluster_info: structure that points to ctb data
*
*  @param[in]  blk_width: 32 processes the four 32x32 blocks, 64 the 64x64
*                         block; other values do nothing
*
*  @return None
********************************************************************************
*/
void hme_boot_out_outlier(ctb_cluster_info_t *ps_ctb_cluster_info, S32 blk_width)
{
    S32 sdi_threshold = ps_ctb_cluster_info->sdi_threshold;

    switch(blk_width)
    {
    case 32:
    {
        /* 32x32 clusters */
        cluster_32x32_blk_t *ps_32x32;
        S32 blk_idx;

        for(blk_idx = 0; blk_idx < 4; blk_idx++)
        {
            ps_32x32 = &ps_ctb_cluster_info->ps_32x32_blk[blk_idx];

            if(ps_32x32->num_clusters <= MAX_NUM_CLUSTERS_IN_ONE_REF_IDX)
            {
                continue;
            }

            BUMP_OUTLIER_CLUSTERS(ps_32x32, sdi_threshold);
        }
        break;
    }
    case 64:
    {
        /* 64x64 clusters */
        cluster_64x64_blk_t *ps_64x64 = &ps_ctb_cluster_info->ps_64x64_blk[0];

        if(ps_64x64->num_clusters > MAX_NUM_CLUSTERS_IN_ONE_REF_IDX)
        {
            BUMP_OUTLIER_CLUSTERS(ps_64x64, sdi_threshold);
        }
        break;
    }
    default:
    {
        break;
    }
    }
}
/**
********************************************************************************
*  @fn   void hme_update_cluster_attributes
*               (
*                   cluster_data_t *ps_cluster_data,
*                   S32 mvx,
*                   S32 mvy,
*                   S32 mvdx,
*                   S32 mvdy,
*                   S32 ref_id,
*                   S32 sdi,
*                   U08 is_part_of_bi,
*                   PART_ID_T e_part_id
*               )
*
*  @brief  Adds one motion vector to an existing cluster: extends the
*          bounding box, appends the mv to as_mv[], recomputes the centroid as
*          the running mean (Q8) and accumulates the pixel areas.
*
*  @param[in/out]  ps_cluster_data: cluster that receives the mv
*
*  @param[in]  mvx : x co-ordinate of the motion vector
*
*  @param[in]  mvy : y co-ordinate of the motion vector
*
*  @param[in]  mvdx : rounded (centroid x - mvx); its sign selects whether
*                     min_x or max_x may be extended
*
*  @param[in]  mvdy : rounded (centroid y - mvy); its sign selects whether
*                     min_y or max_y may be extended
*
*  @param[in]  ref_id : ref_id of the motion vector (not used by this
*                       function)
*
*  @param[in]  sdi : value stored in the sdi field of the appended mv entry
*
*  @param[in]  is_part_of_bi : non-zero if the mv belongs to a bi-pred result;
*                              decides which area accumulator is updated
*
*  @param[in]  e_part_id : partition id of the motion vector; indexes
*                          gai4_partition_area[]
*
*  @return None
********************************************************************************
*/
static __inline void hme_update_cluster_attributes(
    cluster_data_t *ps_cluster_data,
    S32 mvx,
    S32 mvy,
    S32 mvdx,
    S32 mvdy,
    S32 ref_id,
    S32 sdi,
    U08 is_part_of_bi,
    PART_ID_T e_part_id)
{
    LWORD64 i8_mvx_sum_q8;
    LWORD64 i8_mvy_sum_q8;

    S32 centroid_posx_q8 = ps_cluster_data->s_centroid.i4_pos_x_q8;
    S32 centroid_posy_q8 = ps_cluster_data->s_centroid.i4_pos_y_q8;

    /* Extend the bounding box on the side the new mv lies on */
    if((mvdx > 0) && (ps_cluster_data->min_x > mvx))
    {
        ps_cluster_data->min_x = mvx;
    }
    else if((mvdx < 0) && (ps_cluster_data->max_x < mvx))
    {
        ps_cluster_data->max_x = mvx;
    }

    if((mvdy > 0) && (ps_cluster_data->min_y > mvy))
    {
        ps_cluster_data->min_y = mvy;
    }
    else if((mvdy < 0) && (ps_cluster_data->max_y < mvy))
    {
        ps_cluster_data->max_y = mvy;
    }

    /* Append the mv to the list of the cluster */
    /* NOTE(review): no capacity check on as_mv[] is visible here - */
    /* presumably guaranteed by the callers; confirm                */
    {
        S32 num_mvs = ps_cluster_data->num_mvs;

        ps_cluster_data->as_mv[num_mvs].pixel_count = gai4_partition_area[e_part_id];
        ps_cluster_data->as_mv[num_mvs].mvx = mvx;
        ps_cluster_data->as_mv[num_mvs].mvy = mvy;
        ps_cluster_data->as_mv[num_mvs].is_uni = !is_part_of_bi;
        ps_cluster_data->as_mv[num_mvs].sdi = sdi;
    }

    /* Updation of centroid: running mean in Q8. The conversion to Q8 uses */
    /* a multiplication because left shifting a negative mv is undefined   */
    /* behaviour in C                                                      */
    {
        i8_mvx_sum_q8 = (LWORD64)centroid_posx_q8 * ps_cluster_data->num_mvs + (mvx * 256);
        i8_mvy_sum_q8 = (LWORD64)centroid_posy_q8 * ps_cluster_data->num_mvs + (mvy * 256);

        ps_cluster_data->num_mvs++;

        ps_cluster_data->s_centroid.i4_pos_x_q8 =
            (WORD32)((i8_mvx_sum_q8) / ps_cluster_data->num_mvs);
        ps_cluster_data->s_centroid.i4_pos_y_q8 =
            (WORD32)((i8_mvy_sum_q8) / ps_cluster_data->num_mvs);
    }

    /* Area bookkeeping */
    ps_cluster_data->area_in_pixels += gai4_partition_area[e_part_id];

    if(is_part_of_bi)
    {
        ps_cluster_data->bi_mv_pixel_area += gai4_partition_area[e_part_id];
    }
    else
    {
        ps_cluster_data->uni_mv_pixel_area += gai4_partition_area[e_part_id];
    }
}
/**
********************************************************************************
* @fn void hme_try_cluster_merge
* (
* cluster_data_t *ps_cluster_data,
* U08 *pu1_num_clusters,
* S32 idx_of_updated_cluster
* )
*
* @brief Tries to merge the most recently updated cluster with another valid
* cluster of the same ref_id. Two clusters are merged when the L1 distance
* between their rounded centroids is at most half of the updated cluster's
* max_dist_from_centroid. After a merge the function calls itself again, so
* merging repeats until no further candidate is found.
*
* @param[in/out] ps_cluster_data: pointer to the first element of the
* cluster_data_t array
*
* @param[in/out] pu1_num_clusters : pointer to number of valid clusters;
* decremented for every merge
*
* @param[in] idx_of_updated_cluster : index of the cluster most recently
* updated
*
* @return Nothing
********************************************************************************
*/
void hme_try_cluster_merge(
cluster_data_t *ps_cluster_data, U08 *pu1_num_clusters, S32 idx_of_updated_cluster)
{
centroid_t *ps_centroid;
S32 cur_pos_x_q8;
S32 cur_pos_y_q8;
S32 i;
S32 max_dist_from_centroid;
S32 mvd;
S32 mvdx_q8;
S32 mvdx;
S32 mvdy_q8;
S32 mvdy;
S32 num_clusters, num_clusters_evaluated;
S32 other_pos_x_q8;
S32 other_pos_y_q8;
/* ps_root keeps the array base for the recursive call; ps_cluster_data */
/* itself is advanced by the loop below */
cluster_data_t *ps_root = ps_cluster_data;
cluster_data_t *ps_cur_cluster = &ps_cluster_data[idx_of_updated_cluster];
centroid_t *ps_cur_centroid = &ps_cur_cluster->s_centroid;
/* Merge is superfluous if num_clusters is 1 */
if(*pu1_num_clusters == 1)
{
return;
}
cur_pos_x_q8 = ps_cur_centroid->i4_pos_x_q8;
cur_pos_y_q8 = ps_cur_centroid->i4_pos_y_q8;
max_dist_from_centroid = ps_cur_cluster->max_dist_from_centroid;
num_clusters = *pu1_num_clusters;
num_clusters_evaluated = 0;
/* Walk the slots until num_clusters valid clusters have been looked at */
for(i = 0; num_clusters_evaluated < num_clusters; i++, ps_cluster_data++)
{
/* Invalid slots are stepped over without being counted */
if(!ps_cluster_data->is_valid_cluster)
{
continue;
}
/* Clusters of another reference, and the updated cluster itself, */
/* are counted but are never merge candidates */
if((ps_cluster_data->ref_id != ps_cur_cluster->ref_id) || (i == idx_of_updated_cluster))
{
num_clusters_evaluated++;
continue;
}
ps_centroid = &ps_cluster_data->s_centroid;
other_pos_x_q8 = ps_centroid->i4_pos_x_q8;
other_pos_y_q8 = ps_centroid->i4_pos_y_q8;
/* Centroid difference, rounded from Q8 to integer units */
mvdx_q8 = (cur_pos_x_q8 - other_pos_x_q8);
mvdy_q8 = (cur_pos_y_q8 - other_pos_y_q8);
mvdx = (mvdx_q8 + (1 << 7)) >> 8;
mvdy = (mvdy_q8 + (1 << 7)) >> 8;
mvd = ABS(mvdx) + ABS(mvdy);
if(mvd <= (max_dist_from_centroid >> 1))
{
/* 0 => no updates */
/* 1 => min updated */
/* 2 => max updated */
S32 minmax_x_update_id;
S32 minmax_y_update_id;
/* Centroid times number of mvs gives back the Q8 sum of the mvs */
LWORD64 i8_mv_x_sum_self = (LWORD64)cur_pos_x_q8 * ps_cur_cluster->num_mvs;
LWORD64 i8_mv_y_sum_self = (LWORD64)cur_pos_y_q8 * ps_cur_cluster->num_mvs;
LWORD64 i8_mv_x_sum_cousin = (LWORD64)other_pos_x_q8 * ps_cluster_data->num_mvs;
LWORD64 i8_mv_y_sum_cousin = (LWORD64)other_pos_y_q8 * ps_cluster_data->num_mvs;
/* The other cluster is absorbed: its slot is invalidated and its */
/* mvs and areas are added to the updated cluster */
(*pu1_num_clusters)--;
ps_cluster_data->is_valid_cluster = 0;
/* NOTE(review): no check that the combined mv count fits in as_mv[] */
/* is visible here - confirm the array capacity covers this */
memcpy(
&ps_cur_cluster->as_mv[ps_cur_cluster->num_mvs],
ps_cluster_data->as_mv,
sizeof(mv_data_t) * ps_cluster_data->num_mvs);
ps_cur_cluster->num_mvs += ps_cluster_data->num_mvs;
ps_cur_cluster->area_in_pixels += ps_cluster_data->area_in_pixels;
ps_cur_cluster->bi_mv_pixel_area += ps_cluster_data->bi_mv_pixel_area;
ps_cur_cluster->uni_mv_pixel_area += ps_cluster_data->uni_mv_pixel_area;
/* New centroid is the mean over the mvs of both clusters */
i8_mv_x_sum_self += i8_mv_x_sum_cousin;
i8_mv_y_sum_self += i8_mv_y_sum_cousin;
ps_cur_centroid->i4_pos_x_q8 = (WORD32)(i8_mv_x_sum_self / ps_cur_cluster->num_mvs);
ps_cur_centroid->i4_pos_y_q8 = (WORD32)(i8_mv_y_sum_self / ps_cur_cluster->num_mvs);
/* Per axis: 1 if the absorbed cluster's min is not above the current */
/* min, else 2 if its max is not below the current max, else 0 */
minmax_x_update_id = (ps_cur_cluster->min_x < ps_cluster_data->min_x)
? ((ps_cur_cluster->max_x > ps_cluster_data->max_x) ? 0 : 2)
: 1;
minmax_y_update_id = (ps_cur_cluster->min_y < ps_cluster_data->min_y)
? ((ps_cur_cluster->max_y > ps_cluster_data->max_y) ? 0 : 2)
: 1;
/* Updation of centroid spread */
/* The switch value is x_id + 4 * y_id */
/* NOTE(review): every case below measures the distance from ps_centroid */
/* (the absorbed cluster's centroid) and stores the result in */
/* ps_cluster_data->max_dist_from_centroid, i.e. in the slot that was */
/* just invalidated, not in ps_cur_cluster - confirm this is intended */
switch(minmax_x_update_id + (minmax_y_update_id << 2))
{
case 1:
{
S32 mvd, mvd_q8;
ps_cur_cluster->min_x = ps_cluster_data->min_x;
mvd_q8 = ps_centroid->i4_pos_x_q8 - (ps_cur_cluster->min_x << 8);
mvd = (mvd_q8 + (1 << 7)) >> 8;
if(mvd > (max_dist_from_centroid))
{
ps_cluster_data->max_dist_from_centroid = mvd;
}
break;
}
case 2:
{
S32 mvd, mvd_q8;
ps_cur_cluster->max_x = ps_cluster_data->max_x;
mvd_q8 = (ps_cur_cluster->max_x << 8) - ps_centroid->i4_pos_x_q8;
mvd = (mvd_q8 + (1 << 7)) >> 8;
if(mvd > (max_dist_from_centroid))
{
ps_cluster_data->max_dist_from_centroid = mvd;
}
break;
}
case 4:
{
S32 mvd, mvd_q8;
ps_cur_cluster->min_y = ps_cluster_data->min_y;
mvd_q8 = ps_centroid->i4_pos_y_q8 - (ps_cur_cluster->min_y << 8);
mvd = (mvd_q8 + (1 << 7)) >> 8;
if(mvd > (max_dist_from_centroid))
{
ps_cluster_data->max_dist_from_centroid = mvd;
}
break;
}
case 5:
{
/* In the two-axis cases 5, 6, 9 and 10 the distances are computed */
/* from the current min/max before those are overwritten */
S32 mvd;
S32 mvdx, mvdx_q8;
S32 mvdy, mvdy_q8;
mvdy_q8 = ps_centroid->i4_pos_y_q8 - (ps_cur_cluster->min_y << 8);
mvdy = (mvdy_q8 + (1 << 7)) >> 8;
mvdx_q8 = ps_centroid->i4_pos_x_q8 - (ps_cur_cluster->min_x << 8);
mvdx = (mvdx_q8 + (1 << 7)) >> 8;
mvd = (mvdx > mvdy) ? mvdx : mvdy;
ps_cur_cluster->min_x = ps_cluster_data->min_x;
ps_cur_cluster->min_y = ps_cluster_data->min_y;
if(mvd > max_dist_from_centroid)
{
ps_cluster_data->max_dist_from_centroid = mvd;
}
break;
}
case 6:
{
S32 mvd;
S32 mvdx, mvdx_q8;
S32 mvdy, mvdy_q8;
mvdy_q8 = ps_centroid->i4_pos_y_q8 - (ps_cur_cluster->min_y << 8);
mvdy = (mvdy_q8 + (1 << 7)) >> 8;
mvdx_q8 = (ps_cur_cluster->max_x << 8) - ps_centroid->i4_pos_x_q8;
mvdx = (mvdx_q8 + (1 << 7)) >> 8;
mvd = (mvdx > mvdy) ? mvdx : mvdy;
ps_cur_cluster->max_x = ps_cluster_data->max_x;
ps_cur_cluster->min_y = ps_cluster_data->min_y;
if(mvd > max_dist_from_centroid)
{
ps_cluster_data->max_dist_from_centroid = mvd;
}
break;
}
case 8:
{
S32 mvd, mvd_q8;
ps_cur_cluster->max_y = ps_cluster_data->max_y;
mvd_q8 = (ps_cur_cluster->max_y << 8) - ps_centroid->i4_pos_y_q8;
mvd = (mvd_q8 + (1 << 7)) >> 8;
if(mvd > (max_dist_from_centroid))
{
ps_cluster_data->max_dist_from_centroid = mvd;
}
break;
}
case 9:
{
S32 mvd;
S32 mvdx, mvdx_q8;
S32 mvdy, mvdy_q8;
mvdx_q8 = ps_centroid->i4_pos_x_q8 - (ps_cur_cluster->min_x << 8);
mvdx = (mvdx_q8 + (1 << 7)) >> 8;
mvdy_q8 = (ps_cur_cluster->max_y << 8) - ps_centroid->i4_pos_y_q8;
mvdy = (mvdy_q8 + (1 << 7)) >> 8;
mvd = (mvdx > mvdy) ? mvdx : mvdy;
ps_cur_cluster->min_x = ps_cluster_data->min_x;
ps_cur_cluster->max_y = ps_cluster_data->max_y;
if(mvd > max_dist_from_centroid)
{
ps_cluster_data->max_dist_from_centroid = mvd;
}
break;
}
case 10:
{
S32 mvd;
S32 mvdx, mvdx_q8;
S32 mvdy, mvdy_q8;
mvdx_q8 = (ps_cur_cluster->max_x << 8) - ps_centroid->i4_pos_x_q8;
mvdx = (mvdx_q8 + (1 << 7)) >> 8;
mvdy_q8 = (ps_cur_cluster->max_y << 8) - ps_centroid->i4_pos_y_q8;
mvdy = (mvdy_q8 + (1 << 7)) >> 8;
mvd = (mvdx > mvdy) ? mvdx : mvdy;
ps_cur_cluster->max_x = ps_cluster_data->max_x;
ps_cur_cluster->max_y = ps_cluster_data->max_y;
/* NOTE(review): unlike the other cases, this one compares against the */
/* absorbed cluster's own max_dist_from_centroid - confirm */
if(mvd > ps_cluster_data->max_dist_from_centroid)
{
ps_cluster_data->max_dist_from_centroid = mvd;
}
break;
}
default:
{
break;
}
}
/* The merged cluster may now be close to yet another cluster: */
/* restart the search from the base of the array */
hme_try_cluster_merge(ps_root, pu1_num_clusters, idx_of_updated_cluster);
return;
}
num_clusters_evaluated++;
}
}
/**
********************************************************************************
*  @fn   void hme_find_and_update_clusters
*               (
*                   cluster_data_t *ps_cluster_data,
*                   U08 *pu1_num_clusters,
*                   S16 i2_mv_x,
*                   S16 i2_mv_y,
*                   U08 i1_ref_idx,
*                   S32 i4_sdi,
*                   PART_ID_T e_part_id,
*                   U08 is_part_of_bi
*               )
*
*  @brief  Implementation of the clustering algorithm. The mv is added to the
*          valid cluster of the same ref idx whose centroid is nearest (L1
*          distance of the rounded Q8 difference), provided that distance does
*          not exceed the cluster's max_dist_from_centroid; for NxN partitions
*          a merge of clusters is then attempted. Otherwise a new cluster
*          holding only this mv is started.
*
*  @param[in/out]  ps_cluster_data: pointer to the array of cluster_data_t
*
*  @param[in/out]  pu1_num_clusters : pointer to number of valid clusters
*
*  @param[in]  i2_mv_x : x co-ordinate of the motion vector
*
*  @param[in]  i2_mv_y : y co-ordinate of the motion vector
*
*  @param[in]  i1_ref_idx : ref_id of the motion vector
*
*  @param[in]  i4_sdi : value stored in the sdi field of the mv entry
*
*  @param[in]  e_part_id : partition id of the motion vector
*
*  @param[in]  is_part_of_bi : non-zero if the mv belongs to a bi-pred result
*
*  @return None
********************************************************************************
*/
void hme_find_and_update_clusters(
    cluster_data_t *ps_cluster_data,
    U08 *pu1_num_clusters,
    S16 i2_mv_x,
    S16 i2_mv_y,
    U08 i1_ref_idx,
    S32 i4_sdi,
    PART_ID_T e_part_id,
    U08 is_part_of_bi)
{
    /* Set when the mv has to start a cluster of its own */
    cluster_data_t *ps_new_cluster = NULL;

    S32 i;
    S32 num_clusters = *pu1_num_clusters;
    S32 mvx = i2_mv_x;
    S32 mvy = i2_mv_y;
    S32 ref_idx = i1_ref_idx;
    S32 sdi = i4_sdi;

    /* Q8 form of the mv. A multiplication is used because left shifting */
    /* a negative mv is undefined behaviour in C                         */
    S32 mvx_q8 = mvx * 256;
    S32 mvy_q8 = mvy * 256;

    if(num_clusters == 0)
    {
        ps_new_cluster = &ps_cluster_data[0];
    }
    else
    {
        S32 min_mvd_cluster_id = -1;
        S32 min_mvd = MAX_32BIT_VAL;
        S32 min_mvdx = 0;
        S32 min_mvdy = 0;
        S32 new_cluster_idx = MAX_NUM_CLUSTERS_16x16;
        S32 num_clusters_evaluated = 0;

        /* Look for the nearest valid cluster with the same ref idx */
        for(i = 0; num_clusters_evaluated < num_clusters; i++)
        {
            cluster_data_t *ps_data = &ps_cluster_data[i];
            S32 mvdx, mvdy, mvd;

            /* In anticipation of a possible merging of clusters: */
            /* remember a free slot that a new cluster can reuse  */
            if(ps_data->is_valid_cluster == 0)
            {
                new_cluster_idx = i;
                continue;
            }

            num_clusters_evaluated++;

            if(ref_idx != ps_data->ref_id)
            {
                continue;
            }

            /* Centroid to mv difference, rounded from Q8 to integer units */
            mvdx = ((ps_data->s_centroid.i4_pos_x_q8 - mvx_q8) + (1 << 7)) >> 8;
            mvdy = ((ps_data->s_centroid.i4_pos_y_q8 - mvy_q8) + (1 << 7)) >> 8;
            mvd = ABS(mvdx) + ABS(mvdy);

            if(mvd < min_mvd)
            {
                min_mvd = mvd;
                min_mvdx = mvdx;
                min_mvdy = mvdy;
                min_mvd_cluster_id = i;
            }
        }

        /* The explicit index check keeps ps_cluster_data[-1] unreachable */
        /* when no cluster of this ref idx exists                         */
        if((min_mvd_cluster_id != -1) &&
           (min_mvd <= ps_cluster_data[min_mvd_cluster_id].max_dist_from_centroid))
        {
            hme_update_cluster_attributes(
                &ps_cluster_data[min_mvd_cluster_id],
                mvx,
                mvy,
                min_mvdx,
                min_mvdy,
                ref_idx,
                sdi,
                is_part_of_bi,
                e_part_id);

            if(PRT_NxN == ge_part_id_to_part_type[e_part_id])
            {
                hme_try_cluster_merge(ps_cluster_data, pu1_num_clusters, min_mvd_cluster_id);
            }
        }
        else
        {
            /* Reuse a freed slot if one was seen, else take the next slot */
            /* NOTE(review): no capacity check on the cluster array is     */
            /* visible here - presumably guaranteed by the callers         */
            ps_new_cluster = (new_cluster_idx == MAX_NUM_CLUSTERS_16x16)
                                 ? &ps_cluster_data[num_clusters]
                                 : &ps_cluster_data[new_cluster_idx];
        }
    }

    if(NULL != ps_new_cluster)
    {
        S32 part_area = gai4_partition_area[e_part_id];

        ps_new_cluster->num_mvs = 1;
        ps_new_cluster->s_centroid.i4_pos_x_q8 = mvx_q8;
        ps_new_cluster->s_centroid.i4_pos_y_q8 = mvy_q8;
        ps_new_cluster->ref_id = ref_idx;
        ps_new_cluster->area_in_pixels = part_area;

        ps_new_cluster->as_mv[0].pixel_count = part_area;
        ps_new_cluster->as_mv[0].mvx = mvx;
        ps_new_cluster->as_mv[0].mvy = mvy;
        ps_new_cluster->as_mv[0].is_uni = !is_part_of_bi;
        ps_new_cluster->as_mv[0].sdi = sdi;

        /* Both areas are assigned, not accumulated: the slot may have  */
        /* belonged to a cluster absorbed by a merge, which leaves its  */
        /* old areas behind                                             */
        /* NOTE(review): max_dist_from_centroid of a reused slot is not */
        /* reset here - confirm whether it should be                    */
        ps_new_cluster->bi_mv_pixel_area = is_part_of_bi ? part_area : 0;
        ps_new_cluster->uni_mv_pixel_area = is_part_of_bi ? 0 : part_area;

        ps_new_cluster->max_x = mvx;
        ps_new_cluster->min_x = mvx;
        ps_new_cluster->max_y = mvy;
        ps_new_cluster->min_y = mvy;

        ps_new_cluster->is_valid_cluster = 1;

        *pu1_num_clusters = (U08)(num_clusters + 1);
    }
}
/**
********************************************************************************
* @fn void hme_update_32x32_cluster_attributes
* (
* cluster_32x32_blk_t *ps_blk_32x32,
* cluster_data_t *ps_cluster_data
* )
*
* @brief Updates attributes for 32x32 clusters based on the attributes of
* the constituent 16x16 clusters
*
* @param[out] ps_blk_32x32: structure containing 32x32 block results
*
* @param[in] ps_cluster_data : structure containing 16x16 block results
*
* @return None
********************************************************************************
*/
void hme_update_32x32_cluster_attributes(
    cluster_32x32_blk_t *ps_blk_32x32, cluster_data_t *ps_cluster_data)
{
    /*
     * Folds one input cluster (ps_cluster_data) into the cluster list of a
     * 32x32 block:
     *  - if the block has no clusters yet, the input seeds slot 0;
     *  - otherwise the nearest cluster with the same ref_id is located and,
     *    if the input centroid lies within that cluster's
     *    max_dist_from_centroid, the input is merged into it;
     *  - otherwise a new cluster is appended while the count is below
     *    MAX_NUM_CLUSTERS_32x32 (the input is dropped when it is not).
     */
    cluster_data_t *ps_cur_cluster_32;

    S32 i;
    S32 mvd_limit;

    S32 num_clusters = ps_blk_32x32->num_clusters;

    if(0 == num_clusters)
    {
        ps_cur_cluster_32 = &ps_blk_32x32->as_cluster_data[0];

        ps_blk_32x32->num_clusters++;
        ps_blk_32x32->au1_num_clusters[ps_cluster_data->ref_id]++;

        ps_cur_cluster_32->is_valid_cluster = 1;

        ps_cur_cluster_32->area_in_pixels = ps_cluster_data->area_in_pixels;
        ps_cur_cluster_32->bi_mv_pixel_area += ps_cluster_data->bi_mv_pixel_area;
        ps_cur_cluster_32->uni_mv_pixel_area += ps_cluster_data->uni_mv_pixel_area;

        memcpy(
            ps_cur_cluster_32->as_mv,
            ps_cluster_data->as_mv,
            sizeof(mv_data_t) * ps_cluster_data->num_mvs);

        ps_cur_cluster_32->num_mvs = ps_cluster_data->num_mvs;

        ps_cur_cluster_32->ref_id = ps_cluster_data->ref_id;

        ps_cur_cluster_32->max_x = ps_cluster_data->max_x;
        ps_cur_cluster_32->max_y = ps_cluster_data->max_y;
        ps_cur_cluster_32->min_x = ps_cluster_data->min_x;
        ps_cur_cluster_32->min_y = ps_cluster_data->min_y;

        ps_cur_cluster_32->s_centroid = ps_cluster_data->s_centroid;
    }
    else
    {
        centroid_t *ps_centroid;

        S32 cur_posx_q8, cur_posy_q8;
        S32 min_mvd_cluster_id = -1;
        S32 mvd;
        S32 mvdx;
        S32 mvdy;
        /* Initialised so that they are never read indeterminate */
        S32 mvdx_min = 0;
        S32 mvdy_min = 0;
        S32 mvdx_q8;
        S32 mvdy_q8;

        S32 num_clusters_evaluated = 0;

        S32 mvd_min = MAX_32BIT_VAL;

        S32 mvx_inp_q8 = ps_cluster_data->s_centroid.i4_pos_x_q8;
        S32 mvy_inp_q8 = ps_cluster_data->s_centroid.i4_pos_y_q8;

        /* Locate the same-reference cluster whose centroid is nearest */
        /* (L1 distance, rounded from Q8) to the input centroid        */
        for(i = 0; num_clusters_evaluated < num_clusters; i++)
        {
            ps_cur_cluster_32 = &ps_blk_32x32->as_cluster_data[i];

            if(ps_cur_cluster_32->ref_id != ps_cluster_data->ref_id)
            {
                num_clusters_evaluated++;
                continue;
            }

            /* Validity must be tested on the slot being scanned, not on */
            /* the input cluster (matches the 64x64 counterpart)         */
            if(!ps_cur_cluster_32->is_valid_cluster)
            {
                continue;
            }

            num_clusters_evaluated++;

            ps_centroid = &ps_cur_cluster_32->s_centroid;

            cur_posx_q8 = ps_centroid->i4_pos_x_q8;
            cur_posy_q8 = ps_centroid->i4_pos_y_q8;

            mvdx_q8 = cur_posx_q8 - mvx_inp_q8;
            mvdy_q8 = cur_posy_q8 - mvy_inp_q8;

            mvdx = (mvdx_q8 + (1 << 7)) >> 8;
            mvdy = (mvdy_q8 + (1 << 7)) >> 8;

            mvd = ABS(mvdx) + ABS(mvdy);

            if(mvd < mvd_min)
            {
                mvd_min = mvd;
                mvdx_min = mvdx;
                mvdy_min = mvdy;
                min_mvd_cluster_id = i;
            }
        }

        ps_cur_cluster_32 = &ps_blk_32x32->as_cluster_data[0];

        mvd_limit = (min_mvd_cluster_id == -1)
                        ? ps_cur_cluster_32[0].max_dist_from_centroid
                        : ps_cur_cluster_32[min_mvd_cluster_id].max_dist_from_centroid;

        /* Merge only when a candidate was actually found; without the */
        /* index check a -1 index could be used below                  */
        if((min_mvd_cluster_id >= 0) && (mvd_min <= mvd_limit))
        {
            LWORD64 i8_updated_posx;
            LWORD64 i8_updated_posy;
            /* 0: bound untouched, 1: min bound moved, 2: max bound moved */
            WORD32 minmax_updated_x = 0;
            WORD32 minmax_updated_y = 0;

            ps_cur_cluster_32 = &ps_blk_32x32->as_cluster_data[min_mvd_cluster_id];
            ps_centroid = &ps_cur_cluster_32->s_centroid;

            ps_cur_cluster_32->is_valid_cluster = 1;

            ps_cur_cluster_32->area_in_pixels += ps_cluster_data->area_in_pixels;
            ps_cur_cluster_32->bi_mv_pixel_area += ps_cluster_data->bi_mv_pixel_area;
            ps_cur_cluster_32->uni_mv_pixel_area += ps_cluster_data->uni_mv_pixel_area;

            /* NOTE(review): the append is not bounded against the capacity */
            /* of as_mv[]; presumably callers guarantee it fits - confirm   */
            memcpy(
                &ps_cur_cluster_32->as_mv[ps_cur_cluster_32->num_mvs],
                ps_cluster_data->as_mv,
                sizeof(mv_data_t) * ps_cluster_data->num_mvs);

            if((mvdx_min > 0) && ((ps_cur_cluster_32->min_x << 8) > mvx_inp_q8))
            {
                ps_cur_cluster_32->min_x = (mvx_inp_q8 + ((1 << 7))) >> 8;
                minmax_updated_x = 1;
            }
            else if((mvdx_min < 0) && ((ps_cur_cluster_32->max_x << 8) < mvx_inp_q8))
            {
                ps_cur_cluster_32->max_x = (mvx_inp_q8 + (1 << 7)) >> 8;
                minmax_updated_x = 2;
            }

            if((mvdy_min > 0) && ((ps_cur_cluster_32->min_y << 8) > mvy_inp_q8))
            {
                ps_cur_cluster_32->min_y = (mvy_inp_q8 + (1 << 7)) >> 8;
                minmax_updated_y = 1;
            }
            else if((mvdy_min < 0) && ((ps_cur_cluster_32->max_y << 8) < mvy_inp_q8))
            {
                ps_cur_cluster_32->max_y = (mvy_inp_q8 + (1 << 7)) >> 8;
                minmax_updated_y = 2;
            }

            /* Re-derive max_dist_from_centroid from whichever bounds moved. */
            /* Key = (y_state << 2) + x_state                                */
            switch((minmax_updated_y << 2) + minmax_updated_x)
            {
            case 1:
            {
                S32 mvd, mvd_q8;

                mvd_q8 = ps_centroid->i4_pos_x_q8 - (ps_cur_cluster_32->min_x << 8);
                mvd = (mvd_q8 + (1 << 7)) >> 8;

                if(mvd > (mvd_limit))
                {
                    ps_cur_cluster_32->max_dist_from_centroid = mvd;
                }
                break;
            }
            case 2:
            {
                S32 mvd, mvd_q8;

                mvd_q8 = (ps_cur_cluster_32->max_x << 8) - ps_centroid->i4_pos_x_q8;
                mvd = (mvd_q8 + (1 << 7)) >> 8;

                if(mvd > (mvd_limit))
                {
                    ps_cur_cluster_32->max_dist_from_centroid = mvd;
                }
                break;
            }
            case 4:
            {
                S32 mvd, mvd_q8;

                mvd_q8 = ps_centroid->i4_pos_y_q8 - (ps_cur_cluster_32->min_y << 8);
                mvd = (mvd_q8 + (1 << 7)) >> 8;

                if(mvd > (mvd_limit))
                {
                    ps_cur_cluster_32->max_dist_from_centroid = mvd;
                }
                break;
            }
            case 5:
            {
                S32 mvd;
                S32 mvdx, mvdx_q8;
                S32 mvdy, mvdy_q8;

                mvdy_q8 = ps_centroid->i4_pos_y_q8 - (ps_cur_cluster_32->min_y << 8);
                mvdy = (mvdy_q8 + (1 << 7)) >> 8;

                mvdx_q8 = ps_centroid->i4_pos_x_q8 - (ps_cur_cluster_32->min_x << 8);
                mvdx = (mvdx_q8 + (1 << 7)) >> 8;

                mvd = (mvdx > mvdy) ? mvdx : mvdy;

                if(mvd > mvd_limit)
                {
                    ps_cur_cluster_32->max_dist_from_centroid = mvd;
                }
                break;
            }
            case 6:
            {
                S32 mvd;
                S32 mvdx, mvdx_q8;
                S32 mvdy, mvdy_q8;

                mvdy_q8 = ps_centroid->i4_pos_y_q8 - (ps_cur_cluster_32->min_y << 8);
                mvdy = (mvdy_q8 + (1 << 7)) >> 8;

                mvdx_q8 = (ps_cur_cluster_32->max_x << 8) - ps_centroid->i4_pos_x_q8;
                mvdx = (mvdx_q8 + (1 << 7)) >> 8;

                mvd = (mvdx > mvdy) ? mvdx : mvdy;

                if(mvd > mvd_limit)
                {
                    ps_cur_cluster_32->max_dist_from_centroid = mvd;
                }
                break;
            }
            case 8:
            {
                S32 mvd, mvd_q8;

                mvd_q8 = (ps_cur_cluster_32->max_y << 8) - ps_centroid->i4_pos_y_q8;
                mvd = (mvd_q8 + (1 << 7)) >> 8;

                if(mvd > (mvd_limit))
                {
                    ps_cur_cluster_32->max_dist_from_centroid = mvd;
                }
                break;
            }
            case 9:
            {
                S32 mvd;
                S32 mvdx, mvdx_q8;
                S32 mvdy, mvdy_q8;

                mvdx_q8 = ps_centroid->i4_pos_x_q8 - (ps_cur_cluster_32->min_x << 8);
                mvdx = (mvdx_q8 + (1 << 7)) >> 8;

                mvdy_q8 = (ps_cur_cluster_32->max_y << 8) - ps_centroid->i4_pos_y_q8;
                mvdy = (mvdy_q8 + (1 << 7)) >> 8;

                mvd = (mvdx > mvdy) ? mvdx : mvdy;

                if(mvd > mvd_limit)
                {
                    ps_cur_cluster_32->max_dist_from_centroid = mvd;
                }
                break;
            }
            case 10:
            {
                S32 mvd;
                S32 mvdx, mvdx_q8;
                S32 mvdy, mvdy_q8;

                mvdx_q8 = (ps_cur_cluster_32->max_x << 8) - ps_centroid->i4_pos_x_q8;
                mvdx = (mvdx_q8 + (1 << 7)) >> 8;

                mvdy_q8 = (ps_cur_cluster_32->max_y << 8) - ps_centroid->i4_pos_y_q8;
                mvdy = (mvdy_q8 + (1 << 7)) >> 8;

                mvd = (mvdx > mvdy) ? mvdx : mvdy;

                if(mvd > ps_cur_cluster_32->max_dist_from_centroid)
                {
                    ps_cur_cluster_32->max_dist_from_centroid = mvd;
                }
                break;
            }
            default:
            {
                break;
            }
            }

            /* New centroid = mean of both centroids weighted by MV count; */
            /* 64-bit accumulation avoids overflow of the Q8 products      */
            i8_updated_posx = ((LWORD64)ps_centroid->i4_pos_x_q8 * ps_cur_cluster_32->num_mvs) +
                              ((LWORD64)mvx_inp_q8 * ps_cluster_data->num_mvs);
            i8_updated_posy = ((LWORD64)ps_centroid->i4_pos_y_q8 * ps_cur_cluster_32->num_mvs) +
                              ((LWORD64)mvy_inp_q8 * ps_cluster_data->num_mvs);

            ps_cur_cluster_32->num_mvs += ps_cluster_data->num_mvs;

            ps_centroid->i4_pos_x_q8 = (WORD32)(i8_updated_posx / ps_cur_cluster_32->num_mvs);
            ps_centroid->i4_pos_y_q8 = (WORD32)(i8_updated_posy / ps_cur_cluster_32->num_mvs);
        }
        else if(num_clusters < MAX_NUM_CLUSTERS_32x32)
        {
            /* No cluster is close enough: append the input as a new one */
            ps_cur_cluster_32 = &ps_blk_32x32->as_cluster_data[num_clusters];

            ps_blk_32x32->num_clusters++;
            ps_blk_32x32->au1_num_clusters[ps_cluster_data->ref_id]++;

            ps_cur_cluster_32->is_valid_cluster = 1;

            ps_cur_cluster_32->area_in_pixels = ps_cluster_data->area_in_pixels;
            ps_cur_cluster_32->bi_mv_pixel_area += ps_cluster_data->bi_mv_pixel_area;
            ps_cur_cluster_32->uni_mv_pixel_area += ps_cluster_data->uni_mv_pixel_area;

            memcpy(
                ps_cur_cluster_32->as_mv,
                ps_cluster_data->as_mv,
                sizeof(mv_data_t) * ps_cluster_data->num_mvs);

            ps_cur_cluster_32->num_mvs = ps_cluster_data->num_mvs;

            ps_cur_cluster_32->ref_id = ps_cluster_data->ref_id;

            ps_cur_cluster_32->max_x = ps_cluster_data->max_x;
            ps_cur_cluster_32->max_y = ps_cluster_data->max_y;
            ps_cur_cluster_32->min_x = ps_cluster_data->min_x;
            ps_cur_cluster_32->min_y = ps_cluster_data->min_y;

            ps_cur_cluster_32->s_centroid = ps_cluster_data->s_centroid;
        }
    }
}
/**
********************************************************************************
* @fn void hme_update_64x64_cluster_attributes
* (
* cluster_64x64_blk_t *ps_blk_64x64,
* cluster_data_t *ps_cluster_data
* )
*
* @brief Updates attributes for 64x64 clusters based on the attributes of
* the constituent 32x32 clusters
*
* @param[out] ps_blk_64x64: structure containing 64x64 block results
*
* @param[in] ps_cluster_data : structure containing 32x32 block results
*
* @return None
********************************************************************************
*/
void hme_update_64x64_cluster_attributes(
    cluster_64x64_blk_t *ps_blk_64x64, cluster_data_t *ps_cluster_data)
{
    /*
     * Folds one input cluster into the cluster list of a 64x64 block: it is
     * either merged into the nearest same-reference cluster (when within
     * that cluster's max_dist_from_centroid) or stored as a new cluster
     * while the count is below MAX_NUM_CLUSTERS_64x64.
     */
    cluster_data_t *ps_dst;
    centroid_t *ps_dst_centroid;

    S32 slot;
    S32 visited = 0;
    S32 nearest_slot = -1;
    S32 nearest_dist = MAX_32BIT_VAL;
    S32 nearest_dx = 0;
    S32 nearest_dy = 0;
    S32 dist_limit = 0;
    S32 absorb = 0;

    S32 cluster_count = ps_blk_64x64->num_clusters;
    S32 in_x_q8 = ps_cluster_data->s_centroid.i4_pos_x_q8;
    S32 in_y_q8 = ps_cluster_data->s_centroid.i4_pos_y_q8;

    if(0 != cluster_count)
    {
        /* Search for the same-reference, valid cluster nearest to the input */
        for(slot = 0; visited < cluster_count; slot++)
        {
            cluster_data_t *ps_cand = &ps_blk_64x64->as_cluster_data[slot];

            if(ps_cand->ref_id != ps_cluster_data->ref_id)
            {
                visited++;
            }
            else if(ps_cand->is_valid_cluster)
            {
                S32 dx = ((ps_cand->s_centroid.i4_pos_x_q8 - in_x_q8) + (1 << 7)) >> 8;
                S32 dy = ((ps_cand->s_centroid.i4_pos_y_q8 - in_y_q8) + (1 << 7)) >> 8;
                S32 dist = ABS(dx) + ABS(dy);

                visited++;

                if(dist < nearest_dist)
                {
                    nearest_dist = dist;
                    nearest_dx = dx;
                    nearest_dy = dy;
                    nearest_slot = slot;
                }
            }
        }

        /* Slot 0 supplies the limit when no candidate was found */
        dist_limit = ps_blk_64x64->as_cluster_data[(-1 == nearest_slot) ? 0 : nearest_slot]
                         .max_dist_from_centroid;

        absorb = (nearest_dist <= dist_limit);
    }

    if(absorb)
    {
        LWORD64 i8_sum_x;
        LWORD64 i8_sum_y;
        /* 0: bound untouched, 1: min bound moved, 2: max bound moved */
        WORD32 x_bound_state = 0;
        WORD32 y_bound_state = 0;

        ps_dst = &ps_blk_64x64->as_cluster_data[nearest_slot];
        ps_dst_centroid = &ps_dst->s_centroid;

        ps_dst->is_valid_cluster = 1;
        ps_dst->area_in_pixels += ps_cluster_data->area_in_pixels;
        ps_dst->bi_mv_pixel_area += ps_cluster_data->bi_mv_pixel_area;
        ps_dst->uni_mv_pixel_area += ps_cluster_data->uni_mv_pixel_area;

        /* Append the input MVs behind the ones already stored */
        memcpy(
            &ps_dst->as_mv[ps_dst->num_mvs],
            ps_cluster_data->as_mv,
            sizeof(mv_data_t) * ps_cluster_data->num_mvs);

        /* Stretch the bounding box towards the input centroid */
        if((nearest_dx > 0) && ((ps_dst->min_x << 8) > in_x_q8))
        {
            ps_dst->min_x = (in_x_q8 + (1 << 7)) >> 8;
            x_bound_state = 1;
        }
        else if((nearest_dx < 0) && ((ps_dst->max_x << 8) < in_x_q8))
        {
            ps_dst->max_x = (in_x_q8 + (1 << 7)) >> 8;
            x_bound_state = 2;
        }

        if((nearest_dy > 0) && ((ps_dst->min_y << 8) > in_y_q8))
        {
            ps_dst->min_y = (in_y_q8 + (1 << 7)) >> 8;
            y_bound_state = 1;
        }
        else if((nearest_dy < 0) && ((ps_dst->max_y << 8) < in_y_q8))
        {
            ps_dst->max_y = (in_y_q8 + (1 << 7)) >> 8;
            y_bound_state = 2;
        }

        /* Grow max_dist_from_centroid if a moved bound now lies farther out */
        if(x_bound_state || y_bound_state)
        {
            S32 reach_x = 0;
            S32 reach_y = 0;
            S32 reach;
            S32 threshold;

            if(1 == x_bound_state)
            {
                reach_x = ((ps_dst_centroid->i4_pos_x_q8 - (ps_dst->min_x << 8)) + (1 << 7)) >> 8;
            }
            else if(2 == x_bound_state)
            {
                reach_x = (((ps_dst->max_x << 8) - ps_dst_centroid->i4_pos_x_q8) + (1 << 7)) >> 8;
            }

            if(1 == y_bound_state)
            {
                reach_y = ((ps_dst_centroid->i4_pos_y_q8 - (ps_dst->min_y << 8)) + (1 << 7)) >> 8;
            }
            else if(2 == y_bound_state)
            {
                reach_y = (((ps_dst->max_y << 8) - ps_dst_centroid->i4_pos_y_q8) + (1 << 7)) >> 8;
            }

            if(x_bound_state && y_bound_state)
            {
                reach = (reach_x > reach_y) ? reach_x : reach_y;
            }
            else if(x_bound_state)
            {
                reach = reach_x;
            }
            else
            {
                reach = reach_y;
            }

            /* When both max bounds moved the comparison is against the  */
            /* cluster's stored value; every other combination uses the */
            /* limit sampled before the merge                            */
            if((2 == x_bound_state) && (2 == y_bound_state))
            {
                threshold = ps_dst->max_dist_from_centroid;
            }
            else
            {
                threshold = dist_limit;
            }

            if(reach > threshold)
            {
                ps_dst->max_dist_from_centroid = reach;
            }
        }

        /* Centroid becomes the MV-count weighted mean of both centroids */
        i8_sum_x = ((LWORD64)ps_dst_centroid->i4_pos_x_q8 * ps_dst->num_mvs) +
                   ((LWORD64)in_x_q8 * ps_cluster_data->num_mvs);
        i8_sum_y = ((LWORD64)ps_dst_centroid->i4_pos_y_q8 * ps_dst->num_mvs) +
                   ((LWORD64)in_y_q8 * ps_cluster_data->num_mvs);

        ps_dst->num_mvs += ps_cluster_data->num_mvs;

        ps_dst_centroid->i4_pos_x_q8 = (WORD32)(i8_sum_x / ps_dst->num_mvs);
        ps_dst_centroid->i4_pos_y_q8 = (WORD32)(i8_sum_y / ps_dst->num_mvs);

        return;
    }

    /* Not merged: store the input as a new cluster when a slot is free. */
    /* An empty block always accepts its first cluster.                   */
    if((0 == cluster_count) || (cluster_count < MAX_NUM_CLUSTERS_64x64))
    {
        ps_dst = &ps_blk_64x64->as_cluster_data[cluster_count];

        ps_blk_64x64->num_clusters++;
        ps_blk_64x64->au1_num_clusters[ps_cluster_data->ref_id]++;

        ps_dst->is_valid_cluster = 1;
        ps_dst->ref_id = ps_cluster_data->ref_id;

        ps_dst->area_in_pixels = ps_cluster_data->area_in_pixels;
        ps_dst->bi_mv_pixel_area += ps_cluster_data->bi_mv_pixel_area;
        ps_dst->uni_mv_pixel_area += ps_cluster_data->uni_mv_pixel_area;

        memcpy(
            &ps_dst->as_mv[0],
            ps_cluster_data->as_mv,
            sizeof(mv_data_t) * ps_cluster_data->num_mvs);
        ps_dst->num_mvs = ps_cluster_data->num_mvs;

        ps_dst->min_x = ps_cluster_data->min_x;
        ps_dst->min_y = ps_cluster_data->min_y;
        ps_dst->max_x = ps_cluster_data->max_x;
        ps_dst->max_y = ps_cluster_data->max_y;

        ps_dst->s_centroid = ps_cluster_data->s_centroid;
    }
}
/**
********************************************************************************
* @fn void hme_update_32x32_clusters
* (
* cluster_32x32_blk_t *ps_blk_32x32,
* cluster_16x16_blk_t *ps_blk_16x16
* )
*
* @brief Updates attributes for 32x32 clusters based on the attributes of
* the constituent 16x16 clusters
*
* @param[out] ps_blk_32x32: structure containing 32x32 block results
*
* @param[in] ps_blk_16x16 : structure containing 16x16 block results
*
* @return None
********************************************************************************
*/
static __inline void
hme_update_32x32_clusters(cluster_32x32_blk_t *ps_blk_32x32, cluster_16x16_blk_t *ps_blk_16x16)
{
    /* Accumulates the four 16x16 children into their parent 32x32 block: */
    /* scalar costs/areas are summed and every valid child cluster is     */
    /* folded into the parent's cluster list.                             */
    S32 child;

    for(child = 0; child < 4; child++)
    {
        cluster_16x16_blk_t *ps_child = &ps_blk_16x16[child];
        /* Number of valid clusters still to be found in this child */
        S32 pending = ps_child->num_clusters;
        S32 slot = 0;

        ps_blk_32x32->intra_mv_area += ps_child->intra_mv_area;
        ps_blk_32x32->best_inter_cost += ps_child->best_inter_cost;

        while(pending > 0)
        {
            cluster_data_t *ps_cluster = &ps_child->as_cluster_data[slot];

            slot++;

            /* Invalid slots are skipped and do not count as processed */
            if(ps_cluster->is_valid_cluster)
            {
                hme_update_32x32_cluster_attributes(ps_blk_32x32, ps_cluster);
                pending--;
            }
        }
    }
}
/**
********************************************************************************
* @fn void hme_update_64x64_clusters
* (
* cluster_64x64_blk_t *ps_blk_64x64,
* cluster_32x32_blk_t *ps_blk_32x32
* )
*
* @brief Updates attributes for 64x64 clusters based on the attributes of
* the constituent 32x32 clusters
*
* @param[out] ps_blk_64x64: structure containing 64x64 block results
*
* @param[in] ps_blk_32x32 : structure containing 32x32 block results
*
* @return None
********************************************************************************
*/
static __inline void
hme_update_64x64_clusters(cluster_64x64_blk_t *ps_blk_64x64, cluster_32x32_blk_t *ps_blk_32x32)
{
    /* Accumulates the four 32x32 children into their parent 64x64 block: */
    /* scalar costs/areas are summed and every valid child cluster is     */
    /* folded into the parent's cluster list.                             */
    S32 child;

    for(child = 0; child < 4; child++)
    {
        cluster_32x32_blk_t *ps_child = &ps_blk_32x32[child];
        /* Number of valid clusters still to be found in this child */
        S32 pending = ps_child->num_clusters;
        S32 slot = 0;

        ps_blk_64x64->intra_mv_area += ps_child->intra_mv_area;
        ps_blk_64x64->best_inter_cost += ps_child->best_inter_cost;

        while(pending > 0)
        {
            cluster_data_t *ps_cluster = &ps_child->as_cluster_data[slot];

            slot++;

            /* Invalid slots are skipped and do not count as processed */
            if(ps_cluster->is_valid_cluster)
            {
                hme_update_64x64_cluster_attributes(ps_blk_64x64, ps_cluster);
                pending--;
            }
        }
    }
}
/**
********************************************************************************
* @fn void hme_try_merge_clusters_blksize_gt_16
* (
* cluster_data_t *ps_cluster_data,
* S32 num_clusters
* )
*
* @brief Merging clusters from blocks of size 32x32 and greater
*
* @param[in/out] ps_cluster_data: array of clusters; the first entry is the
* cluster into which nearby clusters are merged
*
* @param[in] num_clusters : number of clusters to be considered in the array
*
* @return Number of merges performed
********************************************************************************
*/
S32 hme_try_merge_clusters_blksize_gt_16(cluster_data_t *ps_cluster_data, S32 num_clusters)
{
/* Treats ps_cluster_data[0] as the anchor cluster and merges into it every */
/* later valid cluster that has the same ref_id and whose centroid lies */
/* within half of the anchor's max_dist_from_centroid (L1 distance). */
/* The function then recurses with the next valid cluster as the anchor. */
/* The return value is the total number of merges performed. */
centroid_t *ps_cur_centroid;
cluster_data_t *ps_cur_cluster;
S32 i, mvd;
S32 mvdx, mvdy, mvdx_q8, mvdy_q8;
centroid_t *ps_centroid = &ps_cluster_data->s_centroid;
/* Anchor limit, ref_id and centroid position are sampled once here; */
/* they are not refreshed after a merge changes the anchor */
S32 mvd_limit = ps_cluster_data->max_dist_from_centroid;
S32 ref_id = ps_cluster_data->ref_id;
S32 node0_posx_q8 = ps_centroid->i4_pos_x_q8;
S32 node0_posy_q8 = ps_centroid->i4_pos_y_q8;
/* The anchor itself counts as already evaluated */
S32 num_clusters_evaluated = 1;
S32 ret_value = 0;
/* Nothing to merge with when there is at most one cluster */
if(1 >= num_clusters)
{
return ret_value;
}
for(i = 1; num_clusters_evaluated < num_clusters; i++)
{
S32 cur_posx_q8;
S32 cur_posy_q8;
ps_cur_cluster = &ps_cluster_data[i];
/* Clusters of another reference are counted but never merged */
if((ref_id != ps_cur_cluster->ref_id))
{
num_clusters_evaluated++;
continue;
}
/* Invalid (already merged) slots are skipped without being counted */
if((!ps_cur_cluster->is_valid_cluster))
{
continue;
}
num_clusters_evaluated++;
ps_cur_centroid = &ps_cur_cluster->s_centroid;
cur_posx_q8 = ps_cur_centroid->i4_pos_x_q8;
cur_posy_q8 = ps_cur_centroid->i4_pos_y_q8;
/* Centroid difference, rounded from Q8 to integer units */
mvdx_q8 = cur_posx_q8 - node0_posx_q8;
mvdy_q8 = cur_posy_q8 - node0_posy_q8;
mvdx = (mvdx_q8 + (1 << 7)) >> 8;
mvdy = (mvdy_q8 + (1 << 7)) >> 8;
mvd = ABS(mvdx) + ABS(mvdy);
if(mvd <= (mvd_limit >> 1))
{
LWORD64 i8_updated_posx;
LWORD64 i8_updated_posy;
WORD32 minmax_updated_x = 0;
WORD32 minmax_updated_y = 0;
/* The candidate is absorbed: invalidate it and move its area and MVs */
/* into the anchor */
ps_cur_cluster->is_valid_cluster = 0;
ps_cluster_data->area_in_pixels += ps_cur_cluster->area_in_pixels;
ps_cluster_data->bi_mv_pixel_area += ps_cur_cluster->bi_mv_pixel_area;
ps_cluster_data->uni_mv_pixel_area += ps_cur_cluster->uni_mv_pixel_area;
memcpy(
&ps_cluster_data->as_mv[ps_cluster_data->num_mvs],
ps_cur_cluster->as_mv,
sizeof(mv_data_t) * ps_cur_cluster->num_mvs);
/* NOTE(review): the anchor's min/max bounds are overwritten with the */
/* candidate's rounded centroid without first checking that the new */
/* value actually extends the existing bound - confirm this is intended */
if(mvdx > 0)
{
ps_cluster_data->min_x = (cur_posx_q8 + (1 << 7)) >> 8;
minmax_updated_x = 1;
}
else
{
ps_cluster_data->max_x = (cur_posx_q8 + (1 << 7)) >> 8;
minmax_updated_x = 2;
}
if(mvdy > 0)
{
ps_cluster_data->min_y = (cur_posy_q8 + (1 << 7)) >> 8;
minmax_updated_y = 1;
}
else
{
ps_cluster_data->max_y = (cur_posy_q8 + (1 << 7)) >> 8;
minmax_updated_y = 2;
}
/* Both flags are always 1 or 2 at this point, so only the keys 5, 6, 9 */
/* and 10 can occur; cases 1, 2, 4 and 8 are never selected here. */
/* NOTE(review): distances below are measured from the candidate's */
/* centroid (ps_cur_centroid), not the anchor's - confirm intent */
switch((minmax_updated_y << 2) + minmax_updated_x)
{
case 1:
{
S32 mvd, mvd_q8;
mvd_q8 = ps_cur_centroid->i4_pos_x_q8 - (ps_cluster_data->min_x << 8);
mvd = (mvd_q8 + (1 << 7)) >> 8;
if(mvd > (mvd_limit))
{
ps_cluster_data->max_dist_from_centroid = mvd;
}
break;
}
case 2:
{
S32 mvd, mvd_q8;
mvd_q8 = (ps_cluster_data->max_x << 8) - ps_cur_centroid->i4_pos_x_q8;
mvd = (mvd_q8 + (1 << 7)) >> 8;
if(mvd > (mvd_limit))
{
ps_cluster_data->max_dist_from_centroid = mvd;
}
break;
}
case 4:
{
S32 mvd, mvd_q8;
mvd_q8 = ps_cur_centroid->i4_pos_y_q8 - (ps_cluster_data->min_y << 8);
mvd = (mvd_q8 + (1 << 7)) >> 8;
if(mvd > (mvd_limit))
{
ps_cluster_data->max_dist_from_centroid = mvd;
}
break;
}
case 5:
{
/* min_x and min_y were moved */
S32 mvd;
S32 mvdx, mvdx_q8;
S32 mvdy, mvdy_q8;
mvdy_q8 = ps_cur_centroid->i4_pos_y_q8 - (ps_cluster_data->min_y << 8);
mvdy = (mvdy_q8 + (1 << 7)) >> 8;
mvdx_q8 = ps_cur_centroid->i4_pos_x_q8 - (ps_cluster_data->min_x << 8);
mvdx = (mvdx_q8 + (1 << 7)) >> 8;
mvd = (mvdx > mvdy) ? mvdx : mvdy;
if(mvd > mvd_limit)
{
ps_cluster_data->max_dist_from_centroid = mvd;
}
break;
}
case 6:
{
/* max_x and min_y were moved */
S32 mvd;
S32 mvdx, mvdx_q8;
S32 mvdy, mvdy_q8;
mvdy_q8 = ps_cur_centroid->i4_pos_y_q8 - (ps_cluster_data->min_y << 8);
mvdy = (mvdy_q8 + (1 << 7)) >> 8;
mvdx_q8 = (ps_cluster_data->max_x << 8) - ps_cur_centroid->i4_pos_x_q8;
mvdx = (mvdx_q8 + (1 << 7)) >> 8;
mvd = (mvdx > mvdy) ? mvdx : mvdy;
if(mvd > mvd_limit)
{
ps_cluster_data->max_dist_from_centroid = mvd;
}
break;
}
case 8:
{
S32 mvd, mvd_q8;
mvd_q8 = (ps_cluster_data->max_y << 8) - ps_cur_centroid->i4_pos_y_q8;
mvd = (mvd_q8 + (1 << 7)) >> 8;
if(mvd > (mvd_limit))
{
ps_cluster_data->max_dist_from_centroid = mvd;
}
break;
}
case 9:
{
/* min_x and max_y were moved */
S32 mvd;
S32 mvdx, mvdx_q8;
S32 mvdy, mvdy_q8;
mvdx_q8 = ps_cur_centroid->i4_pos_x_q8 - (ps_cluster_data->min_x << 8);
mvdx = (mvdx_q8 + (1 << 7)) >> 8;
mvdy_q8 = (ps_cluster_data->max_y << 8) - ps_cur_centroid->i4_pos_y_q8;
mvdy = (mvdy_q8 + (1 << 7)) >> 8;
mvd = (mvdx > mvdy) ? mvdx : mvdy;
if(mvd > mvd_limit)
{
ps_cluster_data->max_dist_from_centroid = mvd;
}
break;
}
case 10:
{
/* max_x and max_y were moved; unlike the other cases the comparison */
/* uses the anchor's current max_dist_from_centroid rather than the */
/* mvd_limit value sampled at function entry */
S32 mvd;
S32 mvdx, mvdx_q8;
S32 mvdy, mvdy_q8;
mvdx_q8 = (ps_cluster_data->max_x << 8) - ps_cur_centroid->i4_pos_x_q8;
mvdx = (mvdx_q8 + (1 << 7)) >> 8;
mvdy_q8 = (ps_cluster_data->max_y << 8) - ps_cur_centroid->i4_pos_y_q8;
mvdy = (mvdy_q8 + (1 << 7)) >> 8;
mvd = (mvdx > mvdy) ? mvdx : mvdy;
if(mvd > ps_cluster_data->max_dist_from_centroid)
{
ps_cluster_data->max_dist_from_centroid = mvd;
}
break;
}
default:
{
break;
}
}
/* Anchor centroid becomes the MV-count weighted mean of both centroids; */
/* the products are accumulated in 64 bits */
i8_updated_posx = ((LWORD64)ps_centroid->i4_pos_x_q8 * ps_cluster_data->num_mvs) +
((LWORD64)cur_posx_q8 * ps_cur_cluster->num_mvs);
i8_updated_posy = ((LWORD64)ps_centroid->i4_pos_y_q8 * ps_cluster_data->num_mvs) +
((LWORD64)cur_posy_q8 * ps_cur_cluster->num_mvs);
ps_cluster_data->num_mvs += ps_cur_cluster->num_mvs;
ps_centroid->i4_pos_x_q8 = (WORD32)(i8_updated_posx / ps_cluster_data->num_mvs);
ps_centroid->i4_pos_y_q8 = (WORD32)(i8_updated_posy / ps_cluster_data->num_mvs);
if(MAX_NUM_CLUSTERS_IN_VALID_64x64_BLK >= num_clusters)
{
/* Few enough clusters: drop the merged one from the count and restart */
/* the scan; i is set to 0 so the loop increment resumes at index 1 */
num_clusters--;
num_clusters_evaluated = 1;
i = 0;
ret_value++;
}
else
{
/* Otherwise stop after this single merge */
ret_value++;
return ret_value;
}
}
}
if(ret_value)
{
/* Merges invalidated some slots: find the first slot after the anchor */
/* that is still valid, searching over the pre-merge slot range */
for(i = 1; i < (num_clusters + ret_value); i++)
{
if(ps_cluster_data[i].is_valid_cluster)
{
break;
}
}
/* No valid cluster is left to act as the next anchor */
if(i == (num_clusters + ret_value))
{
return ret_value;
}
}
else
{
i = 1;
}
/* Recurse with the next cluster as anchor and add its merge count */
return (hme_try_merge_clusters_blksize_gt_16(&ps_cluster_data[i], num_clusters - 1)) +
ret_value;
}
/**
********************************************************************************
* @fn S32 hme_determine_validity_32x32
* (
* ctb_cluster_info_t *ps_ctb_cluster_info
* )
*
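* @brief Determines whether current 32x32 block needs to be evaluated in enc_loop
* while recursing through the CU tree or not
*
* @param[in] ps_ctb_cluster_info: structure containing pointers to the 32x32
* and 64x64 cluster blocks
*
* @param[out] pi4_children_nodes_required: set to 1 if the children of this
* block need to be evaluated, else 0
*
* @param[in] blk_validity_wrt_pic_bndry: validity of the current block with
* respect to the picture boundary
*
* @param[in] parent_blk_validity_wrt_pic_bndry: validity of the parent block
* with respect to the picture boundary
*
* @return 1 if the current block is to be treated as valid, else 0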
********************************************************************************
*/
__inline S32 hme_determine_validity_32x32(
    ctb_cluster_info_t *ps_ctb_cluster_info,
    S32 *pi4_children_nodes_required,
    S32 blk_validity_wrt_pic_bndry,
    S32 parent_blk_validity_wrt_pic_bndry)
{
    /* Decides, from picture-boundary validity and cluster counts of the */
    /* 32x32 block and its 64x64 parent, whether the block is valid (the */
    /* return value) and whether its children must also be evaluated     */
    /* (*pi4_children_nodes_required).                                   */
    cluster_32x32_blk_t *ps_cur_blk = ps_ctb_cluster_info->ps_32x32_blk;
    cluster_64x64_blk_t *ps_parent_blk = ps_ctb_cluster_info->ps_64x64_blk;

    S32 cur_count = ps_cur_blk->num_clusters;
    S32 parent_count = ps_parent_blk->num_clusters;

    S32 slot;
    S32 seen;
    S32 parent_area;
    S32 smallest_area;
    S32 split_needed;

    /* Block itself is not valid w.r.t. the picture boundary */
    if(!blk_validity_wrt_pic_bndry)
    {
        *pi4_children_nodes_required = 1;
        return 0;
    }

    /* Parent is not valid w.r.t. the picture boundary */
    if(!parent_blk_validity_wrt_pic_bndry)
    {
        *pi4_children_nodes_required = 1;
        return 1;
    }

    /* Too many clusters in this block */
    if(cur_count > MAX_NUM_CLUSTERS_IN_VALID_32x32_BLK)
    {
        *pi4_children_nodes_required = 1;
        return 0;
    }

    /* Too many clusters in the parent */
    if(parent_count > MAX_NUM_CLUSTERS_IN_VALID_64x64_BLK)
    {
        *pi4_children_nodes_required = 1;
        return 1;
    }

    /* Parent below its limit, or parent at its limit with this block */
    /* below its own limit                                            */
    if((parent_count < MAX_NUM_CLUSTERS_IN_VALID_64x64_BLK) ||
       (cur_count < MAX_NUM_CLUSTERS_IN_VALID_32x32_BLK))
    {
        *pi4_children_nodes_required = 0;
        return 1;
    }

    /* Both counts sit exactly at their limits: decide on the area of */
    /* the smallest valid cluster                                     */
    parent_area = gai4_partition_area[PART_ID_2Nx2N] << 4;
    smallest_area = MAX_32BIT_VAL;

    slot = 0;
    seen = 0;

    while(seen < cur_count)
    {
        cluster_data_t *ps_cluster = &ps_cur_blk->as_cluster_data[slot];

        slot++;

        if(ps_cluster->is_valid_cluster)
        {
            seen++;

            if(ps_cluster->area_in_pixels < smallest_area)
            {
                smallest_area = ps_cluster->area_in_pixels;
            }
        }
    }

    split_needed = ((smallest_area << 4) < parent_area);

    *pi4_children_nodes_required = split_needed;

    return !split_needed;
}
/**
********************************************************************************
* @fn S32 hme_determine_validity_16x16
* (
* ctb_cluster_info_t *ps_ctb_cluster_info
* )
*
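* @brief Determines whether current 16x16 block needs to be evaluated in enc_loop
* while recursing through the CU tree or not
*
* @param[in] ps_ctb_cluster_info: structure containing pointers to the 16x16,
* 32x32 and 64x64 cluster blocks
*
* @param[out] pi4_children_nodes_required: set to 1 if the children of this
* block need to be evaluated, else 0
*
* @param[in] blk_validity_wrt_pic_bndry: validity of the current block with
* respect to the picture boundary
*
* @param[in] parent_blk_validity_wrt_pic_bndry: validity of the parent block
* with respect to the picture boundary
*
* @return 1 if the current block is to be treated as valid, else 0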
********************************************************************************
*/
__inline S32 hme_determine_validity_16x16(
    ctb_cluster_info_t *ps_ctb_cluster_info,
    S32 *pi4_children_nodes_required,
    S32 blk_validity_wrt_pic_bndry,
    S32 parent_blk_validity_wrt_pic_bndry)
{
    /* Decides, from picture-boundary validity and the cluster counts of */
    /* the 16x16 block, its 32x32 parent and its 64x64 grandparent,      */
    /* whether the block is valid (the return value) and whether its     */
    /* children must also be evaluated (*pi4_children_nodes_required).   */
    cluster_16x16_blk_t *ps_cur_blk = ps_ctb_cluster_info->ps_16x16_blk;
    cluster_32x32_blk_t *ps_parent_blk = ps_ctb_cluster_info->ps_32x32_blk;
    cluster_64x64_blk_t *ps_grandparent_blk = ps_ctb_cluster_info->ps_64x64_blk;

    S32 cur_count = ps_cur_blk->num_clusters;
    S32 parent_count = ps_parent_blk->num_clusters;
    S32 grandparent_count = ps_grandparent_blk->num_clusters;

    S32 slot;
    S32 seen;
    S32 parent_area;
    S32 smallest_area;
    S32 split_needed;

    /* Block itself is not valid w.r.t. the picture boundary */
    if(!blk_validity_wrt_pic_bndry)
    {
        *pi4_children_nodes_required = 1;
        return 0;
    }

    /* Parent is not valid w.r.t. the picture boundary */
    if(!parent_blk_validity_wrt_pic_bndry)
    {
        *pi4_children_nodes_required = 1;
        return 1;
    }

    /* Both ancestors exceed their cluster limits */
    if((parent_count > MAX_NUM_CLUSTERS_IN_VALID_32x32_BLK) &&
       (grandparent_count > MAX_NUM_CLUSTERS_IN_VALID_64x64_BLK))
    {
        *pi4_children_nodes_required = 1;
        return 1;
    }

    /* Parent not exactly at its limit: the block's own count decides */
    if(parent_count != MAX_NUM_CLUSTERS_IN_VALID_32x32_BLK)
    {
        S32 within_limit = (cur_count <= MAX_NUM_CLUSTERS_IN_VALID_16x16_BLK);

        *pi4_children_nodes_required = !within_limit;
        return within_limit;
    }

    /* Parent exactly at its limit */
    if(cur_count < MAX_NUM_CLUSTERS_IN_VALID_16x16_BLK)
    {
        *pi4_children_nodes_required = 0;
        return 1;
    }

    if(cur_count > MAX_NUM_CLUSTERS_IN_VALID_16x16_BLK)
    {
        *pi4_children_nodes_required = 1;
        return 0;
    }

    /* This block is exactly at its limit too: decide on the area of */
    /* the smallest valid cluster                                    */
    parent_area = gai4_partition_area[PART_ID_2Nx2N] << 2;
    smallest_area = MAX_32BIT_VAL;

    slot = 0;
    seen = 0;

    while(seen < cur_count)
    {
        cluster_data_t *ps_cluster = &ps_cur_blk->as_cluster_data[slot];

        slot++;

        if(ps_cluster->is_valid_cluster)
        {
            seen++;

            if(ps_cluster->area_in_pixels < smallest_area)
            {
                smallest_area = ps_cluster->area_in_pixels;
            }
        }
    }

    split_needed = ((smallest_area << 4) < parent_area);

    *pi4_children_nodes_required = split_needed;

    return !split_needed;
}
/**
********************************************************************************
* @fn void hme_build_cu_tree
* (
* ctb_cluster_info_t *ps_ctb_cluster_info,
* cur_ctb_cu_tree_t *ps_cu_tree,
* S32 tree_depth,
* CU_POS_T e_grand_parent_blk_pos,
* CU_POS_T e_parent_blk_pos,
* CU_POS_T e_cur_blk_pos
* )
*
* @brief Recursive function for CU tree initialisation
*
* @param[in] ps_ctb_cluster_info: structure containing pointers to clusters
* corresponding to all block sizes from 64x64
* to 16x16
*
* @param[in] e_parent_blk_pos: position of parent block wrt its parent, if
* applicable
*
* @param[in] e_cur_blk_pos: position of current block wrt parent
*
* @param[out] ps_cu_tree : represents CU tree used in CU recursion
*
* @param[in] tree_depth : specifies depth of the CU tree
*
* @return Nothing
********************************************************************************
*/
void hme_build_cu_tree(
ctb_cluster_info_t *ps_ctb_cluster_info,
cur_ctb_cu_tree_t *ps_cu_tree,
S32 tree_depth,
CU_POS_T e_grandparent_blk_pos,
CU_POS_T e_parent_blk_pos,
CU_POS_T e_cur_blk_pos)
{
/* Thin wrapper: forwards to ihevce_cu_tree_init, supplying the tree root */
/* and the address of the created-node counter held in the cluster info; */
/* all other arguments are passed through unchanged */
ihevce_cu_tree_init(
ps_cu_tree,
ps_ctb_cluster_info->ps_cu_tree_root,
&ps_ctb_cluster_info->nodes_created_in_cu_tree,
tree_depth,
e_grandparent_blk_pos,
e_parent_blk_pos,
e_cur_blk_pos);
}
/**
********************************************************************************
* @fn S32 hme_sdi_based_cluster_spread_eligibility
* (
* cluster_32x32_blk_t *ps_blk_32x32
* )
*
* @brief Determines whether the spread of high SDI MV's around each cluster
* center is below a pre-determined threshold
*
* @param[in] ps_blk_32x32: structure containing pointers to clusters
* corresponding to all block sizes from 64x64
* to 16x16
*
* @return 1 if the spread is constrained, else 0
********************************************************************************
*/
__inline S32
hme_sdi_based_cluster_spread_eligibility(cluster_32x32_blk_t *ps_blk_32x32, S32 sdi_threshold)
{
    /* For each of the block's clusters, accumulates COMPUTE_MVD over the */
    /* MVs whose sdi is at least sdi_threshold. Returns 0 as soon as one  */
    /* cluster's accumulated value exceeds                                */
    /* (max_dist_from_centroid / 2) * (number of such MVs); 1 otherwise.  */
    S32 cluster_idx;
    S32 cluster_count = ps_blk_32x32->num_clusters;

    for(cluster_idx = 0; cluster_idx < cluster_count; cluster_idx++)
    {
        cluster_data_t *ps_data = &ps_blk_32x32->as_cluster_data[cluster_idx];

        S32 cumulative_mv_distance = 0;
        S32 high_sdi_count = 0;
        S32 mv_idx;

        for(mv_idx = 0; mv_idx < ps_data->num_mvs; mv_idx++)
        {
            mv_data_t *ps_mv = &ps_data->as_mv[mv_idx];

            /* MVs below the SDI threshold do not contribute */
            if(ps_mv->sdi < sdi_threshold)
            {
                continue;
            }

            high_sdi_count++;

            {
                COMPUTE_MVD(ps_mv, ps_data, cumulative_mv_distance);
            }
        }

        if(cumulative_mv_distance > ((ps_data->max_dist_from_centroid >> 1) * high_sdi_count))
        {
            return 0;
        }
    }

    return 1;
}
/**
********************************************************************************
* @fn S32 hme_populate_cu_tree
* (
* ctb_cluster_info_t *ps_ctb_cluster_info,
* ipe_l0_ctb_analyse_for_me_t *ps_cur_ipe_ctb,
* cur_ctb_cu_tree_t *ps_cu_tree,
* S32 tree_depth,
* CU_POS_T e_parent_blk_pos,
* CU_POS_T e_cur_blk_pos
* )
*
* @brief Recursive function for CU tree population based on output of
* clustering algorithm
*
* @param[in] ps_ctb_cluster_info: structure containing pointers to clusters
* corresponding to all block sizes from 64x64
* to 16x16
*
* @param[in] e_parent_blk_pos: position of parent block wrt its parent, if
applicable
*
* @param[in] e_cur_blk_pos: position of current block wrt parent
*
* @param[in] ps_cur_ipe_ctb : output container for ipe analyses
*
* @param[out] ps_cu_tree : represents CU tree used in CU recursion
*
* @param[in] tree_depth : specifies depth of the CU tree
*
* @param[in] ipe_decision_precedence : specifies whether precedence should
* be given to decisions made either by IPE(1) or clustering algos.
*
* @return Nothing
********************************************************************************
*/
void hme_populate_cu_tree(
ctb_cluster_info_t *ps_ctb_cluster_info,
cur_ctb_cu_tree_t *ps_cu_tree,
S32 tree_depth,
ME_QUALITY_PRESETS_T e_quality_preset,
CU_POS_T e_grandparent_blk_pos,
CU_POS_T e_parent_blk_pos,
CU_POS_T e_cur_blk_pos)
{
S32 area_of_cur_blk;
S32 area_limit_for_me_decision_precedence;
S32 children_nodes_required;
S32 intra_mv_area;
S32 intra_eval_enable;
S32 inter_eval_enable;
S32 ipe_decision_precedence;
S32 node_validity;
S32 num_clusters;
ipe_l0_ctb_analyse_for_me_t *ps_cur_ipe_ctb = ps_ctb_cluster_info->ps_cur_ipe_ctb;
if(NULL == ps_cu_tree)
{
return;
}
switch(tree_depth)
{
case 0:
{
/* 64x64 block */
S32 blk_32x32_mask = ps_ctb_cluster_info->blk_32x32_mask;
cluster_64x64_blk_t *ps_blk_64x64 = ps_ctb_cluster_info->ps_64x64_blk;
area_of_cur_blk = gai4_partition_area[PART_ID_2Nx2N] << 4;
area_limit_for_me_decision_precedence = (area_of_cur_blk * MAX_INTRA_PERCENTAGE) / 100;
children_nodes_required = 0;
intra_mv_area = ps_blk_64x64->intra_mv_area;
ipe_decision_precedence = (intra_mv_area >= area_limit_for_me_decision_precedence);
intra_eval_enable = ipe_decision_precedence;
inter_eval_enable = !!ps_blk_64x64->num_clusters;
#if 1 //!PROCESS_GT_1CTB_VIA_CU_RECUR_IN_FAST_PRESETS
if(e_quality_preset >= ME_HIGH_QUALITY)
{
inter_eval_enable = 1;
node_validity = (blk_32x32_mask == 0xf);
#if !USE_CLUSTER_DATA_AS_BLK_MERGE_CANDTS
ps_cu_tree->u1_inter_eval_enable = !(intra_mv_area == area_of_cur_blk);
#endif
break;
}
#endif
#if ENABLE_4CTB_EVALUATION
node_validity = (blk_32x32_mask == 0xf);
break;
#else
{
S32 i;
num_clusters = ps_blk_64x64->num_clusters;
node_validity = (ipe_decision_precedence)
? (!ps_cur_ipe_ctb->u1_split_flag)
: (num_clusters <= MAX_NUM_CLUSTERS_IN_VALID_64x64_BLK);
for(i = 0; i < MAX_NUM_REF; i++)
{
node_validity = node_validity && (ps_blk_64x64->au1_num_clusters[i] <=
MAX_NUM_CLUSTERS_IN_ONE_REF_IDX);
}
node_validity = node_validity && (blk_32x32_mask == 0xf);
}
break;
#endif
}
case 1:
{
/* 32x32 block */
S32 is_percent_intra_area_gt_threshold;
cluster_32x32_blk_t *ps_blk_32x32 = &ps_ctb_cluster_info->ps_32x32_blk[e_cur_blk_pos];
S32 blk_32x32_mask = ps_ctb_cluster_info->blk_32x32_mask;
#if !ENABLE_4CTB_EVALUATION
S32 best_inter_cost = ps_blk_32x32->best_inter_cost;
S32 best_intra_cost =
((ps_ctb_cluster_info->ps_cur_ipe_ctb->ai4_best32x32_intra_cost[e_cur_blk_pos] +
ps_ctb_cluster_info->i4_frame_qstep * ps_ctb_cluster_info->i4_frame_qstep_multiplier *
4) < 0)
? MAX_32BIT_VAL
: (ps_ctb_cluster_info->ps_cur_ipe_ctb->ai4_best32x32_intra_cost[e_cur_blk_pos] +
ps_ctb_cluster_info->i4_frame_qstep *
ps_ctb_cluster_info->i4_frame_qstep_multiplier * 4);
S32 best_cost = (best_inter_cost > best_intra_cost) ? best_intra_cost : best_inter_cost;
S32 cost_differential = (best_inter_cost - best_cost);
#endif
area_of_cur_blk = gai4_partition_area[PART_ID_2Nx2N] << 2;
area_limit_for_me_decision_precedence = (area_of_cur_blk * MAX_INTRA_PERCENTAGE) / 100;
intra_mv_area = ps_blk_32x32->intra_mv_area;
is_percent_intra_area_gt_threshold =
(intra_mv_area > area_limit_for_me_decision_precedence);
ipe_decision_precedence = (intra_mv_area >= area_limit_for_me_decision_precedence);
intra_eval_enable = ipe_decision_precedence;
inter_eval_enable = !!ps_blk_32x32->num_clusters;
children_nodes_required = 1;
#if 1 //!PROCESS_GT_1CTB_VIA_CU_RECUR_IN_FAST_PRESETS
if(e_quality_preset >= ME_HIGH_QUALITY)
{
inter_eval_enable = 1;
node_validity = (((blk_32x32_mask) & (1 << e_cur_blk_pos)) || 0);
#if !USE_CLUSTER_DATA_AS_BLK_MERGE_CANDTS
ps_cu_tree->u1_inter_eval_enable = !(intra_mv_area == area_of_cur_blk);
#endif
break;
}
#endif
#if ENABLE_4CTB_EVALUATION
node_validity = (((blk_32x32_mask) & (1 << e_cur_blk_pos)) || 0);
break;
#else
{
S32 i;
num_clusters = ps_blk_32x32->num_clusters;
if(ipe_decision_precedence)
{
node_validity = (ps_cur_ipe_ctb->as_intra32_analyse[e_cur_blk_pos].b1_merge_flag);
node_validity = node_validity && (((blk_32x32_mask) & (1 << e_cur_blk_pos)) || 0);
}
else
{
node_validity =
((ALL_INTER_COST_DIFF_THR * best_cost) >= (100 * cost_differential)) &&
(num_clusters <= MAX_NUM_CLUSTERS_IN_VALID_32x32_BLK) &&
(((blk_32x32_mask) & (1 << e_cur_blk_pos)) || 0);
for(i = 0; (i < MAX_NUM_REF) && (node_validity); i++)
{
node_validity = node_validity && (ps_blk_32x32->au1_num_clusters[i] <=
MAX_NUM_CLUSTERS_IN_ONE_REF_IDX);
}
if(node_validity)
{
node_validity = node_validity &&
hme_sdi_based_cluster_spread_eligibility(
ps_blk_32x32, ps_ctb_cluster_info->sdi_threshold);
}
}
}
break;
#endif
}
case 2:
{
cluster_16x16_blk_t *ps_blk_16x16 =
&ps_ctb_cluster_info->ps_16x16_blk[e_cur_blk_pos + (e_parent_blk_pos << 2)];
S32 blk_8x8_mask =
ps_ctb_cluster_info->pi4_blk_8x8_mask[(S32)(e_parent_blk_pos << 2) + e_cur_blk_pos];
area_of_cur_blk = gai4_partition_area[PART_ID_2Nx2N];
area_limit_for_me_decision_precedence = (area_of_cur_blk * MAX_INTRA_PERCENTAGE) / 100;
children_nodes_required = 1;
intra_mv_area = ps_blk_16x16->intra_mv_area;
ipe_decision_precedence = (intra_mv_area >= area_limit_for_me_decision_precedence);
num_clusters = ps_blk_16x16->num_clusters;
intra_eval_enable = ipe_decision_precedence;
inter_eval_enable = 1;
#if 1 //!PROCESS_GT_1CTB_VIA_CU_RECUR_IN_FAST_PRESETS
if(e_quality_preset >= ME_HIGH_QUALITY)
{
node_validity =
!ps_ctb_cluster_info
->au1_is_16x16_blk_split[(S32)(e_parent_blk_pos << 2) + e_cur_blk_pos];
children_nodes_required = !node_validity;
break;
}
#endif
#if ENABLE_4CTB_EVALUATION
node_validity = (blk_8x8_mask == 0xf);
#if ENABLE_CU_TREE_CULLING
{
cur_ctb_cu_tree_t *ps_32x32_root;
switch(e_parent_blk_pos)
{
case POS_TL:
{
ps_32x32_root = ps_ctb_cluster_info->ps_cu_tree_root->ps_child_node_tl;
break;
}
case POS_TR:
{
ps_32x32_root = ps_ctb_cluster_info->ps_cu_tree_root->ps_child_node_tr;
break;
}
case POS_BL:
{
ps_32x32_root = ps_ctb_cluster_info->ps_cu_tree_root->ps_child_node_bl;
break;
}
case POS_BR:
{
ps_32x32_root = ps_ctb_cluster_info->ps_cu_tree_root->ps_child_node_br;
break;
}
}
if(ps_32x32_root->is_node_valid)
{
node_validity =
node_validity &&
!ps_ctb_cluster_info
->au1_is_16x16_blk_split[(S32)(e_parent_blk_pos << 2) + e_cur_blk_pos];
children_nodes_required = !node_validity;
}
}
#endif
break;
#else
if(ipe_decision_precedence)
{
S32 merge_flag_16 = (ps_cur_ipe_ctb->as_intra32_analyse[e_parent_blk_pos]
.as_intra16_analyse[e_cur_blk_pos]
.b1_merge_flag);
S32 valid_flag = (blk_8x8_mask == 0xf);
node_validity = merge_flag_16 && valid_flag;
}
else
{
node_validity = (blk_8x8_mask == 0xf);
}
break;
#endif
}
case 3:
{
S32 blk_8x8_mask =
ps_ctb_cluster_info
->pi4_blk_8x8_mask[(S32)(e_grandparent_blk_pos << 2) + e_parent_blk_pos];
S32 merge_flag_16 = (ps_cur_ipe_ctb->as_intra32_analyse[e_grandparent_blk_pos]
.as_intra16_analyse[e_parent_blk_pos]
.b1_merge_flag);
S32 merge_flag_32 =
(ps_cur_ipe_ctb->as_intra32_analyse[e_grandparent_blk_pos].b1_merge_flag);
intra_eval_enable = !merge_flag_16 || !merge_flag_32;
inter_eval_enable = 1;
children_nodes_required = 0;
#if 1 //!PROCESS_GT_1CTB_VIA_CU_RECUR_IN_FAST_PRESETS
if(e_quality_preset >= ME_HIGH_QUALITY)
{
node_validity = ((blk_8x8_mask & (1 << e_cur_blk_pos)) || 0);
break;
}
#endif
#if ENABLE_4CTB_EVALUATION
node_validity = ((blk_8x8_mask & (1 << e_cur_blk_pos)) || 0);
break;
#else
{
cur_ctb_cu_tree_t *ps_32x32_root;
cur_ctb_cu_tree_t *ps_16x16_root;
cluster_32x32_blk_t *ps_32x32_blk;
switch(e_grandparent_blk_pos)
{
case POS_TL:
{
ps_32x32_root = ps_ctb_cluster_info->ps_cu_tree_root->ps_child_node_tl;
break;
}
case POS_TR:
{
ps_32x32_root = ps_ctb_cluster_info->ps_cu_tree_root->ps_child_node_tr;
break;
}
case POS_BL:
{
ps_32x32_root = ps_ctb_cluster_info->ps_cu_tree_root->ps_child_node_bl;
break;
}
case POS_BR:
{
ps_32x32_root = ps_ctb_cluster_info->ps_cu_tree_root->ps_child_node_br;
break;
}
}
switch(e_parent_blk_pos)
{
case POS_TL:
{
ps_16x16_root = ps_32x32_root->ps_child_node_tl;
break;
}
case POS_TR:
{
ps_16x16_root = ps_32x32_root->ps_child_node_tr;
break;
}
case POS_BL:
{
ps_16x16_root = ps_32x32_root->ps_child_node_bl;
break;
}
case POS_BR:
{
ps_16x16_root = ps_32x32_root->ps_child_node_br;
break;
}
}
ps_32x32_blk = &ps_ctb_cluster_info->ps_32x32_blk[e_grandparent_blk_pos];
node_validity = ((blk_8x8_mask & (1 << e_cur_blk_pos)) || 0) &&
((!ps_32x32_root->is_node_valid) ||
(ps_32x32_blk->num_clusters_with_weak_sdi_density > 0) ||
(!ps_16x16_root->is_node_valid));
break;
}
#endif
}
}
/* Fill the current cu_tree node */
ps_cu_tree->is_node_valid = node_validity;
ps_cu_tree->u1_intra_eval_enable = intra_eval_enable;
ps_cu_tree->u1_inter_eval_enable = inter_eval_enable;
if(children_nodes_required)
{
tree_depth++;
hme_populate_cu_tree(
ps_ctb_cluster_info,
ps_cu_tree->ps_child_node_tl,
tree_depth,
e_quality_preset,
e_parent_blk_pos,
e_cur_blk_pos,
POS_TL);
hme_populate_cu_tree(
ps_ctb_cluster_info,
ps_cu_tree->ps_child_node_tr,
tree_depth,
e_quality_preset,
e_parent_blk_pos,
e_cur_blk_pos,
POS_TR);
hme_populate_cu_tree(
ps_ctb_cluster_info,
ps_cu_tree->ps_child_node_bl,
tree_depth,
e_quality_preset,
e_parent_blk_pos,
e_cur_blk_pos,
POS_BL);
hme_populate_cu_tree(
ps_ctb_cluster_info,
ps_cu_tree->ps_child_node_br,
tree_depth,
e_quality_preset,
e_parent_blk_pos,
e_cur_blk_pos,
POS_BR);
}
}
/**
********************************************************************************
* @fn void hme_analyse_mv_clustering
* (
* search_results_t *ps_search_results,
* inter_cu_results_t *ps_16x16_cu_results,
* inter_cu_results_t *ps_8x8_cu_results,
* ctb_cluster_info_t *ps_ctb_cluster_info,
* S08 *pi1_future_list,
* S08 *pi1_past_list,
* S32 bidir_enabled,
* ME_QUALITY_PRESETS_T e_quality_preset
* )
*
* @brief Implementation for the clustering algorithm. Gathers intra area,
* inter cost and (for presets below ME_HIGH_QUALITY) MV clusters for
* the 16x16, 32x32 and 64x64 blocks of a CTB, then populates the CU
* tree through hme_populate_cu_tree
*
* @param[in] ps_search_results: structure containing 16x16 block results;
* only u1_split_flag of each of the 16 entries
* is read here
*
* @param[in] ps_16x16_cu_results : per 16x16 block CU results, used when the
* block is not split
*
* @param[in] ps_8x8_cu_results : per 8x8 block CU results, used when the
* enclosing 16x16 block is split
*
* @param[in,out] ps_ctb_cluster_info : cluster data for all block sizes, the
* block validity masks and the CU tree root
*
* @param[in] pi1_future_list : table indexed by the L1 ref idx of a PU to
* obtain the ref idx used for clustering
*
* @param[in] pi1_past_list : table indexed by the L0 ref idx of a PU to
* obtain the ref idx used for clustering
*
* @param[in] bidir_enabled : forwarded to the cluster initialisation and top
* ref id selection helpers
*
* @param[in] e_quality_preset : ME quality preset
*
* @return None
********************************************************************************
*/
void hme_analyse_mv_clustering(
search_results_t *ps_search_results,
inter_cu_results_t *ps_16x16_cu_results,
inter_cu_results_t *ps_8x8_cu_results,
ctb_cluster_info_t *ps_ctb_cluster_info,
S08 *pi1_future_list,
S08 *pi1_past_list,
S32 bidir_enabled,
ME_QUALITY_PRESETS_T e_quality_preset)
{
cluster_16x16_blk_t *ps_blk_16x16;
cluster_32x32_blk_t *ps_blk_32x32;
cluster_64x64_blk_t *ps_blk_64x64;
part_type_results_t *ps_best_result;
pu_result_t *aps_part_result[MAX_NUM_PARTS];
pu_result_t *aps_inferior_parts[MAX_NUM_PARTS];
PART_ID_T e_part_id;
PART_TYPE_T e_part_type;
S32 enable_64x64_merge;
S32 i, j, k;
S32 mvx, mvy;
S32 num_parts;
S32 ref_idx;
S32 ai4_pred_mode[MAX_NUM_PARTS];
/* Number of 32x32 nodes found valid while populating depth 1 */
S32 num_32x32_merges = 0;
/*****************************************/
/*****************************************/
/********* Enter ye who is HQ ************/
/*****************************************/
/*****************************************/
ps_blk_64x64 = ps_ctb_cluster_info->ps_64x64_blk;
/* Initialise data in each of the clusters */
/* When USE_CLUSTER_DATA_AS_BLK_MERGE_CANDTS is 0, presets at or above */
/* ME_HIGH_QUALITY only reset the cost and intra area accumulators */
for(i = 0; i < 16; i++)
{
ps_blk_16x16 = &ps_ctb_cluster_info->ps_16x16_blk[i];
#if !USE_CLUSTER_DATA_AS_BLK_MERGE_CANDTS
if(e_quality_preset < ME_HIGH_QUALITY)
{
hme_init_clusters_16x16(ps_blk_16x16, bidir_enabled);
}
else
{
ps_blk_16x16->best_inter_cost = 0;
ps_blk_16x16->intra_mv_area = 0;
}
#else
hme_init_clusters_16x16(ps_blk_16x16, bidir_enabled);
#endif
}
for(i = 0; i < 4; i++)
{
ps_blk_32x32 = &ps_ctb_cluster_info->ps_32x32_blk[i];
#if !USE_CLUSTER_DATA_AS_BLK_MERGE_CANDTS
if(e_quality_preset < ME_HIGH_QUALITY)
{
hme_init_clusters_32x32(ps_blk_32x32, bidir_enabled);
}
else
{
ps_blk_32x32->best_inter_cost = 0;
ps_blk_32x32->intra_mv_area = 0;
}
#else
hme_init_clusters_32x32(ps_blk_32x32, bidir_enabled);
#endif
}
#if !USE_CLUSTER_DATA_AS_BLK_MERGE_CANDTS
if(e_quality_preset < ME_HIGH_QUALITY)
{
hme_init_clusters_64x64(ps_blk_64x64, bidir_enabled);
}
else
{
ps_blk_64x64->best_inter_cost = 0;
ps_blk_64x64->intra_mv_area = 0;
}
#else
hme_init_clusters_64x64(ps_blk_64x64, bidir_enabled);
#endif
/* Initialise data for all nodes in the CU tree */
hme_build_cu_tree(
ps_ctb_cluster_info, ps_ctb_cluster_info->ps_cu_tree_root, 0, POS_NA, POS_NA, POS_NA);
/* Mark every 16x16 block as split; the flag is cleared further below */
/* for valid blocks whose search result is not split */
if(e_quality_preset >= ME_HIGH_QUALITY)
{
memset(ps_ctb_cluster_info->au1_is_16x16_blk_split, 1, 16 * sizeof(U08));
}
/* With a uniform CU size forced, the initialised tree is left as it is */
#if ENABLE_UNIFORM_CU_SIZE_16x16 || ENABLE_UNIFORM_CU_SIZE_8x8
return;
#endif
/* Gather intra area, inter cost and MV clusters of every 16x16 block */
for(i = 0; i < 16; i++)
{
S32 blk_8x8_mask;
S32 is_16x16_blk_valid;
S32 num_clusters_updated;
S32 num_clusters;
blk_8x8_mask = ps_ctb_cluster_info->pi4_blk_8x8_mask[i];
ps_blk_16x16 = &ps_ctb_cluster_info->ps_16x16_blk[i];
/* Only blocks with all four 8x8 mask bits set are processed */
is_16x16_blk_valid = (blk_8x8_mask == 0xf);
if(is_16x16_blk_valid)
{
/* Use 8x8 data when 16x16 CU is split */
if(ps_search_results[i].u1_split_flag)
{
S32 blk_8x8_idx = i << 2;
num_parts = 4;
e_part_type = PRT_NxN;
for(j = 0; j < num_parts; j++, blk_8x8_idx++)
{
/* Only 2Nx2N partition supported for 8x8 block */
ASSERT(
ps_8x8_cu_results[blk_8x8_idx].ps_best_results[0].u1_part_type ==
((PART_TYPE_T)PRT_2Nx2N));
aps_part_result[j] =
&ps_8x8_cu_results[blk_8x8_idx].ps_best_results[0].as_pu_results[0];
/* Result entry 1 supplies the cost used when entry 0 is intra */
aps_inferior_parts[j] =
&ps_8x8_cu_results[blk_8x8_idx].ps_best_results[1].as_pu_results[0];
ai4_pred_mode[j] = (aps_part_result[j]->pu.b2_pred_mode);
}
}
else
{
/* Unsplit: take the partitions of the best 16x16 result */
ps_best_result = &ps_16x16_cu_results[i].ps_best_results[0];
e_part_type = (PART_TYPE_T)ps_best_result->u1_part_type;
num_parts = gau1_num_parts_in_part_type[e_part_type];
for(j = 0; j < num_parts; j++)
{
aps_part_result[j] = &ps_best_result->as_pu_results[j];
aps_inferior_parts[j] = &ps_best_result[1].as_pu_results[j];
ai4_pred_mode[j] = (aps_part_result[j]->pu.b2_pred_mode);
}
ps_ctb_cluster_info->au1_is_16x16_blk_split[i] = 0;
}
for(j = 0; j < num_parts; j++)
{
pu_result_t *ps_part_result = aps_part_result[j];
/* Pred mode values above 1 contribute two MVs, the others one */
S32 num_mvs = ((ai4_pred_mode[j] > 1) + 1);
e_part_id = ge_part_type_to_part_id[e_part_type][j];
/* Skip clustering if best mode is intra */
if((ps_part_result->pu.b1_intra_flag))
{
/* The intra area grows by the partition area, while the inter cost */
/* is taken from the inferior result */
ps_blk_16x16->intra_mv_area += gai4_partition_area[e_part_id];
ps_blk_16x16->best_inter_cost += aps_inferior_parts[j]->i4_tot_cost;
continue;
}
else
{
ps_blk_16x16->best_inter_cost += ps_part_result->i4_tot_cost;
}
#if !USE_CLUSTER_DATA_AS_BLK_MERGE_CANDTS
/* Presets at or above ME_HIGH_QUALITY only need the cost and area sums */
if(e_quality_preset >= ME_HIGH_QUALITY)
{
continue;
}
#endif
for(k = 0; k < num_mvs; k++)
{
mv_t *ps_mv;
pu_mv_t *ps_pu_mv = &ps_part_result->pu.mv;
/* Pred mode 0 uses the L0 MV, pred mode 2 uses L0 first (k == 0) and */
/* then L1; every other case uses the L1 MV */
S32 is_l0_mv = ((ai4_pred_mode[j] == 2) && !k) || (ai4_pred_mode[j] == 0);
ps_mv = (is_l0_mv) ? (&ps_pu_mv->s_l0_mv) : (&ps_pu_mv->s_l1_mv);
mvx = ps_mv->i2_mvx;
mvy = ps_mv->i2_mvy;
/* Translate the PU's list specific ref idx through the past/future list */
ref_idx = (is_l0_mv) ? pi1_past_list[ps_pu_mv->i1_l0_ref_idx]
: pi1_future_list[ps_pu_mv->i1_l1_ref_idx];
num_clusters = ps_blk_16x16->num_clusters;
hme_find_and_update_clusters(
ps_blk_16x16->as_cluster_data,
&(ps_blk_16x16->num_clusters),
mvx,
mvy,
ref_idx,
ps_part_result->i4_sdi,
e_part_id,
(ai4_pred_mode[j] == 2));
/* Credit any change in the cluster count to this ref idx */
num_clusters_updated = (ps_blk_16x16->num_clusters);
ps_blk_16x16->au1_num_clusters[ref_idx] +=
(num_clusters_updated - num_clusters);
}
}
}
}
/* Search for 32x32 clusters */
for(i = 0; i < 4; i++)
{
S32 num_clusters_merged;
S32 is_32x32_blk_valid = (ps_ctb_cluster_info->blk_32x32_mask & (1 << i)) || 0;
if(is_32x32_blk_valid)
{
ps_blk_32x32 = &ps_ctb_cluster_info->ps_32x32_blk[i];
/* First of the four 16x16 blocks belonging to this 32x32 block */
ps_blk_16x16 = &ps_ctb_cluster_info->ps_16x16_blk[i << 2];
#if !USE_CLUSTER_DATA_AS_BLK_MERGE_CANDTS
if(e_quality_preset >= ME_HIGH_QUALITY)
{
/* Only sum intra area and inter cost of the four children */
for(j = 0; j < 4; j++, ps_blk_16x16++)
{
ps_blk_32x32->intra_mv_area += ps_blk_16x16->intra_mv_area;
ps_blk_32x32->best_inter_cost += ps_blk_16x16->best_inter_cost;
}
continue;
}
#endif
hme_update_32x32_clusters(ps_blk_32x32, ps_blk_16x16);
/* Attempt a cluster merge once the count reaches the validity limit */
if((ps_blk_32x32->num_clusters >= MAX_NUM_CLUSTERS_IN_VALID_32x32_BLK))
{
num_clusters_merged = hme_try_merge_clusters_blksize_gt_16(
ps_blk_32x32->as_cluster_data, (ps_blk_32x32->num_clusters));
if(num_clusters_merged)
{
ps_blk_32x32->num_clusters -= num_clusters_merged;
UPDATE_CLUSTER_METADATA_POST_MERGE(ps_blk_32x32);
}
}
}
}
#if !USE_CLUSTER_DATA_AS_BLK_MERGE_CANDTS
/* Eliminate outlier 32x32 clusters */
if(e_quality_preset < ME_HIGH_QUALITY)
#endif
{
hme_boot_out_outlier(ps_ctb_cluster_info, 32);
/* Find best_uni_ref and best_alt_ref */
hme_find_top_ref_ids(ps_ctb_cluster_info, bidir_enabled, 32);
}
/* Populate the CU tree for depths 1 and higher */
{
cur_ctb_cu_tree_t *ps_tree_root = ps_ctb_cluster_info->ps_cu_tree_root;
cur_ctb_cu_tree_t *ps_tl = ps_tree_root->ps_child_node_tl;
cur_ctb_cu_tree_t *ps_tr = ps_tree_root->ps_child_node_tr;
cur_ctb_cu_tree_t *ps_bl = ps_tree_root->ps_child_node_bl;
cur_ctb_cu_tree_t *ps_br = ps_tree_root->ps_child_node_br;
/* Each call fills one 32x32 node and its descendants; valid 32x32 */
/* nodes are counted for the 64x64 merge decision below */
hme_populate_cu_tree(
ps_ctb_cluster_info, ps_tl, 1, e_quality_preset, POS_NA, POS_NA, POS_TL);
num_32x32_merges += (ps_tl->is_node_valid == 1);
hme_populate_cu_tree(
ps_ctb_cluster_info, ps_tr, 1, e_quality_preset, POS_NA, POS_NA, POS_TR);
num_32x32_merges += (ps_tr->is_node_valid == 1);
hme_populate_cu_tree(
ps_ctb_cluster_info, ps_bl, 1, e_quality_preset, POS_NA, POS_NA, POS_BL);
num_32x32_merges += (ps_bl->is_node_valid == 1);
hme_populate_cu_tree(
ps_ctb_cluster_info, ps_br, 1, e_quality_preset, POS_NA, POS_NA, POS_BR);
num_32x32_merges += (ps_br->is_node_valid == 1);
}
/* Decide whether the 64x64 level is evaluated. Below ME_HIGH_QUALITY */
/* this needs at least three valid 32x32 nodes unless */
/* ENABLE_4CTB_EVALUATION forces it on */
#if !ENABLE_4CTB_EVALUATION
if(e_quality_preset < ME_HIGH_QUALITY)
{
enable_64x64_merge = (num_32x32_merges >= 3);
}
#else
if(e_quality_preset < ME_HIGH_QUALITY)
{
enable_64x64_merge = 1;
}
#endif
#if 1 //!PROCESS_GT_1CTB_VIA_CU_RECUR_IN_FAST_PRESETS
if(e_quality_preset >= ME_HIGH_QUALITY)
{
enable_64x64_merge = 1;
}
#else
if(e_quality_preset >= ME_HIGH_QUALITY)
{
enable_64x64_merge = (num_32x32_merges >= 3);
}
#endif
if(enable_64x64_merge)
{
S32 num_clusters_merged;
ps_blk_32x32 = &ps_ctb_cluster_info->ps_32x32_blk[0];
#if !USE_CLUSTER_DATA_AS_BLK_MERGE_CANDTS
if(e_quality_preset >= ME_HIGH_QUALITY)
{
/* Only sum intra area and inter cost of the four 32x32 blocks */
for(j = 0; j < 4; j++, ps_blk_32x32++)
{
ps_blk_64x64->intra_mv_area += ps_blk_32x32->intra_mv_area;
ps_blk_64x64->best_inter_cost += ps_blk_32x32->best_inter_cost;
}
}
else
#endif
{
hme_update_64x64_clusters(ps_blk_64x64, ps_blk_32x32);
/* Attempt a cluster merge once the count reaches the validity limit */
if((ps_blk_64x64->num_clusters >= MAX_NUM_CLUSTERS_IN_VALID_64x64_BLK))
{
num_clusters_merged = hme_try_merge_clusters_blksize_gt_16(
ps_blk_64x64->as_cluster_data, (ps_blk_64x64->num_clusters));
if(num_clusters_merged)
{
ps_blk_64x64->num_clusters -= num_clusters_merged;
UPDATE_CLUSTER_METADATA_POST_MERGE(ps_blk_64x64);
}
}
}
#if !ENABLE_4CTB_EVALUATION
if(e_quality_preset < ME_HIGH_QUALITY)
{
/* Re-evaluate the merge decision on cost. The intra cost is biased by */
/* (frame qstep * multiplier * 16); a negative sum is replaced by */
/* MAX_32BIT_VAL */
S32 best_inter_cost = ps_blk_64x64->best_inter_cost;
S32 best_intra_cost =
((ps_ctb_cluster_info->ps_cur_ipe_ctb->i4_best64x64_intra_cost +
ps_ctb_cluster_info->i4_frame_qstep *
ps_ctb_cluster_info->i4_frame_qstep_multiplier * 16) < 0)
? MAX_32BIT_VAL
: (ps_ctb_cluster_info->ps_cur_ipe_ctb->i4_best64x64_intra_cost +
ps_ctb_cluster_info->i4_frame_qstep *
ps_ctb_cluster_info->i4_frame_qstep_multiplier * 16);
S32 best_cost = (best_inter_cost > best_intra_cost) ? best_intra_cost : best_inter_cost;
S32 cost_differential = (best_inter_cost - best_cost);
/* Keep the merge only while the cost differential stays within */
/* ALL_INTER_COST_DIFF_THR percent of the best cost */
enable_64x64_merge =
((ALL_INTER_COST_DIFF_THR * best_cost) >= (100 * cost_differential));
}
#endif
}
/* Tested again because the cost check above may have cleared the flag */
if(enable_64x64_merge)
{
#if !USE_CLUSTER_DATA_AS_BLK_MERGE_CANDTS
if(e_quality_preset < ME_HIGH_QUALITY)
#endif
{
hme_boot_out_outlier(ps_ctb_cluster_info, 64);
hme_find_top_ref_ids(ps_ctb_cluster_info, bidir_enabled, 64);
}
/* Fill the 64x64 root node (depth 0) */
hme_populate_cu_tree(
ps_ctb_cluster_info,
ps_ctb_cluster_info->ps_cu_tree_root,
0,
e_quality_preset,
POS_NA,
POS_NA,
POS_NA);
}
}
#endif
/**
********************************************************************************
*  @fn     void hme_merge_prms_init
*
*  @brief  Fills a merge parameter structure, either for merging the four
*          16x16 results of one 32x32 block (e_blk_size == BLK_32x32) or for
*          merging the four 32x32 results into the 64x64 result (any other
*          e_blk_size)
*
*  @param[out] ps_prms : merge parameters to be initialised
*
*  @param[in]  ps_curr_layer : layer context stored in the parameters
*
*  @param[in]  ps_refine_prms : supplies i4_use_rec_in_fpel and the number of
*              input results for the merge
*
*  @param[in]  ps_me_ctxt : frame context holding the search results, the 8x8
*              CU results, the reference lists and the cluster info
*
*  @param[in]  ps_range_prms_rec : per reference MV ranges, selected when
*              i4_use_rec_in_fpel is non-zero
*
*  @param[in]  ps_range_prms_inp : per reference MV ranges, selected otherwise
*
*  @param[in]  pps_mv_grid : mv grid pointers stored in the parameters
*
*  @param[in]  ps_inter_ctb_prms : inter ctb params stored in the parameters
*
*  @param[in]  i4_num_pred_dir : stored as the number of references to merge in
*
*  @param[in]  i4_32x32_id : index of the 32x32 block; read only when
*              e_blk_size == BLK_32x32
*
*  @param[in]  e_blk_size : size of the merged block
*
*  @param[in]  e_me_quality_presets : quality preset stored in the parameters
*
*  @return None
********************************************************************************
*/
static __inline void hme_merge_prms_init(
    hme_merge_prms_t *ps_prms,
    layer_ctxt_t *ps_curr_layer,
    refine_prms_t *ps_refine_prms,
    me_frm_ctxt_t *ps_me_ctxt,
    range_prms_t *ps_range_prms_rec,
    range_prms_t *ps_range_prms_inp,
    mv_grid_t **pps_mv_grid,
    inter_ctb_prms_t *ps_inter_ctb_prms,
    S32 i4_num_pred_dir,
    S32 i4_32x32_id,
    BLK_SIZE_T e_blk_size,
    ME_QUALITY_PRESETS_T e_me_quality_presets)
{
    S32 use_rec_flag = ps_refine_prms->i4_use_rec_in_fpel;
    /* Range table matching the plane (recon or input) used for the search */
    range_prms_t *ps_range_src = use_rec_flag ? ps_range_prms_rec : ps_range_prms_inp;
    WORD32 ref_id;

    /* Settings that do not depend on the block size */
    /* Currently not enabling segmentation info from prev layers */
    ps_prms->i4_seg_info_avail = 0;
    ps_prms->i4_part_mask = 0;

    /* Number of reference pics in which to do merge */
    ps_prms->i4_num_ref = i4_num_pred_dir;

    /* Layer ctxt info */
    ps_prms->ps_layer_ctxt = ps_curr_layer;
    ps_prms->ps_inter_ctb_prms = ps_inter_ctb_prms;

    if(BLK_32x32 != e_blk_size)
    {
        /* 64x64 merge: children are the four 32x32 results */
        ps_prms->ps_results_tl = &ps_me_ctxt->as_search_results_32x32[0];
        ps_prms->ps_results_tr = &ps_me_ctxt->as_search_results_32x32[1];
        ps_prms->ps_results_bl = &ps_me_ctxt->as_search_results_32x32[2];
        ps_prms->ps_results_br = &ps_me_ctxt->as_search_results_32x32[3];

        /* Merge results stored here */
        ps_prms->ps_results_merge = &ps_me_ctxt->s_search_results_64x64;

        ps_prms->i4_num_inp_results = ps_refine_prms->i4_num_32x32_merge_results;
        ps_prms->ps_8x8_cu_results = &ps_me_ctxt->as_cu8x8_results[0];
        ps_prms->ps_results_grandchild = ps_me_ctxt->as_search_results_16x16;
    }
    else
    {
        /* 32x32 merge: top left, top right, bottom left and bottom right */
        /* 16x16 units of the selected 32x32 block                        */
        const S32 first_16x16 = i4_32x32_id << 2;

        ps_prms->ps_results_tl = &ps_me_ctxt->as_search_results_16x16[first_16x16];
        ps_prms->ps_results_tr = &ps_me_ctxt->as_search_results_16x16[first_16x16 + 1];
        ps_prms->ps_results_bl = &ps_me_ctxt->as_search_results_16x16[first_16x16 + 2];
        ps_prms->ps_results_br = &ps_me_ctxt->as_search_results_16x16[first_16x16 + 3];

        /* Merge results stored here */
        ps_prms->ps_results_merge = &ps_me_ctxt->as_search_results_32x32[i4_32x32_id];

        /* This could be lesser than the number of 16x16results generated*/
        /* For now, keeping it to be same */
        ps_prms->i4_num_inp_results = ps_refine_prms->i4_num_fpel_results;
        ps_prms->ps_8x8_cu_results = &ps_me_ctxt->as_cu8x8_results[i4_32x32_id << 4];
        ps_prms->ps_results_grandchild = NULL;
    }

    /* One MV range pointer per reference, from the table chosen above */
    for(ref_id = 0; ref_id < MAX_NUM_REF; ref_id++)
    {
        ps_prms->aps_mv_range[ref_id] = &ps_range_src[ref_id];
    }

    ps_prms->i4_use_rec = use_rec_flag;
    ps_prms->pf_mv_cost_compute = compute_mv_cost_implicit_high_speed;
    ps_prms->pps_mv_grid = pps_mv_grid;
    ps_prms->log_ctb_size = ps_me_ctxt->log_ctb_size;
    ps_prms->e_quality_preset = e_me_quality_presets;
    ps_prms->pi1_future_list = ps_me_ctxt->ai1_future_list;
    ps_prms->pi1_past_list = ps_me_ctxt->ai1_past_list;
    ps_prms->ps_cluster_info = ps_me_ctxt->ps_ctb_cluster_info;
}
/**
********************************************************************************
* @fn void hme_refine(me_ctxt_t *ps_ctxt,
* refine_layer_prms_t *ps_refine_prms)
*
* @brief Top level entry point for refinement ME
*
* @param[in,out] ps_ctxt: ME Handle
*
* @param[in] ps_refine_prms : refinement layer prms
*
* @return None
********************************************************************************
*/
void hme_refine(
me_ctxt_t *ps_thrd_ctxt,
refine_prms_t *ps_refine_prms,
PF_EXT_UPDATE_FXN_T pf_ext_update_fxn,
layer_ctxt_t *ps_coarse_layer,
multi_thrd_ctxt_t *ps_multi_thrd_ctxt,
S32 lyr_job_type,
S32 thrd_id,
S32 me_frm_id,
pre_enc_L0_ipe_encloop_ctxt_t *ps_l0_ipe_input)
{
inter_ctb_prms_t s_common_frm_prms;
BLK_SIZE_T e_search_blk_size, e_result_blk_size;
WORD32 i4_me_frm_id = me_frm_id % MAX_NUM_ME_PARALLEL;
me_frm_ctxt_t *ps_ctxt = ps_thrd_ctxt->aps_me_frm_prms[i4_me_frm_id];
ME_QUALITY_PRESETS_T e_me_quality_presets =
ps_thrd_ctxt->s_init_prms.s_me_coding_tools.e_me_quality_presets;
WORD32 num_rows_proc = 0;
WORD32 num_act_ref_pics;
WORD16 i2_prev_enc_frm_max_mv_y;
WORD32 i4_idx_dvsr_p = ps_multi_thrd_ctxt->i4_idx_dvsr_p;
/*************************************************************************/
/* Complexity of search: Low to High */
/*************************************************************************/
SEARCH_COMPLEXITY_T e_search_complexity;
/*************************************************************************/
/* to store the PU results which are passed to the decide_part_types */
/* as input prms. Multiplied by 4 as the max number of Ref in a List is 4*/
/*************************************************************************/
pu_result_t as_pu_results[2][TOT_NUM_PARTS][MAX_NUM_RESULTS_PER_PART_LIST];
inter_pu_results_t as_inter_pu_results[4];
inter_pu_results_t *ps_pu_results = as_inter_pu_results;
/*************************************************************************/
/* Config parameter structures for varius ME submodules */
/*************************************************************************/
hme_merge_prms_t s_merge_prms_32x32_tl, s_merge_prms_32x32_tr;
hme_merge_prms_t s_merge_prms_32x32_bl, s_merge_prms_32x32_br;
hme_merge_prms_t s_merge_prms_64x64;
hme_search_prms_t s_search_prms_blk;
mvbank_update_prms_t s_mv_update_prms;
hme_ctb_prms_t s_ctb_prms;
hme_subpel_prms_t s_subpel_prms;
fullpel_refine_ctxt_t *ps_fullpel_refine_ctxt = ps_ctxt->ps_fullpel_refine_ctxt;
ctb_cluster_info_t *ps_ctb_cluster_info;
fpel_srch_cand_init_data_t s_srch_cand_init_data;
/* 4 bits (LSBs) of this variable control merge of 4 32x32 CUs in CTB */
S32 en_merge_32x32;
/* 5 lsb's specify whether or not merge algorithm is required */
/* to be executed or not. Relevant only in PQ. Ought to be */
/* used in conjunction with en_merge_32x32 and */
/* ps_ctb_bound_attrs->u1_merge_to_64x64_flag. This is */
/* required when all children are deemed to be intras */
S32 en_merge_execution;
/*************************************************************************/
/* All types of search candidates for predictor based search. */
/*************************************************************************/
S32 num_init_candts = 0;
S32 i4_num_act_ref_l0 = ps_ctxt->s_frm_prms.u1_num_active_ref_l0;
S32 i4_num_act_ref_l1 = ps_ctxt->s_frm_prms.u1_num_active_ref_l1;
search_candt_t *ps_search_candts, as_search_candts[MAX_INIT_CANDTS];
search_node_t as_top_neighbours[4], as_left_neighbours[3];
pf_get_wt_inp fp_get_wt_inp;
search_node_t as_unique_search_nodes[MAX_INIT_CANDTS * 9];
U32 au4_unique_node_map[MAP_X_MAX * 2];
/* Controls the boundary attributes of CTB, whether it has 64x64 or not */
ctb_boundary_attrs_t *ps_ctb_bound_attrs;
/*************************************************************************/
/* points ot the search results for the blk level search (8x8/16x16) */
/*************************************************************************/
search_results_t *ps_search_results;
/*************************************************************************/
/* Coordinates */
/*************************************************************************/
S32 blk_x, blk_y, i4_ctb_x, i4_ctb_y, tile_col_idx, blk_id_in_ctb;
S32 pos_x, pos_y;
S32 blk_id_in_full_ctb;
/*************************************************************************/
/* Related to dimensions of block being searched and pic dimensions */
/*************************************************************************/
S32 blk_4x4_to_16x16;
S32 blk_wd, blk_ht, blk_size_shift;
S32 i4_pic_wd, i4_pic_ht, num_blks_in_this_ctb;
S32 num_results_prev_layer;
/*************************************************************************/
/* Size of a basic unit for this layer. For non encode layers, we search */
/* in block sizes of 8x8. For encode layers, though we search 16x16s the */
/* basic unit size is the ctb size. */
/*************************************************************************/
S32 unit_size;
/*************************************************************************/
/* Local variable storing results of any 4 CU merge to bigger CU */
/*************************************************************************/
CU_MERGE_RESULT_T e_merge_result;
/*************************************************************************/
/* This mv grid stores results during and after fpel search, during */
/* merge, subpel and bidirect refinements stages. 2 instances of this are*/
/* meant for the 2 directions of search (l0 and l1). */
/*************************************************************************/
mv_grid_t *aps_mv_grid[2];
/*************************************************************************/
/* Pointers to context in current and coarser layers */
/*************************************************************************/
layer_ctxt_t *ps_curr_layer, *ps_prev_layer;
/*************************************************************************/
/* to store mv range per blk, and picture limit, allowed search range */
/* range prms in hpel and qpel units as well */
/*************************************************************************/
range_prms_t as_range_prms_inp[MAX_NUM_REF], as_range_prms_rec[MAX_NUM_REF];
range_prms_t s_pic_limit_inp, s_pic_limit_rec, as_mv_limit[MAX_NUM_REF];
range_prms_t as_range_prms_hpel[MAX_NUM_REF], as_range_prms_qpel[MAX_NUM_REF];
/*************************************************************************/
/* These variables are used to track number of references at different */
/* stages of ME. */
/*************************************************************************/
S32 i4_num_pred_dir;
S32 i4_num_ref_each_dir, i, i4_num_ref_prev_layer;
S32 lambda_recon = ps_refine_prms->lambda_recon;
/* Counts successful merge to 32x32 every CTB (0-4) */
S32 merge_count_32x32;
S32 ai4_id_coloc[14], ai4_id_Z[2];
U08 au1_search_candidate_list_index[2];
S32 ai4_num_coloc_cands[2];
U08 u1_pred_dir, u1_pred_dir_ctr;
/*************************************************************************/
/* Input pointer and stride */
/*************************************************************************/
U08 *pu1_inp;
S32 i4_inp_stride;
S32 end_of_frame;
S32 num_sync_units_in_row, num_sync_units_in_tile;
/*************************************************************************/
/* Indicates whether the all 4 8x8 blks are valid in the 16x16 blk in the*/
/* encode layer. If not 15, then 1 or more 8x8 blks not valid. Means that*/
/* we need to stop merges and force 8x8 CUs for that 16x16 blk */
/*************************************************************************/
S32 blk_8x8_mask;
S32 ai4_blk_8x8_mask[16];
U08 au1_is_64x64Blk_noisy[1];
U08 au1_is_32x32Blk_noisy[4];
U08 au1_is_16x16Blk_noisy[16];
ihevce_cmn_opt_func_t *ps_cmn_utils_optimised_function_list =
ps_thrd_ctxt->ps_cmn_utils_optimised_function_list;
ihevce_me_optimised_function_list_t *ps_me_optimised_function_list =
((ihevce_me_optimised_function_list_t *)ps_thrd_ctxt->pv_me_optimised_function_list);
ASSERT(ps_refine_prms->i4_layer_id < ps_ctxt->num_layers - 1);
/*************************************************************************/
/* Pointers to current and coarse layer are needed for projection */
/* Pointer to prev layer are needed for other candts like coloc */
/*************************************************************************/
ps_curr_layer = ps_ctxt->ps_curr_descr->aps_layers[ps_refine_prms->i4_layer_id];
ps_prev_layer = hme_get_past_layer_ctxt(
ps_thrd_ctxt, ps_ctxt, ps_refine_prms->i4_layer_id, ps_multi_thrd_ctxt->i4_num_me_frm_pllel);
num_results_prev_layer = ps_coarse_layer->ps_layer_mvbank->i4_num_mvs_per_ref;
/* Function pointer is selected based on the C vc X86 macro */
fp_get_wt_inp = ps_me_optimised_function_list->pf_get_wt_inp_ctb;
i4_inp_stride = ps_curr_layer->i4_inp_stride;
i4_pic_wd = ps_curr_layer->i4_wd;
i4_pic_ht = ps_curr_layer->i4_ht;
e_search_complexity = ps_refine_prms->e_search_complexity;
end_of_frame = 0;
/* This points to all the initial candts */
ps_search_candts = &as_search_candts[0];
/* mv grid being huge strucutre is part of context */
aps_mv_grid[0] = &ps_ctxt->as_mv_grid[0];
aps_mv_grid[1] = &ps_ctxt->as_mv_grid[1];
/*************************************************************************/
/* If the current layer is encoded (since it may be multicast or final */
/* layer (finest)), then we use 16x16 blk size with some selected parts */
/* If the current layer is not encoded, then we use 8x8 blk size, with */
/* enable or disable of 4x4 partitions depending on the input prms */
/*************************************************************************/
e_search_blk_size = BLK_16x16;
blk_wd = blk_ht = 16;
blk_size_shift = 4;
e_result_blk_size = BLK_8x8;
s_mv_update_prms.i4_shift = 1;
if(ps_coarse_layer->ps_layer_mvbank->e_blk_size == BLK_4x4)
{
blk_4x4_to_16x16 = 1;
}
else
{
blk_4x4_to_16x16 = 0;
}
unit_size = 1 << ps_ctxt->log_ctb_size;
s_search_prms_blk.i4_inp_stride = unit_size;
/* This is required to properly update the layer mv bank */
s_mv_update_prms.e_search_blk_size = e_search_blk_size;
s_search_prms_blk.e_blk_size = e_search_blk_size;
/*************************************************************************/
/* If current layer is explicit, then the number of ref frames are to */
/* be same as previous layer. Else it will be 2 */
/*************************************************************************/
i4_num_ref_prev_layer = ps_coarse_layer->ps_layer_mvbank->i4_num_ref;
i4_num_pred_dir =
(ps_ctxt->s_frm_prms.bidir_enabled && (i4_num_act_ref_l0 > 0) && (i4_num_act_ref_l1 > 0)) +
1;
#if USE_MODIFIED == 1
s_search_prms_blk.pf_mv_cost_compute = compute_mv_cost_implicit_high_speed_modified;
#else
s_search_prms_blk.pf_mv_cost_compute = compute_mv_cost_implicit_high_speed;
#endif
i4_num_pred_dir = MIN(i4_num_pred_dir, i4_num_ref_prev_layer);
if(i4_num_ref_prev_layer <= 2)
{
i4_num_ref_each_dir = 1;
}
else
{
i4_num_ref_each_dir = i4_num_ref_prev_layer >> 1;
}
s_mv_update_prms.i4_num_ref = i4_num_pred_dir;
s_mv_update_prms.i4_num_results_to_store =
MIN((ps_ctxt->s_frm_prms.bidir_enabled) ? ps_curr_layer->ps_layer_mvbank->i4_num_mvs_per_ref
: (i4_num_act_ref_l0 > 1) + 1,
ps_refine_prms->i4_num_results_per_part);
/*************************************************************************/
/* Initialization of merge params for 16x16 to 32x32 merge. */
/* There are 4 32x32 units in a CTB, so 4 param structures initialized */
/*************************************************************************/
{
hme_merge_prms_t *aps_merge_prms[4];
aps_merge_prms[0] = &s_merge_prms_32x32_tl;
aps_merge_prms[1] = &s_merge_prms_32x32_tr;
aps_merge_prms[2] = &s_merge_prms_32x32_bl;
aps_merge_prms[3] = &s_merge_prms_32x32_br;
for(i = 0; i < 4; i++)
{
hme_merge_prms_init(
aps_merge_prms[i],
ps_curr_layer,
ps_refine_prms,
ps_ctxt,
as_range_prms_rec,
as_range_prms_inp,
&aps_mv_grid[0],
&s_common_frm_prms,
i4_num_pred_dir,
i,
BLK_32x32,
e_me_quality_presets);
}
}
/*************************************************************************/
/* Initialization of merge params for 32x32 to 64x64 merge. */
/* There are 4 32x32 units in a CTB, so only 1 64x64 CU can be in CTB */
/*************************************************************************/
{
hme_merge_prms_init(
&s_merge_prms_64x64,
ps_curr_layer,
ps_refine_prms,
ps_ctxt,
as_range_prms_rec,
as_range_prms_inp,
&aps_mv_grid[0],
&s_common_frm_prms,
i4_num_pred_dir,
0,
BLK_64x64,
e_me_quality_presets);
}
/* Pointers to cu_results are initialised here */
{
WORD32 i;
ps_ctxt->s_search_results_64x64.ps_cu_results = &ps_ctxt->s_cu64x64_results;
for(i = 0; i < 4; i++)
{
ps_ctxt->as_search_results_32x32[i].ps_cu_results = &ps_ctxt->as_cu32x32_results[i];
}
for(i = 0; i < 16; i++)
{
ps_ctxt->as_search_results_16x16[i].ps_cu_results = &ps_ctxt->as_cu16x16_results[i];
}
}
/*************************************************************************/
/* SUBPEL Params initialized here */
/*************************************************************************/
{
s_subpel_prms.ps_search_results_16x16 = &ps_ctxt->as_search_results_16x16[0];
s_subpel_prms.ps_search_results_32x32 = &ps_ctxt->as_search_results_32x32[0];
s_subpel_prms.ps_search_results_64x64 = &ps_ctxt->s_search_results_64x64;
s_subpel_prms.i4_num_16x16_candts = ps_refine_prms->i4_num_fpel_results;
s_subpel_prms.i4_num_32x32_candts = ps_refine_prms->i4_num_32x32_merge_results;
s_subpel_prms.i4_num_64x64_candts = ps_refine_prms->i4_num_64x64_merge_results;
s_subpel_prms.i4_num_steps_hpel_refine = ps_refine_prms->i4_num_steps_hpel_refine;
s_subpel_prms.i4_num_steps_qpel_refine = ps_refine_prms->i4_num_steps_qpel_refine;
s_subpel_prms.i4_use_satd = ps_refine_prms->i4_use_satd_subpel;
s_subpel_prms.i4_inp_stride = unit_size;
s_subpel_prms.u1_max_subpel_candts_2Nx2N = ps_refine_prms->u1_max_subpel_candts_2Nx2N;
s_subpel_prms.u1_max_subpel_candts_NxN = ps_refine_prms->u1_max_subpel_candts_NxN;
s_subpel_prms.u1_subpel_candt_threshold = ps_refine_prms->u1_subpel_candt_threshold;
s_subpel_prms.pf_qpel_interp = ps_me_optimised_function_list->pf_qpel_interp_avg_generic;
{
WORD32 ref_ctr;
for(ref_ctr = 0; ref_ctr < MAX_NUM_REF; ref_ctr++)
{
s_subpel_prms.aps_mv_range_hpel[ref_ctr] = &as_range_prms_hpel[ref_ctr];
s_subpel_prms.aps_mv_range_qpel[ref_ctr] = &as_range_prms_qpel[ref_ctr];
}
}
s_subpel_prms.pi2_inp_bck = ps_ctxt->pi2_inp_bck;
#if USE_MODIFIED == 0
s_subpel_prms.pf_mv_cost_compute = compute_mv_cost_implicit_high_speed;
#else
s_subpel_prms.pf_mv_cost_compute = compute_mv_cost_implicit_high_speed_modified;
#endif
s_subpel_prms.e_me_quality_presets = e_me_quality_presets;
/* BI Refinement done only if this field is 1 */
s_subpel_prms.bidir_enabled = ps_refine_prms->bidir_enabled;
s_subpel_prms.u1_num_ref = ps_ctxt->num_ref_future + ps_ctxt->num_ref_past;
s_subpel_prms.i4_num_act_ref_l0 = ps_ctxt->s_frm_prms.u1_num_active_ref_l0;
s_subpel_prms.i4_num_act_ref_l1 = ps_ctxt->s_frm_prms.u1_num_active_ref_l1;
s_subpel_prms.u1_max_num_subpel_refine_centers =
ps_refine_prms->u1_max_num_subpel_refine_centers;
}
/* inter_ctb_prms_t struct initialisation */
{
inter_ctb_prms_t *ps_inter_ctb_prms = &s_common_frm_prms;
hme_subpel_prms_t *ps_subpel_prms = &s_subpel_prms;
ps_inter_ctb_prms->pps_rec_list_l0 = ps_ctxt->ps_hme_ref_map->pps_rec_list_l0;
ps_inter_ctb_prms->pps_rec_list_l1 = ps_ctxt->ps_hme_ref_map->pps_rec_list_l1;
ps_inter_ctb_prms->wpred_log_wdc = ps_ctxt->s_wt_pred.wpred_log_wdc;
ps_inter_ctb_prms->u1_max_tr_depth = ps_thrd_ctxt->s_init_prms.u1_max_tr_depth;
ps_inter_ctb_prms->i1_quality_preset = e_me_quality_presets;
ps_inter_ctb_prms->i4_bidir_enabled = ps_subpel_prms->bidir_enabled;
ps_inter_ctb_prms->i4_inp_stride = ps_subpel_prms->i4_inp_stride;
ps_inter_ctb_prms->u1_num_ref = ps_subpel_prms->u1_num_ref;
ps_inter_ctb_prms->u1_use_satd = ps_subpel_prms->i4_use_satd;
ps_inter_ctb_prms->i4_rec_stride = ps_curr_layer->i4_rec_stride;
ps_inter_ctb_prms->u1_num_active_ref_l0 = ps_ctxt->s_frm_prms.u1_num_active_ref_l0;
ps_inter_ctb_prms->u1_num_active_ref_l1 = ps_ctxt->s_frm_prms.u1_num_active_ref_l1;
ps_inter_ctb_prms->i4_lamda = lambda_recon;
ps_inter_ctb_prms->u1_lamda_qshift = ps_refine_prms->lambda_q_shift;
ps_inter_ctb_prms->i4_qstep_ls8 = ps_ctxt->ps_hme_frm_prms->qstep_ls8;
ps_inter_ctb_prms->pi4_inv_wt = ps_ctxt->s_wt_pred.a_inv_wpred_wt;
ps_inter_ctb_prms->pi1_past_list = ps_ctxt->ai1_past_list;
ps_inter_ctb_prms->pi1_future_list = ps_ctxt->ai1_future_list;
ps_inter_ctb_prms->pu4_src_variance = s_search_prms_blk.au4_src_variance;
ps_inter_ctb_prms->u1_max_2nx2n_tu_recur_cands =
ps_refine_prms->u1_max_2nx2n_tu_recur_cands;
}
for(i = 0; i < MAX_INIT_CANDTS; i++)
{
ps_search_candts[i].ps_search_node = &ps_ctxt->s_init_search_node[i];
ps_search_candts[i].ps_search_node->ps_mv = &ps_ctxt->as_search_cand_mv[i];
INIT_SEARCH_NODE(ps_search_candts[i].ps_search_node, 0);
}
num_act_ref_pics =
ps_ctxt->s_frm_prms.u1_num_active_ref_l0 + ps_ctxt->s_frm_prms.u1_num_active_ref_l1;
if(num_act_ref_pics)
{
hme_search_cand_data_init(
ai4_id_Z,
ai4_id_coloc,
ai4_num_coloc_cands,
au1_search_candidate_list_index,
i4_num_act_ref_l0,
i4_num_act_ref_l1,
ps_ctxt->s_frm_prms.bidir_enabled,
blk_4x4_to_16x16);
}
if(!ps_ctxt->s_frm_prms.bidir_enabled && (i4_num_act_ref_l0 > 1))
{
ps_search_candts[ai4_id_Z[0]].ps_search_node->i1_ref_idx = ps_ctxt->ai1_past_list[0];
ps_search_candts[ai4_id_Z[1]].ps_search_node->i1_ref_idx = ps_ctxt->ai1_past_list[1];
}
else if(!ps_ctxt->s_frm_prms.bidir_enabled && (i4_num_act_ref_l0 == 1))
{
ps_search_candts[ai4_id_Z[0]].ps_search_node->i1_ref_idx = ps_ctxt->ai1_past_list[0];
}
for(i = 0; i < 3; i++)
{
search_node_t *ps_search_node;
ps_search_node = &as_left_neighbours[i];
INIT_SEARCH_NODE(ps_search_node, 0);
ps_search_node = &as_top_neighbours[i];
INIT_SEARCH_NODE(ps_search_node, 0);
}
INIT_SEARCH_NODE(&as_top_neighbours[3], 0);
as_left_neighbours[2].u1_is_avail = 0;
/*************************************************************************/
/* Initialize all the search results structure here. We update all the */
/* search results to default values, and configure things like blk sizes */
/*************************************************************************/
if(num_act_ref_pics)
{
S32 i4_x, i4_y;
/* 16x16 results */
for(i = 0; i < 16; i++)
{
search_results_t *ps_search_results;
S32 pred_lx;
ps_search_results = &ps_ctxt->as_search_results_16x16[i];
i4_x = (S32)gau1_encode_to_raster_x[i];
i4_y = (S32)gau1_encode_to_raster_y[i];
i4_x <<= 4;
i4_y <<= 4;
hme_init_search_results(
ps_search_results,
i4_num_pred_dir,
ps_refine_prms->i4_num_fpel_results,
ps_refine_prms->i4_num_results_per_part,
e_search_blk_size,
i4_x,
i4_y,
&ps_ctxt->au1_is_past[0]);
for(pred_lx = 0; pred_lx < 2; pred_lx++)
{
pred_ctxt_t *ps_pred_ctxt;
ps_pred_ctxt = &ps_search_results->as_pred_ctxt[pred_lx];
hme_init_pred_ctxt_encode(
ps_pred_ctxt,
ps_search_results,
ps_search_candts[ai4_id_coloc[0]].ps_search_node,
ps_search_candts[ai4_id_Z[0]].ps_search_node,
aps_mv_grid[pred_lx],
pred_lx,
lambda_recon,
ps_refine_prms->lambda_q_shift,
&ps_ctxt->apu1_ref_bits_tlu_lc[0],
&ps_ctxt->ai2_ref_scf[0]);
}
}
for(i = 0; i < 4; i++)
{
search_results_t *ps_search_results;
S32 pred_lx;
ps_search_results = &ps_ctxt->as_search_results_32x32[i];
i4_x = (S32)gau1_encode_to_raster_x[i];
i4_y = (S32)gau1_encode_to_raster_y[i];
i4_x <<= 5;
i4_y <<= 5;
hme_init_search_results(
ps_search_results,
i4_num_pred_dir,
ps_refine_prms->i4_num_32x32_merge_results,
ps_refine_prms->i4_num_results_per_part,
BLK_32x32,
i4_x,
i4_y,
&ps_ctxt->au1_is_past[0]);
for(pred_lx = 0; pred_lx < 2; pred_lx++)
{
pred_ctxt_t *ps_pred_ctxt;
ps_pred_ctxt = &ps_search_results->as_pred_ctxt[pred_lx];
hme_init_pred_ctxt_encode(
ps_pred_ctxt,
ps_search_results,
ps_search_candts[ai4_id_coloc[0]].ps_search_node,
ps_search_candts[ai4_id_Z[0]].ps_search_node,
aps_mv_grid[pred_lx],
pred_lx,
lambda_recon,
ps_refine_prms->lambda_q_shift,
&ps_ctxt->apu1_ref_bits_tlu_lc[0],
&ps_ctxt->ai2_ref_scf[0]);
}
}
{
search_results_t *ps_search_results;
S32 pred_lx;
ps_search_results = &ps_ctxt->s_search_results_64x64;
hme_init_search_results(
ps_search_results,
i4_num_pred_dir,
ps_refine_prms->i4_num_64x64_merge_results,
ps_refine_prms->i4_num_results_per_part,
BLK_64x64,
0,
0,
&ps_ctxt->au1_is_past[0]);
for(pred_lx = 0; pred_lx < 2; pred_lx++)
{
pred_ctxt_t *ps_pred_ctxt;
ps_pred_ctxt = &ps_search_results->as_pred_ctxt[pred_lx];
hme_init_pred_ctxt_encode(
ps_pred_ctxt,
ps_search_results,
ps_search_candts[ai4_id_coloc[0]].ps_search_node,
ps_search_candts[ai4_id_Z[0]].ps_search_node,
aps_mv_grid[pred_lx],
pred_lx,
lambda_recon,
ps_refine_prms->lambda_q_shift,
&ps_ctxt->apu1_ref_bits_tlu_lc[0],
&ps_ctxt->ai2_ref_scf[0]);
}
}
}
/* Initialise the structure used in clustering */
if(ME_PRISTINE_QUALITY == e_me_quality_presets)
{
ps_ctb_cluster_info = ps_ctxt->ps_ctb_cluster_info;
ps_ctb_cluster_info->ps_16x16_blk = ps_ctxt->ps_blk_16x16;
ps_ctb_cluster_info->ps_32x32_blk = ps_ctxt->ps_blk_32x32;
ps_ctb_cluster_info->ps_64x64_blk = ps_ctxt->ps_blk_64x64;
ps_ctb_cluster_info->pi4_blk_8x8_mask = ai4_blk_8x8_mask;
ps_ctb_cluster_info->sdi_threshold = ps_refine_prms->sdi_threshold;
ps_ctb_cluster_info->i4_frame_qstep = ps_ctxt->frm_qstep;
ps_ctb_cluster_info->i4_frame_qstep_multiplier = 16;
}
/*********************************************************************/
/* Initialize the dyn. search range params. for each reference index */
/* in current layer ctxt */
/*********************************************************************/
/* Only for P pic. For P, both are 0, I&B has them mut. exclusive */
if(ps_ctxt->s_frm_prms.is_i_pic == ps_ctxt->s_frm_prms.bidir_enabled)
{
WORD32 ref_ctr;
/* set no. of act ref in L0 for further use at frame level */
ps_ctxt->as_l0_dyn_range_prms[i4_idx_dvsr_p].i4_num_act_ref_in_l0 =
ps_ctxt->s_frm_prms.u1_num_active_ref_l0;
for(ref_ctr = 0; ref_ctr < ps_ctxt->s_frm_prms.u1_num_active_ref_l0; ref_ctr++)
{
INIT_DYN_SEARCH_PRMS(
&ps_ctxt->as_l0_dyn_range_prms[i4_idx_dvsr_p].as_dyn_range_prms[ref_ctr],
ps_ctxt->ai4_ref_idx_to_poc_lc[ref_ctr]);
}
}
/*************************************************************************/
/* Now that the candidates have been ordered, to choose the right number */
/* of initial candidates. */
/*************************************************************************/
if(blk_4x4_to_16x16)
{
if(i4_num_ref_prev_layer > 2)
{
if(e_search_complexity == SEARCH_CX_LOW)
num_init_candts = 7 * (!ps_ctxt->s_frm_prms.bidir_enabled + 1);
else if(e_search_complexity == SEARCH_CX_MED)
num_init_candts = 14 * (!ps_ctxt->s_frm_prms.bidir_enabled + 1);
else if(e_search_complexity == SEARCH_CX_HIGH)
num_init_candts = 21 * (!ps_ctxt->s_frm_prms.bidir_enabled + 1);
else
ASSERT(0);
}
else if(i4_num_ref_prev_layer == 2)
{
if(e_search_complexity == SEARCH_CX_LOW)
num_init_candts = 5 * (!ps_ctxt->s_frm_prms.bidir_enabled + 1);
else if(e_search_complexity == SEARCH_CX_MED)
num_init_candts = 12 * (!ps_ctxt->s_frm_prms.bidir_enabled + 1);
else if(e_search_complexity == SEARCH_CX_HIGH)
num_init_candts = 19 * (!ps_ctxt->s_frm_prms.bidir_enabled + 1);
else
ASSERT(0);
}
else
{
if(e_search_complexity == SEARCH_CX_LOW)
num_init_candts = 5;
else if(e_search_complexity == SEARCH_CX_MED)
num_init_candts = 12;
else if(e_search_complexity == SEARCH_CX_HIGH)
num_init_candts = 19;
else
ASSERT(0);
}
}
else
{
if(i4_num_ref_prev_layer > 2)
{
if(e_search_complexity == SEARCH_CX_LOW)
num_init_candts = 7 * (!ps_ctxt->s_frm_prms.bidir_enabled + 1);
else if(e_search_complexity == SEARCH_CX_MED)
num_init_candts = 13 * (!ps_ctxt->s_frm_prms.bidir_enabled + 1);
else if(e_search_complexity == SEARCH_CX_HIGH)
num_init_candts = 18 * (!ps_ctxt->s_frm_prms.bidir_enabled + 1);
else
ASSERT(0);
}
else if(i4_num_ref_prev_layer == 2)
{
if(e_search_complexity == SEARCH_CX_LOW)
num_init_candts = 5 * (!ps_ctxt->s_frm_prms.bidir_enabled + 1);
else if(e_search_complexity == SEARCH_CX_MED)
num_init_candts = 11 * (!ps_ctxt->s_frm_prms.bidir_enabled + 1);
else if(e_search_complexity == SEARCH_CX_HIGH)
num_init_candts = 16 * (!ps_ctxt->s_frm_prms.bidir_enabled + 1);
else
ASSERT(0);
}
else
{
if(e_search_complexity == SEARCH_CX_LOW)
num_init_candts = 5;
else if(e_search_complexity == SEARCH_CX_MED)
num_init_candts = 11;
else if(e_search_complexity == SEARCH_CX_HIGH)
num_init_candts = 16;
else
ASSERT(0);
}
}
/*************************************************************************/
/* The following search parameters are fixed throughout the search across*/
/* all blks. So these are configured outside processing loop */
/*************************************************************************/
s_search_prms_blk.i4_num_init_candts = num_init_candts;
s_search_prms_blk.i4_start_step = 1;
s_search_prms_blk.i4_use_satd = 0;
s_search_prms_blk.i4_num_steps_post_refine = ps_refine_prms->i4_num_steps_post_refine_fpel;
/* we use recon only for encoded layers, otherwise it is not available */
s_search_prms_blk.i4_use_rec = ps_refine_prms->i4_encode & ps_refine_prms->i4_use_rec_in_fpel;
s_search_prms_blk.ps_search_candts = ps_search_candts;
if(s_search_prms_blk.i4_use_rec)
{
WORD32 ref_ctr;
for(ref_ctr = 0; ref_ctr < MAX_NUM_REF; ref_ctr++)
s_search_prms_blk.aps_mv_range[ref_ctr] = &as_range_prms_rec[ref_ctr];
}
else
{
WORD32 ref_ctr;
for(ref_ctr = 0; ref_ctr < MAX_NUM_REF; ref_ctr++)
s_search_prms_blk.aps_mv_range[ref_ctr] = &as_range_prms_inp[ref_ctr];
}
/*************************************************************************/
/* Initialize coordinates. Meaning as follows */
/* blk_x : x coordinate of the 16x16 blk, in terms of number of blks */
/* blk_y : same as above, y coord. */
/* num_blks_in_this_ctb : number of blks in this given ctb that starts */
/* at i4_ctb_x, i4_ctb_y. This may not be 16 at picture boundaries. */
/* i4_ctb_x, i4_ctb_y: pixel coordinate of the ctb relative to top left */
/* corner of the picture. Always multiple of 64. */
/* blk_id_in_ctb : encode order id of the blk in the ctb. */
/*************************************************************************/
blk_y = 0;
blk_id_in_ctb = 0;
i4_ctb_y = 0;
/*************************************************************************/
/* Picture limit on all 4 sides. This will be used to set mv limits for */
/* every block given its coordinate. Note this assumes that the min amt */
/* of padding to right of pic is equal to the blk size. If we go all the */
/* way upto 64x64, then the min padding on right side of picture should */
/* be 64, and also on bottom side of picture. */
/*************************************************************************/
SET_PIC_LIMIT(
s_pic_limit_inp,
ps_curr_layer->i4_pad_x_rec,
ps_curr_layer->i4_pad_y_rec,
ps_curr_layer->i4_wd,
ps_curr_layer->i4_ht,
s_search_prms_blk.i4_num_steps_post_refine);
SET_PIC_LIMIT(
s_pic_limit_rec,
ps_curr_layer->i4_pad_x_rec,
ps_curr_layer->i4_pad_y_rec,
ps_curr_layer->i4_wd,
ps_curr_layer->i4_ht,
s_search_prms_blk.i4_num_steps_post_refine);
/*************************************************************************/
/* set the MV limit per ref. pic. */
/* - P pic. : Based on the config params. */
/* - B/b pic: Based on the Max/Min MV from prev. P and config. param. */
/*************************************************************************/
hme_set_mv_limit_using_dvsr_data(
ps_ctxt, ps_curr_layer, as_mv_limit, &i2_prev_enc_frm_max_mv_y, num_act_ref_pics);
s_srch_cand_init_data.pu1_num_fpel_search_cands = ps_refine_prms->au1_num_fpel_search_cands;
s_srch_cand_init_data.i4_num_act_ref_l0 = ps_ctxt->s_frm_prms.u1_num_active_ref_l0;
s_srch_cand_init_data.i4_num_act_ref_l1 = ps_ctxt->s_frm_prms.u1_num_active_ref_l1;
s_srch_cand_init_data.ps_coarse_layer = ps_coarse_layer;
s_srch_cand_init_data.ps_curr_layer = ps_curr_layer;
s_srch_cand_init_data.i4_max_num_init_cands = num_init_candts;
s_srch_cand_init_data.ps_search_cands = ps_search_candts;
s_srch_cand_init_data.u1_num_results_in_mvbank = s_mv_update_prms.i4_num_results_to_store;
s_srch_cand_init_data.pi4_ref_id_lc_to_l0_map = ps_ctxt->a_ref_idx_lc_to_l0;
s_srch_cand_init_data.pi4_ref_id_lc_to_l1_map = ps_ctxt->a_ref_idx_lc_to_l1;
s_srch_cand_init_data.e_search_blk_size = e_search_blk_size;
while(0 == end_of_frame)
{
job_queue_t *ps_job;
frm_ctb_ctxt_t *ps_frm_ctb_prms;
ipe_l0_ctb_analyse_for_me_t *ps_cur_ipe_ctb;
WORD32 i4_max_mv_x_in_ctb;
WORD32 i4_max_mv_y_in_ctb;
void *pv_dep_mngr_encloop_dep_me;
WORD32 offset_val, check_dep_pos, set_dep_pos;
WORD32 left_ctb_in_diff_tile, i4_first_ctb_x = 0;
pv_dep_mngr_encloop_dep_me = ps_ctxt->pv_dep_mngr_encloop_dep_me;
ps_frm_ctb_prms = (frm_ctb_ctxt_t *)ps_thrd_ctxt->pv_ext_frm_prms;
/* Get the current row from the job queue */
ps_job = (job_queue_t *)ihevce_enc_grp_get_next_job(
ps_multi_thrd_ctxt, lyr_job_type, 1, me_frm_id);
/* If all rows are done, set the end of process flag to 1, */
/* and the current row to -1 */
if(NULL == ps_job)
{
blk_y = -1;
i4_ctb_y = -1;
tile_col_idx = -1;
end_of_frame = 1;
continue;
}
/* set the output dependency after picking up the row */
ihevce_enc_grp_job_set_out_dep(ps_multi_thrd_ctxt, ps_job, me_frm_id);
/* Obtain the current row's details from the job */
{
ihevce_tile_params_t *ps_col_tile_params;
i4_ctb_y = ps_job->s_job_info.s_me_job_info.i4_vert_unit_row_no;
/* Obtain the current column tile index from the job */
tile_col_idx = ps_job->s_job_info.s_me_job_info.i4_tile_col_idx;
/* in encode layer block are 16x16 and CTB is 64 x 64 */
/* note if ctb is 32x32 then this calc needs to be changed */
num_sync_units_in_row = (i4_pic_wd + ((1 << ps_ctxt->log_ctb_size) - 1)) >>
ps_ctxt->log_ctb_size;
/* The tile parameter for the col. idx. Use only the properties
which is same for all the bottom tiles like width, start_x, etc.
Don't use height, start_y, etc. */
ps_col_tile_params =
((ihevce_tile_params_t *)ps_thrd_ctxt->pv_tile_params_base + tile_col_idx);
/* in encode layer block are 16x16 and CTB is 64 x 64 */
/* note if ctb is 32x32 then this calc needs to be changed */
num_sync_units_in_tile =
(ps_col_tile_params->i4_curr_tile_width + ((1 << ps_ctxt->log_ctb_size) - 1)) >>
ps_ctxt->log_ctb_size;
i4_first_ctb_x = ps_col_tile_params->i4_first_ctb_x;
i4_ctb_x = i4_first_ctb_x;
if(!num_act_ref_pics)
{
for(i4_ctb_x = i4_first_ctb_x;
i4_ctb_x < (ps_col_tile_params->i4_first_ctb_x + num_sync_units_in_tile);
i4_ctb_x++)
{
S32 blk_i = 0, blk_j = 0;
/* set the dependency for the corresponding row in enc loop */
ihevce_dmgr_set_row_row_sync(
pv_dep_mngr_encloop_dep_me,
(i4_ctb_x + 1),
i4_ctb_y,
tile_col_idx /* Col Tile No. */);
}
continue;
}
/* increment the number of rows proc */
num_rows_proc++;
/* Set Variables for Dep. Checking and Setting */
set_dep_pos = i4_ctb_y + 1;
if(i4_ctb_y > 0)
{
offset_val = 2;
check_dep_pos = i4_ctb_y - 1;
}
else
{
/* First row should run without waiting */
offset_val = -1;
check_dep_pos = 0;
}
/* row ctb out pointer */
ps_ctxt->ps_ctb_analyse_curr_row =
ps_ctxt->ps_ctb_analyse_base + i4_ctb_y * ps_frm_ctb_prms->i4_num_ctbs_horz;
/* Row level CU Tree buffer */
ps_ctxt->ps_cu_tree_curr_row =
ps_ctxt->ps_cu_tree_base +
i4_ctb_y * ps_frm_ctb_prms->i4_num_ctbs_horz * MAX_NUM_NODES_CU_TREE;
ps_ctxt->ps_me_ctb_data_curr_row =
ps_ctxt->ps_me_ctb_data_base + i4_ctb_y * ps_frm_ctb_prms->i4_num_ctbs_horz;
}
/* This flag says the CTB under processing is at the start of tile in horz dir.*/
left_ctb_in_diff_tile = 1;
/* To make sure no 64-bit overflow happens when inv_wt is multiplied with un-normalized src_var, */
/* the shift value will be passed onto the functions wherever inv_wt is used so that inv_wt is appropriately shifted and multiplied */
{
S32 i4_ref_id, i4_bits_req;
for(i4_ref_id = 0; i4_ref_id < (ps_ctxt->s_frm_prms.u1_num_active_ref_l0 +
ps_ctxt->s_frm_prms.u1_num_active_ref_l1);
i4_ref_id++)
{
GETRANGE(i4_bits_req, ps_ctxt->s_wt_pred.a_inv_wpred_wt[i4_ref_id]);
if(i4_bits_req > 12)
{
ps_ctxt->s_wt_pred.ai4_shift_val[i4_ref_id] = (i4_bits_req - 12);
}
else
{
ps_ctxt->s_wt_pred.ai4_shift_val[i4_ref_id] = 0;
}
}
s_common_frm_prms.pi4_inv_wt_shift_val = ps_ctxt->s_wt_pred.ai4_shift_val;
}
/* if non-encode layer then i4_ctb_x will be same as blk_x */
/* loop over all the units in a row */
for(i4_ctb_x = i4_first_ctb_x; i4_ctb_x < (i4_first_ctb_x + num_sync_units_in_tile);
i4_ctb_x++)
{
ihevce_ctb_noise_params *ps_ctb_noise_params =
&ps_ctxt->ps_ctb_analyse_curr_row[i4_ctb_x].s_ctb_noise_params;
s_common_frm_prms.i4_ctb_x_off = i4_ctb_x << 6;
s_common_frm_prms.i4_ctb_y_off = i4_ctb_y << 6;
ps_ctxt->s_mc_ctxt.i4_ctb_frm_pos_y = i4_ctb_y << 6;
ps_ctxt->s_mc_ctxt.i4_ctb_frm_pos_x = i4_ctb_x << 6;
/* Initialize ptr to current IPE CTB */
ps_cur_ipe_ctb = ps_ctxt->ps_ipe_l0_ctb_frm_base + i4_ctb_x +
i4_ctb_y * ps_frm_ctb_prms->i4_num_ctbs_horz;
{
ps_ctb_bound_attrs =
get_ctb_attrs(i4_ctb_x << 6, i4_ctb_y << 6, i4_pic_wd, i4_pic_ht, ps_ctxt);
en_merge_32x32 = ps_ctb_bound_attrs->u1_merge_to_32x32_flag;
num_blks_in_this_ctb = ps_ctb_bound_attrs->u1_num_blks_in_ctb;
}
/* Block to initialise pointers to part_type_results_t */
/* in each size-specific inter_cu_results_t */
{
WORD32 i;
for(i = 0; i < 64; i++)
{
ps_ctxt->as_cu8x8_results[i].ps_best_results =
ps_ctxt->ps_me_ctb_data_curr_row[i4_ctb_x]
.as_8x8_block_data[i]
.as_best_results;
ps_ctxt->as_cu8x8_results[i].u1_num_best_results = 0;
}
for(i = 0; i < 16; i++)
{
ps_ctxt->as_cu16x16_results[i].ps_best_results =
ps_ctxt->ps_me_ctb_data_curr_row[i4_ctb_x].as_block_data[i].as_best_results;
ps_ctxt->as_cu16x16_results[i].u1_num_best_results = 0;
}
for(i = 0; i < 4; i++)
{
ps_ctxt->as_cu32x32_results[i].ps_best_results =
ps_ctxt->ps_me_ctb_data_curr_row[i4_ctb_x]
.as_32x32_block_data[i]
.as_best_results;
ps_ctxt->as_cu32x32_results[i].u1_num_best_results = 0;
}
ps_ctxt->s_cu64x64_results.ps_best_results =
ps_ctxt->ps_me_ctb_data_curr_row[i4_ctb_x].s_64x64_block_data.as_best_results;
ps_ctxt->s_cu64x64_results.u1_num_best_results = 0;
}
if(ME_PRISTINE_QUALITY == e_me_quality_presets)
{
ps_ctb_cluster_info->blk_32x32_mask = en_merge_32x32;
ps_ctb_cluster_info->ps_cur_ipe_ctb = ps_cur_ipe_ctb;
ps_ctb_cluster_info->ps_cu_tree_root =
ps_ctxt->ps_cu_tree_curr_row + (i4_ctb_x * MAX_NUM_NODES_CU_TREE);
ps_ctb_cluster_info->nodes_created_in_cu_tree = 1;
}
if(ME_PRISTINE_QUALITY != e_me_quality_presets)
{
S32 i4_nodes_created_in_cu_tree = 1;
ihevce_cu_tree_init(
(ps_ctxt->ps_cu_tree_curr_row + (i4_ctb_x * MAX_NUM_NODES_CU_TREE)),
(ps_ctxt->ps_cu_tree_curr_row + (i4_ctb_x * MAX_NUM_NODES_CU_TREE)),
&i4_nodes_created_in_cu_tree,
0,
POS_NA,
POS_NA,
POS_NA);
}
memset(ai4_blk_8x8_mask, 0, 16 * sizeof(S32));
if(ps_refine_prms->u1_use_lambda_derived_from_min_8x8_act_in_ctb)
{
S32 j;
ipe_l0_ctb_analyse_for_me_t *ps_cur_ipe_ctb;
ps_cur_ipe_ctb =
ps_ctxt->ps_ipe_l0_ctb_frm_base + i4_ctb_x + i4_ctb_y * num_sync_units_in_row;
lambda_recon =
hme_recompute_lambda_from_min_8x8_act_in_ctb(ps_ctxt, ps_cur_ipe_ctb);
lambda_recon = ((float)lambda_recon * (100.0f - ME_LAMBDA_DISCOUNT) / 100.0f);
for(i = 0; i < 4; i++)
{
ps_search_results = &ps_ctxt->as_search_results_32x32[i];
for(j = 0; j < 2; j++)
{
ps_search_results->as_pred_ctxt[j].lambda = lambda_recon;
}
}
ps_search_results = &ps_ctxt->s_search_results_64x64;
for(j = 0; j < 2; j++)
{
ps_search_results->as_pred_ctxt[j].lambda = lambda_recon;
}
s_common_frm_prms.i4_lamda = lambda_recon;
}
else
{
lambda_recon = ps_refine_prms->lambda_recon;
}
/*********************************************************************/
/* replicate the inp buffer at blk or ctb level for each ref id, */
/* Instead of searching with wk * ref(k), we search with Ik = I / wk */
/* thereby avoiding a bloat up of memory. If we did all references */
/* weighted pred, we will end up with a duplicate copy of each ref */
/* at each layer, since we need to preserve the original reference. */
/* ToDo: Need to observe performance with this mechanism and compare */
/* with case where ref is weighted. */
/*********************************************************************/
fp_get_wt_inp(
ps_curr_layer,
&ps_ctxt->s_wt_pred,
unit_size,
s_common_frm_prms.i4_ctb_x_off,
s_common_frm_prms.i4_ctb_y_off,
unit_size,
ps_ctxt->num_ref_future + ps_ctxt->num_ref_past,
ps_ctxt->i4_wt_pred_enable_flag);
if(ps_thrd_ctxt->s_init_prms.u1_is_stasino_enabled)
{
#if TEMPORAL_NOISE_DETECT
{
WORD32 had_block_size = 16;
WORD32 ctb_width = ((i4_pic_wd - s_common_frm_prms.i4_ctb_x_off) >= 64)
? 64
: i4_pic_wd - s_common_frm_prms.i4_ctb_x_off;
WORD32 ctb_height = ((i4_pic_ht - s_common_frm_prms.i4_ctb_y_off) >= 64)
? 64
: i4_pic_ht - s_common_frm_prms.i4_ctb_y_off;
WORD32 num_pred_dir = i4_num_pred_dir;
WORD32 i4_x_off = s_common_frm_prms.i4_ctb_x_off;
WORD32 i4_y_off = s_common_frm_prms.i4_ctb_y_off;
WORD32 i;
WORD32 noise_detected;
WORD32 ctb_size;
WORD32 num_comp_had_blocks;
WORD32 noisy_block_cnt;
WORD32 index_8x8_block;
WORD32 num_8x8_in_ctb_row;
WORD32 ht_offset;
WORD32 wd_offset;
WORD32 block_ht;
WORD32 block_wd;
WORD32 num_horz_blocks;
WORD32 num_vert_blocks;
WORD32 mean;
UWORD32 variance_8x8;
WORD32 hh_energy_percent;
/* variables to hold the constant values. The variable values held are decided by the HAD block size */
WORD32 min_noisy_block_cnt;
WORD32 min_coeffs_above_avg;
WORD32 min_coeff_avg_energy;
/* to store the mean and variance of each 8*8 block and find the variance of any higher block sizes later on. block */
WORD32 i4_cu_x_off, i4_cu_y_off;
WORD32 is_noisy;
/* initialise the variables holding the constants */
if(had_block_size == 8)
{
min_noisy_block_cnt = MIN_NOISY_BLOCKS_CNT_8x8; //6;//
min_coeffs_above_avg = MIN_NUM_COEFFS_ABOVE_AVG_8x8;
min_coeff_avg_energy = MIN_COEFF_AVG_ENERGY_8x8;
}
else
{
min_noisy_block_cnt = MIN_NOISY_BLOCKS_CNT_16x16; //7;//
min_coeffs_above_avg = MIN_NUM_COEFFS_ABOVE_AVG_16x16;
min_coeff_avg_energy = MIN_COEFF_AVG_ENERGY_16x16;
}
/* initialize the variables */
noise_detected = 0;
noisy_block_cnt = 0;
hh_energy_percent = 0;
variance_8x8 = 0;
block_ht = ctb_height;
block_wd = ctb_width;
mean = 0;
ctb_size = block_ht * block_wd; //ctb_width * ctb_height;
num_comp_had_blocks = ctb_size / (had_block_size * had_block_size);
num_horz_blocks = block_wd / had_block_size; //ctb_width / had_block_size;
num_vert_blocks = block_ht / had_block_size; //ctb_height / had_block_size;
ht_offset = -had_block_size;
wd_offset = -had_block_size;
num_8x8_in_ctb_row = block_wd / 8; // number of 8x8 in this ctb
for(i = 0; i < num_comp_had_blocks; i++)
{
if(i % num_horz_blocks == 0)
{
wd_offset = -had_block_size;
ht_offset += had_block_size;
}
wd_offset += had_block_size;
/* CU level offsets */
i4_cu_x_off = i4_x_off + (i % 4) * 16; //+ (i % 4) * 16
i4_cu_y_off = i4_y_off + (i / 4) * 16;
/* if 50 % or more of the CU is noisy then the return value is 1 */
is_noisy = ihevce_determine_cu_noise_based_on_8x8Blk_data(
ps_ctb_noise_params->au1_is_8x8Blk_noisy,
(i % 4) * 16,
(i / 4) * 16,
16);
/* the temporal noise detect call is made on the CU only if the CU is noisy */
if(is_noisy)
{
index_8x8_block = (i / num_horz_blocks) * 2 * num_8x8_in_ctb_row +
(i % num_horz_blocks) * 2;
noisy_block_cnt += ihevce_16x16block_temporal_noise_detect(
16,
((i4_pic_wd - s_common_frm_prms.i4_ctb_x_off) >= 64)
? 64
: i4_pic_wd - s_common_frm_prms.i4_ctb_x_off,
((i4_pic_ht - s_common_frm_prms.i4_ctb_y_off) >= 64)
? 64
: i4_pic_ht - s_common_frm_prms.i4_ctb_y_off,
ps_ctb_noise_params,
&s_srch_cand_init_data,
&s_search_prms_blk,
ps_ctxt,
num_pred_dir,
i4_num_act_ref_l0,
i4_num_act_ref_l1,
i4_cu_x_off,
i4_cu_y_off,
&ps_ctxt->s_wt_pred,
unit_size,
index_8x8_block,
num_horz_blocks,
/*num_8x8_in_ctb_row*/ 8, // this should be a variable extra
i);
} /* if 16x16 is noisy */
} /* loop over for all 16x16*/
if(noisy_block_cnt >= min_noisy_block_cnt)
{
noise_detected = 1;
}
/* write back the noise presence detected for the current CTB to the structure */
ps_ctb_noise_params->i4_noise_present = noise_detected;
}
#endif
#if EVERYWHERE_NOISY && USE_NOISE_TERM_IN_L0_ME
if(ps_thrd_ctxt->s_init_prms.u1_is_stasino_enabled &&
ps_ctb_noise_params->i4_noise_present)
{
memset(
ps_ctb_noise_params->au1_is_8x8Blk_noisy,
1,
sizeof(ps_ctb_noise_params->au1_is_8x8Blk_noisy));
}
#endif
for(i = 0; i < 16; i++)
{
au1_is_16x16Blk_noisy[i] = ihevce_determine_cu_noise_based_on_8x8Blk_data(
ps_ctb_noise_params->au1_is_8x8Blk_noisy, (i % 4) * 16, (i / 4) * 16, 16);
}
for(i = 0; i < 4; i++)
{
au1_is_32x32Blk_noisy[i] = ihevce_determine_cu_noise_based_on_8x8Blk_data(
ps_ctb_noise_params->au1_is_8x8Blk_noisy, (i % 2) * 32, (i / 2) * 32, 32);
}
for(i = 0; i < 1; i++)
{
au1_is_64x64Blk_noisy[i] = ihevce_determine_cu_noise_based_on_8x8Blk_data(
ps_ctb_noise_params->au1_is_8x8Blk_noisy, 0, 0, 64);
}
if(ps_ctxt->s_frm_prms.bidir_enabled &&
(ps_ctxt->s_frm_prms.i4_temporal_layer_id <=
MAX_LAYER_ID_OF_B_PICS_WITHOUT_NOISE_DETECTION))
{
ps_ctb_noise_params->i4_noise_present = 0;
memset(
ps_ctb_noise_params->au1_is_8x8Blk_noisy,
0,
sizeof(ps_ctb_noise_params->au1_is_8x8Blk_noisy));
}
#if ME_LAMBDA_DISCOUNT_WHEN_NOISY
for(i = 0; i < 4; i++)
{
S32 j;
S32 lambda;
if(au1_is_32x32Blk_noisy[i])
{
lambda = lambda_recon;
lambda =
((float)lambda * (100.0f - ME_LAMBDA_DISCOUNT_WHEN_NOISY) / 100.0f);
ps_search_results = &ps_ctxt->as_search_results_32x32[i];
for(j = 0; j < 2; j++)
{
ps_search_results->as_pred_ctxt[j].lambda = lambda;
}
}
}
{
S32 j;
S32 lambda;
if(au1_is_64x64Blk_noisy[0])
{
lambda = lambda_recon;
lambda =
((float)lambda * (100.0f - ME_LAMBDA_DISCOUNT_WHEN_NOISY) / 100.0f);
ps_search_results = &ps_ctxt->s_search_results_64x64;
for(j = 0; j < 2; j++)
{
ps_search_results->as_pred_ctxt[j].lambda = lambda;
}
}
}
#endif
if(au1_is_64x64Blk_noisy[0])
{
U08 *pu1_inp = ps_curr_layer->pu1_inp + (s_common_frm_prms.i4_ctb_x_off +
(s_common_frm_prms.i4_ctb_y_off *
ps_curr_layer->i4_inp_stride));
hme_compute_sigmaX_and_sigmaXSquared(
pu1_inp,
ps_curr_layer->i4_inp_stride,
ps_ctxt->au4_4x4_src_sigmaX,
ps_ctxt->au4_4x4_src_sigmaXSquared,
4,
4,
64,
64,
1,
16);
}
else
{
for(i = 0; i < 4; i++)
{
if(au1_is_32x32Blk_noisy[i])
{
U08 *pu1_inp =
ps_curr_layer->pu1_inp +
(s_common_frm_prms.i4_ctb_x_off +
(s_common_frm_prms.i4_ctb_y_off * ps_curr_layer->i4_inp_stride));
U08 u1_cu_size = 32;
WORD32 i4_inp_buf_offset =
(((i / 2) * (u1_cu_size * ps_curr_layer->i4_inp_stride)) +
((i % 2) * u1_cu_size));
U16 u2_sigma_arr_start_index_of_3rd_32x32_blk_in_ctb = 128;
U16 u2_sigma_arr_start_index_of_2nd_32x32_blk_in_ctb = 8;
S32 i4_sigma_arr_offset =
(((i / 2) * u2_sigma_arr_start_index_of_3rd_32x32_blk_in_ctb) +
((i % 2) * u2_sigma_arr_start_index_of_2nd_32x32_blk_in_ctb));
hme_compute_sigmaX_and_sigmaXSquared(
pu1_inp + i4_inp_buf_offset,
ps_curr_layer->i4_inp_stride,
ps_ctxt->au4_4x4_src_sigmaX + i4_sigma_arr_offset,
ps_ctxt->au4_4x4_src_sigmaXSquared + i4_sigma_arr_offset,
4,
4,
32,
32,
1,
16);
}
else
{
S32 j;
U08 u1_16x16_blk_start_index_in_3rd_32x32_blk_of_ctb = 8;
U08 u1_16x16_blk_start_index_in_2nd_32x32_blk_of_ctb = 2;
S32 i4_16x16_blk_start_index_in_i_th_32x32_blk =
(((i / 2) * u1_16x16_blk_start_index_in_3rd_32x32_blk_of_ctb) +
((i % 2) * u1_16x16_blk_start_index_in_2nd_32x32_blk_of_ctb));
for(j = 0; j < 4; j++)
{
U08 u1_3rd_16x16_blk_index_in_32x32_blk = 4;
U08 u1_2nd_16x16_blk_index_in_32x32_blk = 1;
S32 i4_16x16_blk_index_in_ctb =
i4_16x16_blk_start_index_in_i_th_32x32_blk +
((j % 2) * u1_2nd_16x16_blk_index_in_32x32_blk) +
((j / 2) * u1_3rd_16x16_blk_index_in_32x32_blk);
//S32 k = (((i / 2) * 8) + ((i % 2) * 2)) + ((j % 2) * 1) + ((j / 2) * 4);
if(au1_is_16x16Blk_noisy[i4_16x16_blk_index_in_ctb])
{
U08 *pu1_inp =
ps_curr_layer->pu1_inp + (s_common_frm_prms.i4_ctb_x_off +
(s_common_frm_prms.i4_ctb_y_off *
ps_curr_layer->i4_inp_stride));
U08 u1_cu_size = 16;
WORD32 i4_inp_buf_offset =
(((i4_16x16_blk_index_in_ctb % 4) * u1_cu_size) +
((i4_16x16_blk_index_in_ctb / 4) *
(u1_cu_size * ps_curr_layer->i4_inp_stride)));
U16 u2_sigma_arr_start_index_of_3rd_16x16_blk_in_32x32_blk = 64;
U16 u2_sigma_arr_start_index_of_2nd_16x16_blk_in_32x32_blk = 4;
S32 i4_sigma_arr_offset =
(((i4_16x16_blk_index_in_ctb % 4) *
u2_sigma_arr_start_index_of_2nd_16x16_blk_in_32x32_blk) +
((i4_16x16_blk_index_in_ctb / 4) *
u2_sigma_arr_start_index_of_3rd_16x16_blk_in_32x32_blk));
hme_compute_sigmaX_and_sigmaXSquared(
pu1_inp + i4_inp_buf_offset,
ps_curr_layer->i4_inp_stride,
(ps_ctxt->au4_4x4_src_sigmaX + i4_sigma_arr_offset),
(ps_ctxt->au4_4x4_src_sigmaXSquared + i4_sigma_arr_offset),
4,
4,
16,
16,
1,
16);
}
}
}
}
}
}
else
{
memset(au1_is_16x16Blk_noisy, 0, sizeof(au1_is_16x16Blk_noisy));
memset(au1_is_32x32Blk_noisy, 0, sizeof(au1_is_32x32Blk_noisy));
memset(au1_is_64x64Blk_noisy, 0, sizeof(au1_is_64x64Blk_noisy));
}
for(blk_id_in_ctb = 0; blk_id_in_ctb < num_blks_in_this_ctb; blk_id_in_ctb++)
{
S32 ref_ctr;
U08 au1_pred_dir_searched[2];
U08 u1_is_cu_noisy;
ULWORD64 au8_final_src_sigmaX[17], au8_final_src_sigmaXSquared[17];
{
blk_x = (i4_ctb_x << 2) +
(ps_ctb_bound_attrs->as_blk_attrs[blk_id_in_ctb].u1_blk_x);
blk_y = (i4_ctb_y << 2) +
(ps_ctb_bound_attrs->as_blk_attrs[blk_id_in_ctb].u1_blk_y);
blk_id_in_full_ctb =
ps_ctb_bound_attrs->as_blk_attrs[blk_id_in_ctb].u1_blk_id_in_full_ctb;
blk_8x8_mask = ps_ctb_bound_attrs->as_blk_attrs[blk_id_in_ctb].u1_blk_8x8_mask;
ai4_blk_8x8_mask[blk_id_in_full_ctb] = blk_8x8_mask;
s_search_prms_blk.i4_cu_x_off = (blk_x << blk_size_shift) - (i4_ctb_x << 6);
s_search_prms_blk.i4_cu_y_off = (blk_y << blk_size_shift) - (i4_ctb_y << 6);
}
/* get the current input blk point */
pos_x = blk_x << blk_size_shift;
pos_y = blk_y << blk_size_shift;
pu1_inp = ps_curr_layer->pu1_inp + pos_x + (pos_y * i4_inp_stride);
/*********************************************************************/
/* For every blk in the picture, the search range needs to be derived*/
/* Any blk can have any mv, but practical search constraints are */
/* imposed by the picture boundary and amt of padding. */
/*********************************************************************/
/* MV limit is different based on ref. PIC */
for(ref_ctr = 0; ref_ctr < num_act_ref_pics; ref_ctr++)
{
if(!s_search_prms_blk.i4_use_rec)
{
hme_derive_search_range(
&as_range_prms_inp[ref_ctr],
&s_pic_limit_inp,
&as_mv_limit[ref_ctr],
pos_x,
pos_y,
blk_wd,
blk_ht);
}
else
{
hme_derive_search_range(
&as_range_prms_rec[ref_ctr],
&s_pic_limit_rec,
&as_mv_limit[ref_ctr],
pos_x,
pos_y,
blk_wd,
blk_ht);
}
}
s_search_prms_blk.i4_x_off = blk_x << blk_size_shift;
s_search_prms_blk.i4_y_off = blk_y << blk_size_shift;
/* Select search results from a suitable search result in the context */
{
ps_search_results = &ps_ctxt->as_search_results_16x16[blk_id_in_full_ctb];
if(ps_refine_prms->u1_use_lambda_derived_from_min_8x8_act_in_ctb)
{
S32 i;
for(i = 0; i < 2; i++)
{
ps_search_results->as_pred_ctxt[i].lambda = lambda_recon;
}
}
}
u1_is_cu_noisy = au1_is_16x16Blk_noisy
[(s_search_prms_blk.i4_cu_x_off >> 4) + (s_search_prms_blk.i4_cu_y_off >> 2)];
s_subpel_prms.u1_is_cu_noisy = u1_is_cu_noisy;
#if ME_LAMBDA_DISCOUNT_WHEN_NOISY
if(u1_is_cu_noisy)
{
S32 j;
S32 lambda;
lambda = lambda_recon;
lambda = ((float)lambda * (100.0f - ME_LAMBDA_DISCOUNT_WHEN_NOISY) / 100.0f);
for(j = 0; j < 2; j++)
{
ps_search_results->as_pred_ctxt[j].lambda = lambda;
}
}
else
{
S32 j;
S32 lambda;
lambda = lambda_recon;
for(j = 0; j < 2; j++)
{
ps_search_results->as_pred_ctxt[j].lambda = lambda;
}
}
#endif
s_search_prms_blk.ps_search_results = ps_search_results;
s_search_prms_blk.i4_part_mask = hme_part_mask_populator(
pu1_inp,
i4_inp_stride,
ps_refine_prms->limit_active_partitions,
ps_ctxt->ps_hme_frm_prms->bidir_enabled,
ps_ctxt->u1_is_curFrame_a_refFrame,
blk_8x8_mask,
e_me_quality_presets);
if(ME_PRISTINE_QUALITY == e_me_quality_presets)
{
ps_ctb_cluster_info->ai4_part_mask[blk_id_in_full_ctb] =
s_search_prms_blk.i4_part_mask;
}
/* RESET ALL SEARCH RESULTS FOR THE NEW BLK */
{
/* Temporarily set u1_num_active_ref to 2 (bidir enabled) or 1 */
/* for the sole purpose of the function called below; it is */
/* overwritten with i4_num_pred_dir right after the reset */
ps_search_results->u1_num_active_ref = (ps_refine_prms->bidir_enabled) ? 2 : 1;
hme_reset_search_results(
ps_search_results, s_search_prms_blk.i4_part_mask, MV_RES_FPEL);
ps_search_results->u1_num_active_ref = i4_num_pred_dir;
}
if(0 == blk_id_in_ctb)
{
UWORD8 u1_ctr;
for(u1_ctr = 0; u1_ctr < (ps_ctxt->s_frm_prms.u1_num_active_ref_l0 +
ps_ctxt->s_frm_prms.u1_num_active_ref_l1);
u1_ctr++)
{
WORD32 i4_max_dep_ctb_y;
WORD32 i4_max_dep_ctb_x;
/* Set max mv in ctb units */
i4_max_mv_x_in_ctb =
(ps_curr_layer->i2_max_mv_x + ((1 << ps_ctxt->log_ctb_size) - 1)) >>
ps_ctxt->log_ctb_size;
i4_max_mv_y_in_ctb =
(as_mv_limit[u1_ctr].i2_max_y + ((1 << ps_ctxt->log_ctb_size) - 1)) >>
ps_ctxt->log_ctb_size;
/********************************************************************/
/* Set max ctb_x and ctb_y dependency on reference picture */
/* Note +1 is due to delayed deblock, SAO, subpel plan dependency */
/********************************************************************/
i4_max_dep_ctb_x = CLIP3(
(i4_ctb_x + i4_max_mv_x_in_ctb + 1),
0,
ps_frm_ctb_prms->i4_num_ctbs_horz - 1);
i4_max_dep_ctb_y = CLIP3(
(i4_ctb_y + i4_max_mv_y_in_ctb + 1),
0,
ps_frm_ctb_prms->i4_num_ctbs_vert - 1);
ihevce_dmgr_map_chk_sync(
ps_curr_layer->ppv_dep_mngr_recon[u1_ctr],
ps_ctxt->thrd_id,
i4_ctb_x,
i4_ctb_y,
i4_max_mv_x_in_ctb,
i4_max_mv_y_in_ctb);
}
}
/* Loop across the prediction directions to be searched */
for(u1_pred_dir_ctr = 0; u1_pred_dir_ctr < i4_num_pred_dir; u1_pred_dir_ctr++)
{
S32 resultid;
S08 u1_default_ref_id;
S32 i4_num_srch_cands = 0;
S32 i4_num_refinement_iterations;
S32 i4_refine_iter_ctr;
if((i4_num_pred_dir == 2) || (!ps_ctxt->s_frm_prms.bidir_enabled) ||
(ps_ctxt->s_frm_prms.u1_num_active_ref_l1 == 0))
{
u1_pred_dir = u1_pred_dir_ctr;
}
else if(ps_ctxt->s_frm_prms.u1_num_active_ref_l0 == 0)
{
u1_pred_dir = 1;
}
u1_default_ref_id = (u1_pred_dir == 0) ? ps_ctxt->ai1_past_list[0]
: ps_ctxt->ai1_future_list[0];
au1_pred_dir_searched[u1_pred_dir_ctr] = u1_pred_dir;
i4_num_srch_cands = 0;
resultid = 0;
/* START OF NEW CTB MEANS FILL UP NEIGHBOURS IN 18x18 GRID */
if(0 == blk_id_in_ctb)
{
/*****************************************************************/
/* Initialize the mv grid with results of neighbours for the next*/
/* ctb. */
/*****************************************************************/
hme_fill_ctb_neighbour_mvs(
ps_curr_layer,
blk_x,
blk_y,
aps_mv_grid[u1_pred_dir],
u1_pred_dir_ctr,
u1_default_ref_id,
ps_ctxt->s_frm_prms.u1_num_active_ref_l0);
}
s_search_prms_blk.i1_ref_idx = u1_pred_dir;
{
if((blk_id_in_full_ctb % 4) == 0)
{
ps_ctxt->as_search_results_32x32[blk_id_in_full_ctb >> 2]
.as_pred_ctxt[u1_pred_dir]
.proj_used = (blk_id_in_full_ctb == 8) ? 0 : 1;
}
if(blk_id_in_full_ctb == 0)
{
ps_ctxt->s_search_results_64x64.as_pred_ctxt[u1_pred_dir].proj_used = 1;
}
ps_search_results->as_pred_ctxt[u1_pred_dir].proj_used =
!gau1_encode_to_raster_y[blk_id_in_full_ctb];
}
{
S32 x = gau1_encode_to_raster_x[blk_id_in_full_ctb];
S32 y = gau1_encode_to_raster_y[blk_id_in_full_ctb];
U08 u1_is_blk_at_ctb_boundary = !y;
s_srch_cand_init_data.u1_is_left_available =
!(left_ctb_in_diff_tile && !s_search_prms_blk.i4_cu_x_off);
if(u1_is_blk_at_ctb_boundary)
{
s_srch_cand_init_data.u1_is_topRight_available = 0;
s_srch_cand_init_data.u1_is_topLeft_available = 0;
s_srch_cand_init_data.u1_is_top_available = 0;
}
else
{
s_srch_cand_init_data.u1_is_topRight_available =
gau1_cu_tr_valid[y][x] && ((pos_x + blk_wd) < i4_pic_wd);
s_srch_cand_init_data.u1_is_top_available = 1;
s_srch_cand_init_data.u1_is_topLeft_available =
s_srch_cand_init_data.u1_is_left_available;
}
}
s_srch_cand_init_data.i1_default_ref_id = u1_default_ref_id;
s_srch_cand_init_data.i1_alt_default_ref_id = ps_ctxt->ai1_past_list[1];
s_srch_cand_init_data.i4_pos_x = pos_x;
s_srch_cand_init_data.i4_pos_y = pos_y;
s_srch_cand_init_data.u1_pred_dir = u1_pred_dir;
s_srch_cand_init_data.u1_pred_dir_ctr = u1_pred_dir_ctr;
s_srch_cand_init_data.u1_search_candidate_list_index =
au1_search_candidate_list_index[u1_pred_dir];
i4_num_srch_cands = hme_populate_search_candidates(&s_srch_cand_init_data);
/* Note this block also clips the MV range for all candidates */
{
S08 i1_check_for_mult_refs;
i1_check_for_mult_refs = u1_pred_dir ? (ps_ctxt->num_ref_future > 1)
: (ps_ctxt->num_ref_past > 1);
ps_me_optimised_function_list->pf_mv_clipper(
&s_search_prms_blk,
i4_num_srch_cands,
i1_check_for_mult_refs,
ps_refine_prms->i4_num_steps_fpel_refine,
ps_refine_prms->i4_num_steps_hpel_refine,
ps_refine_prms->i4_num_steps_qpel_refine);
}
#if ENABLE_EXPLICIT_SEARCH_IN_P_IN_L0
i4_num_refinement_iterations =
((!ps_ctxt->s_frm_prms.bidir_enabled) && (i4_num_act_ref_l0 > 1))
? ((e_me_quality_presets == ME_HIGH_QUALITY) ? 2 : i4_num_act_ref_l0)
: 1;
#else
i4_num_refinement_iterations =
((!ps_ctxt->s_frm_prms.bidir_enabled) && (i4_num_act_ref_l0 > 1)) ? 2 : 1;
#endif
#if ENABLE_EXPLICIT_SEARCH_IN_PQ
if(e_me_quality_presets == ME_PRISTINE_QUALITY)
{
i4_num_refinement_iterations = (u1_pred_dir == 0) ? i4_num_act_ref_l0
: i4_num_act_ref_l1;
}
#endif
for(i4_refine_iter_ctr = 0; i4_refine_iter_ctr < i4_num_refinement_iterations;
i4_refine_iter_ctr++)
{
S32 center_x;
S32 center_y;
S32 center_ref_idx;
S08 *pi1_pred_dir_to_ref_idx =
(u1_pred_dir == 0) ? ps_ctxt->ai1_past_list : ps_ctxt->ai1_future_list;
{
WORD32 i4_i;
for(i4_i = 0; i4_i < TOT_NUM_PARTS; i4_i++)
{
ps_fullpel_refine_ctxt->i2_tot_cost[0][i4_i] = MAX_SIGNED_16BIT_VAL;
ps_fullpel_refine_ctxt->i2_mv_cost[0][i4_i] = MAX_SIGNED_16BIT_VAL;
ps_fullpel_refine_ctxt->i2_stim_injected_cost[0][i4_i] =
MAX_SIGNED_16BIT_VAL;
ps_fullpel_refine_ctxt->i2_mv_x[0][i4_i] = 0;
ps_fullpel_refine_ctxt->i2_mv_y[0][i4_i] = 0;
ps_fullpel_refine_ctxt->i2_ref_idx[0][i4_i] = u1_default_ref_id;
if(ps_refine_prms->i4_num_results_per_part == 2)
{
ps_fullpel_refine_ctxt->i2_tot_cost[1][i4_i] =
MAX_SIGNED_16BIT_VAL;
ps_fullpel_refine_ctxt->i2_mv_cost[1][i4_i] =
MAX_SIGNED_16BIT_VAL;
ps_fullpel_refine_ctxt->i2_stim_injected_cost[1][i4_i] =
MAX_SIGNED_16BIT_VAL;
ps_fullpel_refine_ctxt->i2_mv_x[1][i4_i] = 0;
ps_fullpel_refine_ctxt->i2_mv_y[1][i4_i] = 0;
ps_fullpel_refine_ctxt->i2_ref_idx[1][i4_i] = u1_default_ref_id;
}
}
s_search_prms_blk.ps_fullpel_refine_ctxt = ps_fullpel_refine_ctxt;
s_subpel_prms.ps_subpel_refine_ctxt = ps_fullpel_refine_ctxt;
}
{
search_node_t *ps_coloc_node;
S32 i = 0;
if(i4_num_refinement_iterations > 1)
{
for(i = 0; i < ai4_num_coloc_cands[u1_pred_dir]; i++)
{
ps_coloc_node =
s_search_prms_blk.ps_search_candts[ai4_id_coloc[i]]
.ps_search_node;
if(pi1_pred_dir_to_ref_idx[i4_refine_iter_ctr] ==
ps_coloc_node->i1_ref_idx)
{
break;
}
}
if(i == ai4_num_coloc_cands[u1_pred_dir])
{
i = 0;
}
}
else
{
ps_coloc_node = s_search_prms_blk.ps_search_candts[ai4_id_coloc[0]]
.ps_search_node;
}
hme_set_mvp_node(
ps_search_results,
ps_coloc_node,
u1_pred_dir,
(i4_num_refinement_iterations > 1)
? pi1_pred_dir_to_ref_idx[i4_refine_iter_ctr]
: u1_default_ref_id);
center_x = ps_coloc_node->ps_mv->i2_mvx;
center_y = ps_coloc_node->ps_mv->i2_mvy;
center_ref_idx = ps_coloc_node->i1_ref_idx;
}
/* Full-Pel search */
{
S32 num_unique_nodes;
memset(au4_unique_node_map, 0, sizeof(au4_unique_node_map));
num_unique_nodes = hme_remove_duplicate_fpel_search_candidates(
as_unique_search_nodes,
s_search_prms_blk.ps_search_candts,
au4_unique_node_map,
pi1_pred_dir_to_ref_idx,
i4_num_srch_cands,
s_search_prms_blk.i4_num_init_candts,
i4_refine_iter_ctr,
i4_num_refinement_iterations,
i4_num_act_ref_l0,
center_ref_idx,
center_x,
center_y,
ps_ctxt->s_frm_prms.bidir_enabled,
e_me_quality_presets);
/*************************************************************************/
/* This array stores the ids of the partitions whose */
/* SADs are updated. Since the partitions whose SADs are updated may not */
/* be in contiguous order, we supply another level of indirection. */
/*************************************************************************/
ps_fullpel_refine_ctxt->i4_num_valid_parts = hme_create_valid_part_ids(
s_search_prms_blk.i4_part_mask,
&ps_fullpel_refine_ctxt->ai4_part_id[0]);
if(!i4_refine_iter_ctr && !u1_pred_dir_ctr && u1_is_cu_noisy)
{
S32 i;
/*i4_sigma_array_offset : takes care of pointing to the appropriate 4x4 block's sigmaX and sigmaX-squared value in a CTB out of 256 values*/
S32 i4_sigma_array_offset = (s_search_prms_blk.i4_cu_x_off / 4) +
(s_search_prms_blk.i4_cu_y_off * 4);
for(i = 0; i < ps_fullpel_refine_ctxt->i4_num_valid_parts; i++)
{
S32 i4_part_id = ps_fullpel_refine_ctxt->ai4_part_id[i];
hme_compute_final_sigma_of_pu_from_base_blocks(
ps_ctxt->au4_4x4_src_sigmaX + i4_sigma_array_offset,
ps_ctxt->au4_4x4_src_sigmaXSquared + i4_sigma_array_offset,
au8_final_src_sigmaX,
au8_final_src_sigmaXSquared,
16,
4,
i4_part_id,
16);
}
s_common_frm_prms.pu8_part_src_sigmaX = au8_final_src_sigmaX;
s_common_frm_prms.pu8_part_src_sigmaXSquared =
au8_final_src_sigmaXSquared;
s_search_prms_blk.pu8_part_src_sigmaX = au8_final_src_sigmaX;
s_search_prms_blk.pu8_part_src_sigmaXSquared =
au8_final_src_sigmaXSquared;
}
if(0 == num_unique_nodes)
{
continue;
}
if(num_unique_nodes >= 2)
{
s_search_prms_blk.ps_search_nodes = &as_unique_search_nodes[0];
s_search_prms_blk.i4_num_search_nodes = num_unique_nodes;
if(ps_ctxt->i4_pic_type != IV_P_FRAME)
{
if(ps_ctxt->i4_temporal_layer == 1)
{
hme_fullpel_cand_sifter(
&s_search_prms_blk,
ps_curr_layer,
&ps_ctxt->s_wt_pred,
ALPHA_FOR_NOISE_TERM_IN_ME,
u1_is_cu_noisy,
ps_me_optimised_function_list);
}
else
{
hme_fullpel_cand_sifter(
&s_search_prms_blk,
ps_curr_layer,
&ps_ctxt->s_wt_pred,
ALPHA_FOR_NOISE_TERM_IN_ME,
u1_is_cu_noisy,
ps_me_optimised_function_list);
}
}
else
{
hme_fullpel_cand_sifter(
&s_search_prms_blk,
ps_curr_layer,
&ps_ctxt->s_wt_pred,
ALPHA_FOR_NOISE_TERM_IN_ME_P,
u1_is_cu_noisy,
ps_me_optimised_function_list);
}
}
s_search_prms_blk.ps_search_nodes = &as_unique_search_nodes[0];
hme_fullpel_refine(
ps_refine_prms,
&s_search_prms_blk,
ps_curr_layer,
&ps_ctxt->s_wt_pred,
au4_unique_node_map,
num_unique_nodes,
blk_8x8_mask,
center_x,
center_y,
center_ref_idx,
e_me_quality_presets,
ps_me_optimised_function_list);
}
/* Sub-Pel search */
{
hme_reset_wkg_mem(&ps_ctxt->s_buf_mgr);
s_subpel_prms.pu1_wkg_mem = (U08 *)hme_get_wkg_mem(
&ps_ctxt->s_buf_mgr,
INTERP_INTERMED_BUF_SIZE + INTERP_OUT_BUF_SIZE);
/* MV limit is different based on ref. PIC */
for(ref_ctr = 0; ref_ctr < num_act_ref_pics; ref_ctr++)
{
SCALE_RANGE_PRMS(
as_range_prms_hpel[ref_ctr], as_range_prms_rec[ref_ctr], 1);
SCALE_RANGE_PRMS(
as_range_prms_qpel[ref_ctr], as_range_prms_rec[ref_ctr], 2);
}
s_subpel_prms.i4_ctb_x_off = i4_ctb_x << 6;
s_subpel_prms.i4_ctb_y_off = i4_ctb_y << 6;
hme_subpel_refine_cu_hs(
&s_subpel_prms,
ps_curr_layer,
ps_search_results,
u1_pred_dir,
&ps_ctxt->s_wt_pred,
blk_8x8_mask,
ps_ctxt->ps_func_selector,
ps_cmn_utils_optimised_function_list,
ps_me_optimised_function_list);
}
}
}
/* Populate the new PU struct with the results post subpel refinement*/
{
inter_cu_results_t *ps_cu_results;
WORD32 best_inter_cost, intra_cost, posx, posy;
UWORD8 intra_8x8_enabled = 0;
/* cost of 16x16 cu parent */
WORD32 parent_cost = MAX_32BIT_VAL;
/* cost of 8x8 cu children */
/*********************************************************************/
/* Assuming parent is not split, then we signal 1 bit for this parent*/
/* CU. If split, then 1 bit for parent CU + 4 bits for each child CU */
/* So, 4*lambda is extra for children cost. */
/*********************************************************************/
WORD32 child_cost = 0;
ps_cu_results = ps_search_results->ps_cu_results;
/* Initialize the pu_results pointers to the first struct in the stack array */
ps_pu_results = as_inter_pu_results;
hme_reset_wkg_mem(&ps_ctxt->s_buf_mgr);
hme_populate_pus(
ps_thrd_ctxt,
ps_ctxt,
&s_subpel_prms,
ps_search_results,
ps_cu_results,
ps_pu_results,
&(as_pu_results[0][0][0]),
&s_common_frm_prms,
&ps_ctxt->s_wt_pred,
ps_curr_layer,
au1_pred_dir_searched,
i4_num_pred_dir);
ps_cu_results->i4_inp_offset =
(ps_cu_results->u1_x_off) + (ps_cu_results->u1_y_off * 64);
hme_decide_part_types(
ps_cu_results,
ps_pu_results,
&s_common_frm_prms,
ps_ctxt,
ps_cmn_utils_optimised_function_list,
ps_me_optimised_function_list
);
/* UPDATE the MIN and MAX MVs for Dynamical Search Range for each ref. pic. */
/* Only for P pic. For P, both are 0, I&B has them mut. exclusive */
if(ps_ctxt->s_frm_prms.is_i_pic == ps_ctxt->s_frm_prms.bidir_enabled)
{
WORD32 res_ctr;
for(res_ctr = 0; res_ctr < ps_cu_results->u1_num_best_results; res_ctr++)
{
WORD32 num_part = 2, part_ctr;
part_type_results_t *ps_best_results =
&ps_cu_results->ps_best_results[res_ctr];
if(PRT_2Nx2N == ps_best_results->u1_part_type)
num_part = 1;
for(part_ctr = 0; part_ctr < num_part; part_ctr++)
{
pu_result_t *ps_pu_results =
&ps_best_results->as_pu_results[part_ctr];
ASSERT(PRED_L0 == ps_pu_results->pu.b2_pred_mode);
hme_update_dynamic_search_params(
&ps_ctxt->as_l0_dyn_range_prms[i4_idx_dvsr_p]
.as_dyn_range_prms[ps_pu_results->pu.mv.i1_l0_ref_idx],
ps_pu_results->pu.mv.s_l0_mv.i2_mvy);
/* Sanity Check */
ASSERT(
ps_pu_results->pu.mv.i1_l0_ref_idx <
ps_ctxt->s_frm_prms.u1_num_active_ref_l0);
/* No L1 for P Pic. */
ASSERT(PRED_L1 != ps_pu_results->pu.b2_pred_mode);
/* No BI for P Pic. */
ASSERT(PRED_BI != ps_pu_results->pu.b2_pred_mode);
}
}
}
/*****************************************************************/
/* INSERT INTRA RESULTS AT 16x16 LEVEL. */
/*****************************************************************/
#if DISABLE_INTRA_IN_BPICS
if(1 != ((ME_XTREME_SPEED_25 == e_me_quality_presets) &&
(ps_ctxt->s_frm_prms.i4_temporal_layer_id > TEMPORAL_LAYER_DISABLE)))
#endif
{
if(!(DISABLE_INTRA_WHEN_NOISY && s_common_frm_prms.u1_is_cu_noisy))
{
hme_insert_intra_nodes_post_bipred(
ps_cu_results, ps_cur_ipe_ctb, ps_ctxt->frm_qstep);
}
}
#if DISABLE_INTRA_IN_BPICS
if((ME_XTREME_SPEED_25 == e_me_quality_presets) &&
(ps_ctxt->s_frm_prms.i4_temporal_layer_id > TEMPORAL_LAYER_DISABLE))
{
intra_8x8_enabled = 0;
}
else
#endif
{
/*TRAQO intra flag updation*/
if(1 == ps_cu_results->ps_best_results->as_pu_results[0].pu.b1_intra_flag)
{
best_inter_cost =
ps_cu_results->ps_best_results->as_pu_results[1].i4_tot_cost;
intra_cost =
ps_cu_results->ps_best_results->as_pu_results[0].i4_tot_cost;
/*@16x16 level*/
posx = (ps_cu_results->ps_best_results->as_pu_results[1].pu.b4_pos_x
<< 2) >>
4;
posy = (ps_cu_results->ps_best_results->as_pu_results[1].pu.b4_pos_y
<< 2) >>
4;
}
else
{
best_inter_cost =
ps_cu_results->ps_best_results->as_pu_results[0].i4_tot_cost;
posx = (ps_cu_results->ps_best_results->as_pu_results[0].pu.b4_pos_x
<< 2) >>
3;
posy = (ps_cu_results->ps_best_results->as_pu_results[0].pu.b4_pos_y
<< 2) >>
3;
}
/* Disable intra16/32/64 flags based on split flags recommended by IPE */
if(ps_cur_ipe_ctb->u1_split_flag)
{
/* Id of the 32x32 block, 16x16 block in a CTB */
WORD32 i4_32x32_id =
(ps_cu_results->u1_y_off >> 5) * 2 + (ps_cu_results->u1_x_off >> 5);
WORD32 i4_16x16_id = ((ps_cu_results->u1_y_off >> 4) & 0x1) * 2 +
((ps_cu_results->u1_x_off >> 4) & 0x1);
if(ps_cur_ipe_ctb->as_intra32_analyse[i4_32x32_id].b1_split_flag)
{
if(ps_cur_ipe_ctb->as_intra32_analyse[i4_32x32_id]
.as_intra16_analyse[i4_16x16_id]
.b1_split_flag)
{
intra_8x8_enabled =
ps_cur_ipe_ctb->as_intra32_analyse[i4_32x32_id]
.as_intra16_analyse[i4_16x16_id]
.as_intra8_analyse[0]
.b1_valid_cu;
intra_8x8_enabled &=
ps_cur_ipe_ctb->as_intra32_analyse[i4_32x32_id]
.as_intra16_analyse[i4_16x16_id]
.as_intra8_analyse[1]
.b1_valid_cu;
intra_8x8_enabled &=
ps_cur_ipe_ctb->as_intra32_analyse[i4_32x32_id]
.as_intra16_analyse[i4_16x16_id]
.as_intra8_analyse[2]
.b1_valid_cu;
intra_8x8_enabled &=
ps_cur_ipe_ctb->as_intra32_analyse[i4_32x32_id]
.as_intra16_analyse[i4_16x16_id]
.as_intra8_analyse[3]
.b1_valid_cu;
}
}
}
}
if(blk_8x8_mask == 0xf)
{
parent_cost =
ps_search_results->ps_cu_results->ps_best_results[0].i4_tot_cost;
ps_search_results->u1_split_flag = 0;
}
else
{
ps_search_results->u1_split_flag = 1;
}
ps_cu_results = &ps_ctxt->as_cu8x8_results[blk_id_in_full_ctb << 2];
if(s_common_frm_prms.u1_is_cu_noisy)
{
intra_8x8_enabled = 0;
}
/* Evaluate 8x8 if NxN part id is enabled or intra 8x8 is enabled */
if((ps_search_results->i4_part_mask & ENABLE_NxN) || intra_8x8_enabled)
{
/* Populates the PU's for the 4 8x8's in one call */
hme_populate_pus_8x8_cu(
ps_thrd_ctxt,
ps_ctxt,
&s_subpel_prms,
ps_search_results,
ps_cu_results,
ps_pu_results,
&(as_pu_results[0][0][0]),
&s_common_frm_prms,
au1_pred_dir_searched,
i4_num_pred_dir,
blk_8x8_mask);
/* Re-initialize the pu_results pointers to the first struct in the stack array */
ps_pu_results = as_inter_pu_results;
for(i = 0; i < 4; i++)
{
if((blk_8x8_mask & (1 << i)))
{
if(ps_cu_results->i4_part_mask)
{
hme_decide_part_types(
ps_cu_results,
ps_pu_results,
&s_common_frm_prms,
ps_ctxt,
ps_cmn_utils_optimised_function_list,
ps_me_optimised_function_list
);
}
/*****************************************************************/
/* INSERT INTRA RESULTS AT 8x8 LEVEL. */
/*****************************************************************/
#if DISABLE_INTRA_IN_BPICS
if(1 != ((ME_XTREME_SPEED_25 == e_me_quality_presets) &&
(ps_ctxt->s_frm_prms.i4_temporal_layer_id >
TEMPORAL_LAYER_DISABLE)))
#endif
{
if(!(DISABLE_INTRA_WHEN_NOISY &&
s_common_frm_prms.u1_is_cu_noisy))
{
hme_insert_intra_nodes_post_bipred(
ps_cu_results, ps_cur_ipe_ctb, ps_ctxt->frm_qstep);
}
}
child_cost += ps_cu_results->ps_best_results[0].i4_tot_cost;
}
ps_cu_results++;
ps_pu_results++;
}
/* Compare 16x16 vs 8x8 cost */
if(child_cost < parent_cost)
{
ps_search_results->best_cu_cost = child_cost;
ps_search_results->u1_split_flag = 1;
}
}
}
hme_update_mv_bank_encode(
ps_search_results,
ps_curr_layer->ps_layer_mvbank,
blk_x,
blk_y,
&s_mv_update_prms,
au1_pred_dir_searched,
i4_num_act_ref_l0);
/*********************************************************************/
/* Map the best results to an MV Grid. This is a 18x18 grid that is */
/* useful for doing things like predictor for cost calculation or */
/* also for merge calculations if need be. */
/*********************************************************************/
hme_map_mvs_to_grid(
&aps_mv_grid[0], ps_search_results, au1_pred_dir_searched, i4_num_pred_dir);
}
/* Set the CU tree nodes appropriately */
if(e_me_quality_presets != ME_PRISTINE_QUALITY)
{
WORD32 i, j;
for(i = 0; i < 16; i++)
{
cur_ctb_cu_tree_t *ps_tree_node =
ps_ctxt->ps_cu_tree_curr_row + (i4_ctb_x * MAX_NUM_NODES_CU_TREE);
search_results_t *ps_results = &ps_ctxt->as_search_results_16x16[i];
switch(i >> 2)
{
case 0:
{
ps_tree_node = ps_tree_node->ps_child_node_tl;
break;
}
case 1:
{
ps_tree_node = ps_tree_node->ps_child_node_tr;
break;
}
case 2:
{
ps_tree_node = ps_tree_node->ps_child_node_bl;
break;
}
case 3:
{
ps_tree_node = ps_tree_node->ps_child_node_br;
break;
}
}
switch(i % 4)
{
case 0:
{
ps_tree_node = ps_tree_node->ps_child_node_tl;
break;
}
case 1:
{
ps_tree_node = ps_tree_node->ps_child_node_tr;
break;
}
case 2:
{
ps_tree_node = ps_tree_node->ps_child_node_bl;
break;
}
case 3:
{
ps_tree_node = ps_tree_node->ps_child_node_br;
break;
}
}
if(ai4_blk_8x8_mask[i] == 15)
{
if(!ps_results->u1_split_flag)
{
ps_tree_node->is_node_valid = 1;
NULLIFY_THE_CHILDREN_NODES(ps_tree_node);
}
else
{
ps_tree_node->is_node_valid = 0;
ENABLE_THE_CHILDREN_NODES(ps_tree_node);
}
}
else
{
cur_ctb_cu_tree_t *ps_tree_child;
ps_tree_node->is_node_valid = 0;
for(j = 0; j < 4; j++)
{
switch(j)
{
case 0:
{
ps_tree_child = ps_tree_node->ps_child_node_tl;
break;
}
case 1:
{
ps_tree_child = ps_tree_node->ps_child_node_tr;
break;
}
case 2:
{
ps_tree_child = ps_tree_node->ps_child_node_bl;
break;
}
case 3:
{
ps_tree_child = ps_tree_node->ps_child_node_br;
break;
}
}
ps_tree_child->is_node_valid = !!(ai4_blk_8x8_mask[i] & (1 << j));
}
}
}
}
if(ME_PRISTINE_QUALITY == e_me_quality_presets)
{
cur_ctb_cu_tree_t *ps_tree = ps_ctb_cluster_info->ps_cu_tree_root;
hme_analyse_mv_clustering(
ps_ctxt->as_search_results_16x16,
ps_ctxt->as_cu16x16_results,
ps_ctxt->as_cu8x8_results,
ps_ctxt->ps_ctb_cluster_info,
ps_ctxt->ai1_future_list,
ps_ctxt->ai1_past_list,
ps_ctxt->s_frm_prms.bidir_enabled,
e_me_quality_presets);
#if DISABLE_BLK_MERGE_WHEN_NOISY
ps_tree->ps_child_node_tl->is_node_valid = !au1_is_32x32Blk_noisy[0];
ps_tree->ps_child_node_tr->is_node_valid = !au1_is_32x32Blk_noisy[1];
ps_tree->ps_child_node_bl->is_node_valid = !au1_is_32x32Blk_noisy[2];
ps_tree->ps_child_node_br->is_node_valid = !au1_is_32x32Blk_noisy[3];
ps_tree->ps_child_node_tl->u1_inter_eval_enable = !au1_is_32x32Blk_noisy[0];
ps_tree->ps_child_node_tr->u1_inter_eval_enable = !au1_is_32x32Blk_noisy[1];
ps_tree->ps_child_node_bl->u1_inter_eval_enable = !au1_is_32x32Blk_noisy[2];
ps_tree->ps_child_node_br->u1_inter_eval_enable = !au1_is_32x32Blk_noisy[3];
ps_tree->is_node_valid = !au1_is_64x64Blk_noisy[0];
ps_tree->u1_inter_eval_enable = !au1_is_64x64Blk_noisy[0];
#endif
en_merge_32x32 = (ps_tree->ps_child_node_tl->is_node_valid << 0) |
(ps_tree->ps_child_node_tr->is_node_valid << 1) |
(ps_tree->ps_child_node_bl->is_node_valid << 2) |
(ps_tree->ps_child_node_br->is_node_valid << 3);
en_merge_execution = (ps_tree->ps_child_node_tl->u1_inter_eval_enable << 0) |
(ps_tree->ps_child_node_tr->u1_inter_eval_enable << 1) |
(ps_tree->ps_child_node_bl->u1_inter_eval_enable << 2) |
(ps_tree->ps_child_node_br->u1_inter_eval_enable << 3) |
(ps_tree->u1_inter_eval_enable << 4);
}
else
{
en_merge_execution = 0x1f;
#if DISABLE_BLK_MERGE_WHEN_NOISY
en_merge_32x32 = ((!au1_is_32x32Blk_noisy[0] << 0) & (en_merge_32x32 & 1)) |
((!au1_is_32x32Blk_noisy[1] << 1) & (en_merge_32x32 & 2)) |
((!au1_is_32x32Blk_noisy[2] << 2) & (en_merge_32x32 & 4)) |
((!au1_is_32x32Blk_noisy[3] << 3) & (en_merge_32x32 & 8));
#endif
}
/* Re-initialize the pu_results pointers to the first struct in the stack array */
ps_pu_results = as_inter_pu_results;
{
WORD32 ref_ctr;
s_ctb_prms.i4_ctb_x = i4_ctb_x << 6;
s_ctb_prms.i4_ctb_y = i4_ctb_y << 6;
/* MV limit is different based on ref. PIC */
for(ref_ctr = 0; ref_ctr < num_act_ref_pics; ref_ctr++)
{
SCALE_RANGE_PRMS(as_range_prms_hpel[ref_ctr], as_range_prms_rec[ref_ctr], 1);
SCALE_RANGE_PRMS(as_range_prms_qpel[ref_ctr], as_range_prms_rec[ref_ctr], 2);
}
e_merge_result = CU_SPLIT;
merge_count_32x32 = 0;
if((en_merge_32x32 & 1) && (en_merge_execution & 1))
{
range_prms_t *ps_pic_limit;
if(s_merge_prms_32x32_tl.i4_use_rec == 1)
{
ps_pic_limit = &s_pic_limit_rec;
}
else
{
ps_pic_limit = &s_pic_limit_inp;
}
/* MV limit is different based on ref. PIC */
for(ref_ctr = 0; ref_ctr < num_act_ref_pics; ref_ctr++)
{
hme_derive_search_range(
s_merge_prms_32x32_tl.aps_mv_range[ref_ctr],
ps_pic_limit,
&as_mv_limit[ref_ctr],
i4_ctb_x << 6,
i4_ctb_y << 6,
32,
32);
SCALE_RANGE_PRMS_POINTERS(
s_merge_prms_32x32_tl.aps_mv_range[ref_ctr],
s_merge_prms_32x32_tl.aps_mv_range[ref_ctr],
2);
}
s_merge_prms_32x32_tl.i4_ctb_x_off = i4_ctb_x << 6;
s_merge_prms_32x32_tl.i4_ctb_y_off = i4_ctb_y << 6;
s_subpel_prms.u1_is_cu_noisy = au1_is_32x32Blk_noisy[0];
e_merge_result = hme_try_merge_high_speed(
ps_thrd_ctxt,
ps_ctxt,
ps_cur_ipe_ctb,
&s_subpel_prms,
&s_merge_prms_32x32_tl,
ps_pu_results,
&as_pu_results[0][0][0]);
if(e_merge_result == CU_MERGED)
{
inter_cu_results_t *ps_cu_results =
s_merge_prms_32x32_tl.ps_results_merge->ps_cu_results;
if(!((ps_cu_results->u1_num_best_results == 1) &&
(ps_cu_results->ps_best_results->as_pu_results->pu.b1_intra_flag)))
{
hme_map_mvs_to_grid(
&aps_mv_grid[0],
s_merge_prms_32x32_tl.ps_results_merge,
s_merge_prms_32x32_tl.au1_pred_dir_searched,
s_merge_prms_32x32_tl.i4_num_pred_dir_actual);
}
if(ME_PRISTINE_QUALITY != e_me_quality_presets)
{
ps_ctxt->ps_cu_tree_curr_row[(i4_ctb_x * MAX_NUM_NODES_CU_TREE)]
.ps_child_node_tl->is_node_valid = 1;
NULLIFY_THE_CHILDREN_NODES(
ps_ctxt->ps_cu_tree_curr_row[(i4_ctb_x * MAX_NUM_NODES_CU_TREE)]
.ps_child_node_tl);
}
merge_count_32x32++;
e_merge_result = CU_SPLIT;
}
else if(ME_PRISTINE_QUALITY == e_me_quality_presets)
{
#if ENABLE_CU_TREE_CULLING
cur_ctb_cu_tree_t *ps_tree =
ps_ctb_cluster_info->ps_cu_tree_root->ps_child_node_tl;
ps_ctb_cluster_info->ps_cu_tree_root->is_node_valid = 0;
en_merge_execution = (en_merge_execution & (~(1 << 4)));
ENABLE_THE_CHILDREN_NODES(ps_tree);
ENABLE_THE_CHILDREN_NODES(ps_tree->ps_child_node_tl);
ENABLE_THE_CHILDREN_NODES(ps_tree->ps_child_node_tr);
ENABLE_THE_CHILDREN_NODES(ps_tree->ps_child_node_bl);
ENABLE_THE_CHILDREN_NODES(ps_tree->ps_child_node_br);
#endif
}
}
else if((en_merge_32x32 & 1) && (!(en_merge_execution & 1)))
{
#if ENABLE_CU_TREE_CULLING
cur_ctb_cu_tree_t *ps_tree =
ps_ctb_cluster_info->ps_cu_tree_root->ps_child_node_tl;
ENABLE_THE_CHILDREN_NODES(ps_tree);
ENABLE_THE_CHILDREN_NODES(ps_tree->ps_child_node_tl);
ENABLE_THE_CHILDREN_NODES(ps_tree->ps_child_node_tr);
ENABLE_THE_CHILDREN_NODES(ps_tree->ps_child_node_bl);
ENABLE_THE_CHILDREN_NODES(ps_tree->ps_child_node_br);
#endif
if(au1_is_32x32Blk_noisy[0] && DISABLE_INTRA_WHEN_NOISY)
{
ps_tree->is_node_valid = 0;
ps_ctb_cluster_info->ps_cu_tree_root->is_node_valid = 0;
en_merge_execution = (en_merge_execution & (~(1 << 4)));
}
}
if((en_merge_32x32 & 2) && (en_merge_execution & 2))
{
range_prms_t *ps_pic_limit;
if(s_merge_prms_32x32_tr.i4_use_rec == 1)
{
ps_pic_limit = &s_pic_limit_rec;
}
else
{
ps_pic_limit = &s_pic_limit_inp;
}
/* MV limit is different based on ref. PIC */
for(ref_ctr = 0; ref_ctr < num_act_ref_pics; ref_ctr++)
{
hme_derive_search_range(
s_merge_prms_32x32_tr.aps_mv_range[ref_ctr],
ps_pic_limit,
&as_mv_limit[ref_ctr],
(i4_ctb_x << 6) + 32,
i4_ctb_y << 6,
32,
32);
SCALE_RANGE_PRMS_POINTERS(
s_merge_prms_32x32_tr.aps_mv_range[ref_ctr],
s_merge_prms_32x32_tr.aps_mv_range[ref_ctr],
2);
}
s_merge_prms_32x32_tr.i4_ctb_x_off = i4_ctb_x << 6;
s_merge_prms_32x32_tr.i4_ctb_y_off = i4_ctb_y << 6;
s_subpel_prms.u1_is_cu_noisy = au1_is_32x32Blk_noisy[1];
e_merge_result = hme_try_merge_high_speed(
ps_thrd_ctxt,
ps_ctxt,
ps_cur_ipe_ctb,
&s_subpel_prms,
&s_merge_prms_32x32_tr,
ps_pu_results,
&as_pu_results[0][0][0]);
if(e_merge_result == CU_MERGED)
{
inter_cu_results_t *ps_cu_results =
s_merge_prms_32x32_tr.ps_results_merge->ps_cu_results;
if(!((ps_cu_results->u1_num_best_results == 1) &&
(ps_cu_results->ps_best_results->as_pu_results->pu.b1_intra_flag)))
{
hme_map_mvs_to_grid(
&aps_mv_grid[0],
s_merge_prms_32x32_tr.ps_results_merge,
s_merge_prms_32x32_tr.au1_pred_dir_searched,
s_merge_prms_32x32_tr.i4_num_pred_dir_actual);
}
if(ME_PRISTINE_QUALITY != e_me_quality_presets)
{
ps_ctxt->ps_cu_tree_curr_row[(i4_ctb_x * MAX_NUM_NODES_CU_TREE)]
.ps_child_node_tr->is_node_valid = 1;
NULLIFY_THE_CHILDREN_NODES(
ps_ctxt->ps_cu_tree_curr_row[(i4_ctb_x * MAX_NUM_NODES_CU_TREE)]
.ps_child_node_tr);
}
merge_count_32x32++;
e_merge_result = CU_SPLIT;
}
else if(ME_PRISTINE_QUALITY == e_me_quality_presets)
{
#if ENABLE_CU_TREE_CULLING
cur_ctb_cu_tree_t *ps_tree =
ps_ctb_cluster_info->ps_cu_tree_root->ps_child_node_tr;
ps_ctb_cluster_info->ps_cu_tree_root->is_node_valid = 0;
en_merge_execution = (en_merge_execution & (~(1 << 4)));
ENABLE_THE_CHILDREN_NODES(ps_tree);
ENABLE_THE_CHILDREN_NODES(ps_tree->ps_child_node_tl);
ENABLE_THE_CHILDREN_NODES(ps_tree->ps_child_node_tr);
ENABLE_THE_CHILDREN_NODES(ps_tree->ps_child_node_bl);
ENABLE_THE_CHILDREN_NODES(ps_tree->ps_child_node_br);
#endif
}
}
else if((en_merge_32x32 & 2) && (!(en_merge_execution & 2)))
{
#if ENABLE_CU_TREE_CULLING
cur_ctb_cu_tree_t *ps_tree =
ps_ctb_cluster_info->ps_cu_tree_root->ps_child_node_tr;
ENABLE_THE_CHILDREN_NODES(ps_tree);
ENABLE_THE_CHILDREN_NODES(ps_tree->ps_child_node_tl);
ENABLE_THE_CHILDREN_NODES(ps_tree->ps_child_node_tr);
ENABLE_THE_CHILDREN_NODES(ps_tree->ps_child_node_bl);
ENABLE_THE_CHILDREN_NODES(ps_tree->ps_child_node_br);
#endif
if(au1_is_32x32Blk_noisy[1] && DISABLE_INTRA_WHEN_NOISY)
{
ps_tree->is_node_valid = 0;
ps_ctb_cluster_info->ps_cu_tree_root->is_node_valid = 0;
en_merge_execution = (en_merge_execution & (~(1 << 4)));
}
}
if((en_merge_32x32 & 4) && (en_merge_execution & 4))
{
range_prms_t *ps_pic_limit;
if(s_merge_prms_32x32_bl.i4_use_rec == 1)
{
ps_pic_limit = &s_pic_limit_rec;
}
else
{
ps_pic_limit = &s_pic_limit_inp;
}
/* MV limit is different based on ref. PIC */
for(ref_ctr = 0; ref_ctr < num_act_ref_pics; ref_ctr++)
{
hme_derive_search_range(
s_merge_prms_32x32_bl.aps_mv_range[ref_ctr],
ps_pic_limit,
&as_mv_limit[ref_ctr],
i4_ctb_x << 6,
(i4_ctb_y << 6) + 32,
32,
32);
SCALE_RANGE_PRMS_POINTERS(
s_merge_prms_32x32_bl.aps_mv_range[ref_ctr],
s_merge_prms_32x32_bl.aps_mv_range[ref_ctr],
2);
}
s_merge_prms_32x32_bl.i4_ctb_x_off = i4_ctb_x << 6;
s_merge_prms_32x32_bl.i4_ctb_y_off = i4_ctb_y << 6;
s_subpel_prms.u1_is_cu_noisy = au1_is_32x32Blk_noisy[2];
e_merge_result = hme_try_merge_high_speed(
ps_thrd_ctxt,
ps_ctxt,
ps_cur_ipe_ctb,
&s_subpel_prms,
&s_merge_prms_32x32_bl,
ps_pu_results,
&as_pu_results[0][0][0]);
if(e_merge_result == CU_MERGED)
{
inter_cu_results_t *ps_cu_results =
s_merge_prms_32x32_bl.ps_results_merge->ps_cu_results;
if(!((ps_cu_results->u1_num_best_results == 1) &&
(ps_cu_results->ps_best_results->as_pu_results->pu.b1_intra_flag)))
{
hme_map_mvs_to_grid(
&aps_mv_grid[0],
s_merge_prms_32x32_bl.ps_results_merge,
s_merge_prms_32x32_bl.au1_pred_dir_searched,
s_merge_prms_32x32_bl.i4_num_pred_dir_actual);
}
if(ME_PRISTINE_QUALITY != e_me_quality_presets)
{
ps_ctxt->ps_cu_tree_curr_row[(i4_ctb_x * MAX_NUM_NODES_CU_TREE)]
.ps_child_node_bl->is_node_valid = 1;
NULLIFY_THE_CHILDREN_NODES(
ps_ctxt->ps_cu_tree_curr_row[(i4_ctb_x * MAX_NUM_NODES_CU_TREE)]
.ps_child_node_bl);
}
merge_count_32x32++;
e_merge_result = CU_SPLIT;
}
else if(ME_PRISTINE_QUALITY == e_me_quality_presets)
{
#if ENABLE_CU_TREE_CULLING
cur_ctb_cu_tree_t *ps_tree =
ps_ctb_cluster_info->ps_cu_tree_root->ps_child_node_bl;
ps_ctb_cluster_info->ps_cu_tree_root->is_node_valid = 0;
en_merge_execution = (en_merge_execution & (~(1 << 4)));
ENABLE_THE_CHILDREN_NODES(ps_tree);
ENABLE_THE_CHILDREN_NODES(ps_tree->ps_child_node_tl);
ENABLE_THE_CHILDREN_NODES(ps_tree->ps_child_node_tr);
ENABLE_THE_CHILDREN_NODES(ps_tree->ps_child_node_bl);
ENABLE_THE_CHILDREN_NODES(ps_tree->ps_child_node_br);
#endif
}
}
else if((en_merge_32x32 & 4) && (!(en_merge_execution & 4)))
{
#if ENABLE_CU_TREE_CULLING
cur_ctb_cu_tree_t *ps_tree =
ps_ctb_cluster_info->ps_cu_tree_root->ps_child_node_bl;
ENABLE_THE_CHILDREN_NODES(ps_tree);
ENABLE_THE_CHILDREN_NODES(ps_tree->ps_child_node_tl);
ENABLE_THE_CHILDREN_NODES(ps_tree->ps_child_node_tr);
ENABLE_THE_CHILDREN_NODES(ps_tree->ps_child_node_bl);
ENABLE_THE_CHILDREN_NODES(ps_tree->ps_child_node_br);
#endif
if(au1_is_32x32Blk_noisy[2] && DISABLE_INTRA_WHEN_NOISY)
{
ps_tree->is_node_valid = 0;
ps_ctb_cluster_info->ps_cu_tree_root->is_node_valid = 0;
en_merge_execution = (en_merge_execution & (~(1 << 4)));
}
}
if((en_merge_32x32 & 8) && (en_merge_execution & 8))
{
range_prms_t *ps_pic_limit;
if(s_merge_prms_32x32_br.i4_use_rec == 1)
{
ps_pic_limit = &s_pic_limit_rec;
}
else
{
ps_pic_limit = &s_pic_limit_inp;
}
/* MV limit is different based on ref. PIC */
for(ref_ctr = 0; ref_ctr < num_act_ref_pics; ref_ctr++)
{
hme_derive_search_range(
s_merge_prms_32x32_br.aps_mv_range[ref_ctr],
ps_pic_limit,
&as_mv_limit[ref_ctr],
(i4_ctb_x << 6) + 32,
(i4_ctb_y << 6) + 32,
32,
32);
SCALE_RANGE_PRMS_POINTERS(
s_merge_prms_32x32_br.aps_mv_range[ref_ctr],
s_merge_prms_32x32_br.aps_mv_range[ref_ctr],
2);
}
s_merge_prms_32x32_br.i4_ctb_x_off = i4_ctb_x << 6;
s_merge_prms_32x32_br.i4_ctb_y_off = i4_ctb_y << 6;
s_subpel_prms.u1_is_cu_noisy = au1_is_32x32Blk_noisy[3];
e_merge_result = hme_try_merge_high_speed(
ps_thrd_ctxt,
ps_ctxt,
ps_cur_ipe_ctb,
&s_subpel_prms,
&s_merge_prms_32x32_br,
ps_pu_results,
&as_pu_results[0][0][0]);
if(e_merge_result == CU_MERGED)
{
/*inter_cu_results_t *ps_cu_results = s_merge_prms_32x32_br.ps_results_merge->ps_cu_results;
if(!((ps_cu_results->u1_num_best_results == 1) &&
(ps_cu_results->ps_best_results->as_pu_results->pu.b1_intra_flag)))
{
hme_map_mvs_to_grid
(
&aps_mv_grid[0],
s_merge_prms_32x32_br.ps_results_merge,
s_merge_prms_32x32_br.au1_pred_dir_searched,
s_merge_prms_32x32_br.i4_num_pred_dir_actual
);
}*/
if(ME_PRISTINE_QUALITY != e_me_quality_presets)
{
ps_ctxt->ps_cu_tree_curr_row[(i4_ctb_x * MAX_NUM_NODES_CU_TREE)]
.ps_child_node_br->is_node_valid = 1;
NULLIFY_THE_CHILDREN_NODES(
ps_ctxt->ps_cu_tree_curr_row[(i4_ctb_x * MAX_NUM_NODES_CU_TREE)]
.ps_child_node_br);
}
merge_count_32x32++;
e_merge_result = CU_SPLIT;
}
else if(ME_PRISTINE_QUALITY == e_me_quality_presets)
{
#if ENABLE_CU_TREE_CULLING
cur_ctb_cu_tree_t *ps_tree =
ps_ctb_cluster_info->ps_cu_tree_root->ps_child_node_br;
ps_ctb_cluster_info->ps_cu_tree_root->is_node_valid = 0;
en_merge_execution = (en_merge_execution & (~(1 << 4)));
ENABLE_THE_CHILDREN_NODES(ps_tree);
ENABLE_THE_CHILDREN_NODES(ps_tree->ps_child_node_tl);
ENABLE_THE_CHILDREN_NODES(ps_tree->ps_child_node_tr);
ENABLE_THE_CHILDREN_NODES(ps_tree->ps_child_node_bl);
ENABLE_THE_CHILDREN_NODES(ps_tree->ps_child_node_br);
#endif
}
}
else if((en_merge_32x32 & 8) && (!(en_merge_execution & 8)))
{
#if ENABLE_CU_TREE_CULLING
cur_ctb_cu_tree_t *ps_tree =
ps_ctb_cluster_info->ps_cu_tree_root->ps_child_node_br;
ENABLE_THE_CHILDREN_NODES(ps_tree);
ENABLE_THE_CHILDREN_NODES(ps_tree->ps_child_node_tl);
ENABLE_THE_CHILDREN_NODES(ps_tree->ps_child_node_tr);
ENABLE_THE_CHILDREN_NODES(ps_tree->ps_child_node_bl);
ENABLE_THE_CHILDREN_NODES(ps_tree->ps_child_node_br);
#endif
if(au1_is_32x32Blk_noisy[3] && DISABLE_INTRA_WHEN_NOISY)
{
ps_tree->is_node_valid = 0;
ps_ctb_cluster_info->ps_cu_tree_root->is_node_valid = 0;
en_merge_execution = (en_merge_execution & (~(1 << 4)));
}
}
/* Try merging all 32x32 to 64x64 candts */
if(((en_merge_32x32 & 0xf) == 0xf) &&
(((merge_count_32x32 == 4) && (e_me_quality_presets != ME_PRISTINE_QUALITY)) ||
((en_merge_execution & 16) && (e_me_quality_presets == ME_PRISTINE_QUALITY))))
if((((e_me_quality_presets == ME_XTREME_SPEED_25) &&
!DISABLE_64X64_BLOCK_MERGE_IN_ME_IN_XS25) ||
(e_me_quality_presets != ME_XTREME_SPEED_25)))
{
range_prms_t *ps_pic_limit;
if(s_merge_prms_64x64.i4_use_rec == 1)
{
ps_pic_limit = &s_pic_limit_rec;
}
else
{
ps_pic_limit = &s_pic_limit_inp;
}
/* MV limit is different based on ref. PIC */
for(ref_ctr = 0; ref_ctr < num_act_ref_pics; ref_ctr++)
{
hme_derive_search_range(
s_merge_prms_64x64.aps_mv_range[ref_ctr],
ps_pic_limit,
&as_mv_limit[ref_ctr],
i4_ctb_x << 6,
i4_ctb_y << 6,
64,
64);
SCALE_RANGE_PRMS_POINTERS(
s_merge_prms_64x64.aps_mv_range[ref_ctr],
s_merge_prms_64x64.aps_mv_range[ref_ctr],
2);
}
s_merge_prms_64x64.i4_ctb_x_off = i4_ctb_x << 6;
s_merge_prms_64x64.i4_ctb_y_off = i4_ctb_y << 6;
s_subpel_prms.u1_is_cu_noisy = au1_is_64x64Blk_noisy[0];
e_merge_result = hme_try_merge_high_speed(
ps_thrd_ctxt,
ps_ctxt,
ps_cur_ipe_ctb,
&s_subpel_prms,
&s_merge_prms_64x64,
ps_pu_results,
&as_pu_results[0][0][0]);
if((e_merge_result == CU_MERGED) &&
(ME_PRISTINE_QUALITY != e_me_quality_presets))
{
ps_ctxt->ps_cu_tree_curr_row[(i4_ctb_x * MAX_NUM_NODES_CU_TREE)]
.is_node_valid = 1;
NULLIFY_THE_CHILDREN_NODES(
ps_ctxt->ps_cu_tree_curr_row + (i4_ctb_x * MAX_NUM_NODES_CU_TREE));
}
else if(
(e_merge_result == CU_SPLIT) &&
(ME_PRISTINE_QUALITY == e_me_quality_presets))
{
ps_ctxt->ps_cu_tree_curr_row[(i4_ctb_x * MAX_NUM_NODES_CU_TREE)]
.is_node_valid = 0;
}
}
/*****************************************************************/
/* UPDATION OF RESULT TO EXTERNAL STRUCTURES */
/*****************************************************************/
pf_ext_update_fxn((void *)ps_thrd_ctxt, (void *)ps_ctxt, i4_ctb_x, i4_ctb_y);
{
#ifdef _DEBUG
S32 wd = ((i4_pic_wd - s_common_frm_prms.i4_ctb_x_off) >= 64)
? 64
: i4_pic_wd - s_common_frm_prms.i4_ctb_x_off;
S32 ht = ((i4_pic_ht - s_common_frm_prms.i4_ctb_y_off) >= 64)
? 64
: i4_pic_ht - s_common_frm_prms.i4_ctb_y_off;
ASSERT(
(wd * ht) ==
ihevce_compute_area_of_valid_cus_in_ctb(
&ps_ctxt->ps_cu_tree_curr_row[(i4_ctb_x * MAX_NUM_NODES_CU_TREE)]));
#endif
}
}
/* set the dependency for the corresponding row in enc loop */
ihevce_dmgr_set_row_row_sync(
pv_dep_mngr_encloop_dep_me,
(i4_ctb_x + 1),
i4_ctb_y,
tile_col_idx /* Col Tile No. */);
left_ctb_in_diff_tile = 0;
}
}
}
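/**
********************************************************************************
* @fn void hme_refine_no_encode(coarse_me_ctxt_t *ps_ctxt,
* refine_prms_t *ps_refine_prms,
* multi_thrd_ctxt_t *ps_multi_thrd_ctxt,
* S32 lyr_job_type,
* WORD32 i4_ping_pong,
* void **ppv_dep_mngr_hme_sync)
*
* @brief Top level entry point for refinement ME of a non-encode layer
*
* @param[in,out] ps_ctxt: ME Handle
*
* @param[in] ps_refine_prms : refinement layer prms
*
* @param[in,out] ps_multi_thrd_ctxt : multi thread context; used to fetch the
* row jobs and to read the lap out info of the current input
*
* @param[in] lyr_job_type : job type passed to the job queue when fetching
* the next row to be processed
*
* @param[in] i4_ping_pong : index selecting the ping-pong instance of the
* pre enc input and of the L1 pre intra done flags
*
* @param[in] ppv_dep_mngr_hme_sync : array of HME dependency manager handles,
* indexed with (layer id - 1)
*
* @return None
********************************************************************************
*/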
void hme_refine_no_encode(
coarse_me_ctxt_t *ps_ctxt,
refine_prms_t *ps_refine_prms,
multi_thrd_ctxt_t *ps_multi_thrd_ctxt,
S32 lyr_job_type,
WORD32 i4_ping_pong,
void **ppv_dep_mngr_hme_sync)
{
BLK_SIZE_T e_search_blk_size, e_result_blk_size;
ME_QUALITY_PRESETS_T e_me_quality_presets =
ps_ctxt->s_init_prms.s_me_coding_tools.e_me_quality_presets;
/*************************************************************************/
/* Complexity of search: Low to High */
/*************************************************************************/
SEARCH_COMPLEXITY_T e_search_complexity;
/*************************************************************************/
/* Config parameter structures for various ME submodules */
/*************************************************************************/
hme_search_prms_t s_search_prms_blk;
mvbank_update_prms_t s_mv_update_prms;
/*************************************************************************/
/* All types of search candidates for predictor based search. */
/*************************************************************************/
S32 num_init_candts = 0;
search_candt_t *ps_search_candts, as_search_candts[MAX_INIT_CANDTS];
search_node_t as_top_neighbours[4], as_left_neighbours[3];
search_node_t *ps_candt_zeromv, *ps_candt_tl, *ps_candt_tr;
search_node_t *ps_candt_l, *ps_candt_t;
search_node_t *ps_candt_prj_br[2], *ps_candt_prj_b[2], *ps_candt_prj_r[2];
search_node_t *ps_candt_prj_bl[2];
search_node_t *ps_candt_prj_tr[2], *ps_candt_prj_t[2], *ps_candt_prj_tl[2];
search_node_t *ps_candt_prj_coloc[2];
pf_get_wt_inp fp_get_wt_inp;
search_node_t as_unique_search_nodes[MAX_INIT_CANDTS * 9];
U32 au4_unique_node_map[MAP_X_MAX * 2];
/*EIID */
WORD32 i4_num_inter_wins = 0; //debug code to find stat of
WORD32 i4_num_comparisions = 0; //debug code
WORD32 i4_threshold_multiplier;
WORD32 i4_threshold_divider;
WORD32 i4_temporal_layer =
ps_multi_thrd_ctxt->aps_curr_inp_pre_enc[i4_ping_pong]->s_lap_out.i4_temporal_lyr_id;
/*************************************************************************/
/* Points to the search results for the blk level search (8x8/16x16) */
/*************************************************************************/
search_results_t *ps_search_results;
/*************************************************************************/
/* Coordinates */
/*************************************************************************/
S32 blk_x, i4_ctb_x, blk_id_in_ctb;
//S32 i4_ctb_y;
S32 pos_x, pos_y;
S32 blk_id_in_full_ctb;
S32 i4_num_srch_cands;
S32 blk_y;
/*************************************************************************/
/* Related to dimensions of block being searched and pic dimensions */
/*************************************************************************/
S32 blk_wd, blk_ht, blk_size_shift, num_blks_in_row, num_blks_in_pic;
S32 i4_pic_wd, i4_pic_ht, num_blks_in_this_ctb;
S32 num_results_prev_layer;
/*************************************************************************/
/* Size of a basic unit for this layer. For non encode layers, we search */
/* in block sizes of 8x8. For encode layers, though we search 16x16s the */
/* basic unit size is the ctb size. */
/*************************************************************************/
S32 unit_size;
/*************************************************************************/
/* Pointers to context in current and coarser layers */
/*************************************************************************/
layer_ctxt_t *ps_curr_layer, *ps_coarse_layer;
/*************************************************************************/
/* to store mv range per blk, and picture limit, allowed search range */
/* range prms in hpel and qpel units as well */
/*************************************************************************/
range_prms_t s_range_prms_inp, s_range_prms_rec;
range_prms_t s_pic_limit_inp, s_pic_limit_rec, as_mv_limit[MAX_NUM_REF];
/*************************************************************************/
/* These variables are used to track number of references at different */
/* stages of ME. */
/*************************************************************************/
S32 i4_num_ref_fpel, i4_num_ref_before_merge;
S32 i4_num_ref_each_dir, i, i4_num_ref_prev_layer;
S32 lambda_inp = ps_refine_prms->lambda_inp;
/*************************************************************************/
/* When a layer is implicit, it means that it searches on 1 or 2 ref idx */
/* Explicit means it searches on all active ref idx. */
/*************************************************************************/
S32 curr_layer_implicit, prev_layer_implicit;
/*************************************************************************/
/* Variables for loop counts */
/*************************************************************************/
S32 id;
S08 i1_ref_idx;
/*************************************************************************/
/* Input pointer and stride */
/*************************************************************************/
U08 *pu1_inp;
S32 i4_inp_stride;
S32 end_of_frame;
S32 num_sync_units_in_row;
PF_HME_PROJECT_COLOC_CANDT_FXN pf_hme_project_coloc_candt;
ASSERT(ps_refine_prms->i4_layer_id < ps_ctxt->num_layers - 1);
/*************************************************************************/
/* Pointers to current and coarse layer are needed for projection */
/* Pointer to prev layer are needed for other candts like coloc */
/*************************************************************************/
ps_curr_layer = ps_ctxt->ps_curr_descr->aps_layers[ps_refine_prms->i4_layer_id];
ps_coarse_layer = ps_ctxt->ps_curr_descr->aps_layers[ps_refine_prms->i4_layer_id + 1];
num_results_prev_layer = ps_coarse_layer->ps_layer_mvbank->i4_num_mvs_per_ref;
/* Function pointer is selected based on the C vc X86 macro */
fp_get_wt_inp = ((ihevce_me_optimised_function_list_t *)ps_ctxt->pv_me_optimised_function_list)
->pf_get_wt_inp_8x8;
i4_inp_stride = ps_curr_layer->i4_inp_stride;
i4_pic_wd = ps_curr_layer->i4_wd;
i4_pic_ht = ps_curr_layer->i4_ht;
e_search_complexity = ps_refine_prms->e_search_complexity;
end_of_frame = 0;
/* If the previous layer is non-encode layer, then use dyadic projection */
if(0 == ps_ctxt->u1_encode[ps_refine_prms->i4_layer_id + 1])
pf_hme_project_coloc_candt = hme_project_coloc_candt_dyadic;
else
pf_hme_project_coloc_candt = hme_project_coloc_candt;
/* This points to all the initial candts */
ps_search_candts = &as_search_candts[0];
{
e_search_blk_size = BLK_8x8;
blk_wd = blk_ht = 8;
blk_size_shift = 3;
s_mv_update_prms.i4_shift = 0;
/*********************************************************************/
/* In case we do not encode this layer, we search 8x8 with or without*/
/* enable 4x4 SAD. */
/*********************************************************************/
{
S32 i4_mask = (ENABLE_2Nx2N);
e_result_blk_size = BLK_8x8;
if(ps_refine_prms->i4_enable_4x4_part)
{
i4_mask |= (ENABLE_NxN);
e_result_blk_size = BLK_4x4;
s_mv_update_prms.i4_shift = 1;
}
s_search_prms_blk.i4_part_mask = i4_mask;
}
unit_size = blk_wd;
s_search_prms_blk.i4_inp_stride = unit_size;
}
/* This is required to properly update the layer mv bank */
s_mv_update_prms.e_search_blk_size = e_search_blk_size;
s_search_prms_blk.e_blk_size = e_search_blk_size;
/*************************************************************************/
/* If current layer is explicit, then the number of ref frames are to */
/* be same as previous layer. Else it will be 2 */
/*************************************************************************/
i4_num_ref_prev_layer = ps_coarse_layer->ps_layer_mvbank->i4_num_ref;
if(ps_refine_prms->explicit_ref)
{
curr_layer_implicit = 0;
i4_num_ref_fpel = i4_num_ref_prev_layer;
/* 100578 : Using same mv cost fun. for all presets. */
s_search_prms_blk.pf_mv_cost_compute = compute_mv_cost_refine;
}
else
{
i4_num_ref_fpel = 2;
curr_layer_implicit = 1;
{
if(ME_MEDIUM_SPEED > e_me_quality_presets)
{
s_search_prms_blk.pf_mv_cost_compute = compute_mv_cost_implicit;
}
else
{
#if USE_MODIFIED == 1
s_search_prms_blk.pf_mv_cost_compute = compute_mv_cost_implicit_high_speed_modified;
#else
s_search_prms_blk.pf_mv_cost_compute = compute_mv_cost_implicit_high_speed;
#endif
}
}
}
i4_num_ref_fpel = MIN(i4_num_ref_fpel, i4_num_ref_prev_layer);
if(ps_multi_thrd_ctxt->aps_curr_inp_pre_enc[i4_ping_pong]->s_lap_out.i4_pic_type ==
IV_IDR_FRAME ||
ps_multi_thrd_ctxt->aps_curr_inp_pre_enc[i4_ping_pong]->s_lap_out.i4_pic_type == IV_I_FRAME)
{
i4_num_ref_fpel = 1;
}
if(i4_num_ref_prev_layer <= 2)
{
prev_layer_implicit = 1;
curr_layer_implicit = 1;
i4_num_ref_each_dir = 1;
}
else
{
/* It is assumed that we have equal number of references in each dir */
//ASSERT(!(i4_num_ref_prev_layer & 1));
prev_layer_implicit = 0;
i4_num_ref_each_dir = i4_num_ref_prev_layer >> 1;
}
s_mv_update_prms.i4_num_ref = i4_num_ref_fpel;
s_mv_update_prms.i4_num_active_ref_l0 = ps_ctxt->s_frm_prms.u1_num_active_ref_l0;
s_mv_update_prms.i4_num_active_ref_l1 = ps_ctxt->s_frm_prms.u1_num_active_ref_l1;
/* this can be kept to 1 or 2 */
i4_num_ref_before_merge = 2;
i4_num_ref_before_merge = MIN(i4_num_ref_before_merge, i4_num_ref_fpel);
/* Set up place holders to hold the search nodes of each initial candt */
for(i = 0; i < MAX_INIT_CANDTS; i++)
{
ps_search_candts[i].ps_search_node = &ps_ctxt->s_init_search_node[i];
INIT_SEARCH_NODE(ps_search_candts[i].ps_search_node, 0);
}
/* redundant, but doing it here since it is used in pred ctxt init */
ps_candt_zeromv = ps_search_candts[0].ps_search_node;
for(i = 0; i < 3; i++)
{
search_node_t *ps_search_node;
ps_search_node = &as_left_neighbours[i];
INIT_SEARCH_NODE(ps_search_node, 0);
ps_search_node = &as_top_neighbours[i];
INIT_SEARCH_NODE(ps_search_node, 0);
}
INIT_SEARCH_NODE(&as_top_neighbours[3], 0);
/* bottom left node always not available for the blk being searched */
as_left_neighbours[2].u1_is_avail = 0;
/*************************************************************************/
/* Initialize all the search results structure here. We update all the */
/* search results to default values, and configure things like blk sizes */
/*************************************************************************/
if(ps_refine_prms->i4_encode == 0)
{
S32 pred_lx;
search_results_t *ps_search_results;
ps_search_results = &ps_ctxt->s_search_results_8x8;
hme_init_search_results(
ps_search_results,
i4_num_ref_fpel,
ps_refine_prms->i4_num_fpel_results,
ps_refine_prms->i4_num_results_per_part,
e_search_blk_size,
0,
0,
&ps_ctxt->au1_is_past[0]);
for(pred_lx = 0; pred_lx < 2; pred_lx++)
{
hme_init_pred_ctxt_no_encode(
&ps_search_results->as_pred_ctxt[pred_lx],
ps_search_results,
&as_top_neighbours[0],
&as_left_neighbours[0],
&ps_candt_prj_coloc[0],
ps_candt_zeromv,
ps_candt_zeromv,
pred_lx,
lambda_inp,
ps_refine_prms->lambda_q_shift,
&ps_ctxt->apu1_ref_bits_tlu_lc[0],
&ps_ctxt->ai2_ref_scf[0]);
}
}
/*********************************************************************/
/* Initialize the dyn. search range params. for each reference index */
/* in current layer ctxt */
/*********************************************************************/
/* Only for P pic. For P, both are 0, I&B has them mut. exclusive */
if(ps_ctxt->s_frm_prms.is_i_pic == ps_ctxt->s_frm_prms.bidir_enabled)
{
WORD32 ref_ctr;
for(ref_ctr = 0; ref_ctr < s_mv_update_prms.i4_num_ref; ref_ctr++)
{
INIT_DYN_SEARCH_PRMS(
&ps_ctxt->s_coarse_dyn_range_prms
.as_dyn_range_prms[ps_refine_prms->i4_layer_id][ref_ctr],
ps_ctxt->ai4_ref_idx_to_poc_lc[ref_ctr]);
}
}
/* Next set up initial candidates according to a given set of rules. */
/* The number of initial candidates affects the quality of ME in the */
/* case of motion with multiple degrees of freedom. In case of simple */
/* translational motion, a current and a few causal and non causal */
/* candts would suffice. More candidates help to cover more complex */
/* cases like partitions, rotation/zoom, occlusion in/out, fine motion */
/* where multiple ref helps etc. */
/* The candidate choice also depends on the following parameters. */
/* e_search_complexity: SRCH_CX_LOW, SRCH_CX_MED, SRCH_CX_HIGH */
/* Whether we encode or not, and the type of search across reference */
/* i.e. the previous layer may have been explicit/implicit and curr */
/* layer may be explicit/implicit */
/* 0, 0, L, T, projected coloc best always present by default */
id = hme_decide_search_candidate_priority_in_l1_and_l2_me(ZERO_MV, e_me_quality_presets);
ps_candt_zeromv = ps_search_candts[id].ps_search_node;
ps_search_candts[id].u1_num_steps_refine = 0;
ps_candt_zeromv->s_mv.i2_mvx = 0;
ps_candt_zeromv->s_mv.i2_mvy = 0;
id = hme_decide_search_candidate_priority_in_l1_and_l2_me(SPATIAL_LEFT0, e_me_quality_presets);
ps_candt_l = ps_search_candts[id].ps_search_node;
ps_search_candts[id].u1_num_steps_refine = 0;
/* Even in ME_HIGH_SPEED mode, in layer 0, blocks */
/* not at the CTB boundary use the causal T and */
/* not the projected T, although the candidate is */
/* still pointed to by ps_candt_prj_t[0] */
if(ME_MEDIUM_SPEED <= e_me_quality_presets)
{
/* Using Projected top to eliminate sync */
id = hme_decide_search_candidate_priority_in_l1_and_l2_me(
PROJECTED_TOP0, e_me_quality_presets);
ps_candt_prj_t[0] = ps_search_candts[id].ps_search_node;
ps_search_candts[id].u1_num_steps_refine = 1;
}
else
{
id = hme_decide_search_candidate_priority_in_l1_and_l2_me(
SPATIAL_TOP0, e_me_quality_presets);
ps_candt_t = ps_search_candts[id].ps_search_node;
ps_search_candts[id].u1_num_steps_refine = 0;
}
id = hme_decide_search_candidate_priority_in_l1_and_l2_me(
PROJECTED_COLOC0, e_me_quality_presets);
ps_candt_prj_coloc[0] = ps_search_candts[id].ps_search_node;
ps_search_candts[id].u1_num_steps_refine = 1;
id = hme_decide_search_candidate_priority_in_l1_and_l2_me(
PROJECTED_COLOC1, e_me_quality_presets);
ps_candt_prj_coloc[1] = ps_search_candts[id].ps_search_node;
ps_search_candts[id].u1_num_steps_refine = 1;
if(ME_MEDIUM_SPEED <= e_me_quality_presets)
{
id = hme_decide_search_candidate_priority_in_l1_and_l2_me(
PROJECTED_TOP_RIGHT0, e_me_quality_presets);
ps_candt_prj_tr[0] = ps_search_candts[id].ps_search_node;
ps_search_candts[id].u1_num_steps_refine = 1;
id = hme_decide_search_candidate_priority_in_l1_and_l2_me(
PROJECTED_TOP_LEFT0, e_me_quality_presets);
ps_candt_prj_tl[0] = ps_search_candts[id].ps_search_node;
ps_search_candts[id].u1_num_steps_refine = 1;
}
else
{
id = hme_decide_search_candidate_priority_in_l1_and_l2_me(
SPATIAL_TOP_RIGHT0, e_me_quality_presets);
ps_candt_tr = ps_search_candts[id].ps_search_node;
ps_search_candts[id].u1_num_steps_refine = 0;
id = hme_decide_search_candidate_priority_in_l1_and_l2_me(
SPATIAL_TOP_LEFT0, e_me_quality_presets);
ps_candt_tl = ps_search_candts[id].ps_search_node;
ps_search_candts[id].u1_num_steps_refine = 0;
}
id = hme_decide_search_candidate_priority_in_l1_and_l2_me(
PROJECTED_RIGHT0, e_me_quality_presets);
ps_candt_prj_r[0] = ps_search_candts[id].ps_search_node;
ps_search_candts[id].u1_num_steps_refine = 1;
id = hme_decide_search_candidate_priority_in_l1_and_l2_me(
PROJECTED_BOTTOM0, e_me_quality_presets);
ps_candt_prj_b[0] = ps_search_candts[id].ps_search_node;
ps_search_candts[id].u1_num_steps_refine = 1;
id = hme_decide_search_candidate_priority_in_l1_and_l2_me(
PROJECTED_BOTTOM_RIGHT0, e_me_quality_presets);
ps_candt_prj_br[0] = ps_search_candts[id].ps_search_node;
ps_search_candts[id].u1_num_steps_refine = 1;
id = hme_decide_search_candidate_priority_in_l1_and_l2_me(
PROJECTED_BOTTOM_LEFT0, e_me_quality_presets);
ps_candt_prj_bl[0] = ps_search_candts[id].ps_search_node;
ps_search_candts[id].u1_num_steps_refine = 1;
id = hme_decide_search_candidate_priority_in_l1_and_l2_me(
PROJECTED_RIGHT1, e_me_quality_presets);
ps_candt_prj_r[1] = ps_search_candts[id].ps_search_node;
ps_search_candts[id].u1_num_steps_refine = 1;
id = hme_decide_search_candidate_priority_in_l1_and_l2_me(
PROJECTED_BOTTOM1, e_me_quality_presets);
ps_candt_prj_b[1] = ps_search_candts[id].ps_search_node;
ps_search_candts[id].u1_num_steps_refine = 1;
id = hme_decide_search_candidate_priority_in_l1_and_l2_me(
PROJECTED_BOTTOM_RIGHT1, e_me_quality_presets);
ps_candt_prj_br[1] = ps_search_candts[id].ps_search_node;
ps_search_candts[id].u1_num_steps_refine = 1;
id = hme_decide_search_candidate_priority_in_l1_and_l2_me(
PROJECTED_BOTTOM_LEFT1, e_me_quality_presets);
ps_candt_prj_bl[1] = ps_search_candts[id].ps_search_node;
ps_search_candts[id].u1_num_steps_refine = 1;
id = hme_decide_search_candidate_priority_in_l1_and_l2_me(PROJECTED_TOP1, e_me_quality_presets);
ps_candt_prj_t[1] = ps_search_candts[id].ps_search_node;
ps_search_candts[id].u1_num_steps_refine = 1;
id = hme_decide_search_candidate_priority_in_l1_and_l2_me(
PROJECTED_TOP_RIGHT1, e_me_quality_presets);
ps_candt_prj_tr[1] = ps_search_candts[id].ps_search_node;
ps_search_candts[id].u1_num_steps_refine = 1;
id = hme_decide_search_candidate_priority_in_l1_and_l2_me(
PROJECTED_TOP_LEFT1, e_me_quality_presets);
ps_candt_prj_tl[1] = ps_search_candts[id].ps_search_node;
ps_search_candts[id].u1_num_steps_refine = 1;
/*************************************************************************/
/* Now that the candidates have been ordered, to choose the right number */
/* of initial candidates. */
/*************************************************************************/
if(curr_layer_implicit && !prev_layer_implicit)
{
if(e_search_complexity == SEARCH_CX_LOW)
num_init_candts = 7;
else if(e_search_complexity == SEARCH_CX_MED)
num_init_candts = 13;
else if(e_search_complexity == SEARCH_CX_HIGH)
num_init_candts = 18;
else
ASSERT(0);
}
else
{
if(e_search_complexity == SEARCH_CX_LOW)
num_init_candts = 5;
else if(e_search_complexity == SEARCH_CX_MED)
num_init_candts = 11;
else if(e_search_complexity == SEARCH_CX_HIGH)
num_init_candts = 16;
else
ASSERT(0);
}
if(ME_XTREME_SPEED_25 == e_me_quality_presets)
{
num_init_candts = NUM_INIT_SEARCH_CANDS_IN_L1_AND_L2_ME_IN_XS25;
}
/*************************************************************************/
/* The following search parameters are fixed throughout the search across*/
/* all blks. So these are configured outside processing loop */
/*************************************************************************/
s_search_prms_blk.i4_num_init_candts = num_init_candts;
s_search_prms_blk.i4_start_step = 1;
s_search_prms_blk.i4_use_satd = 0;
s_search_prms_blk.i4_num_steps_post_refine = ps_refine_prms->i4_num_steps_post_refine_fpel;
/* we use recon only for encoded layers, otherwise it is not available */
s_search_prms_blk.i4_use_rec = ps_refine_prms->i4_encode & ps_refine_prms->i4_use_rec_in_fpel;
s_search_prms_blk.ps_search_candts = ps_search_candts;
/* We use the same mv_range for all ref. pic. So assign to member 0 */
if(s_search_prms_blk.i4_use_rec)
s_search_prms_blk.aps_mv_range[0] = &s_range_prms_rec;
else
s_search_prms_blk.aps_mv_range[0] = &s_range_prms_inp;
/*************************************************************************/
/* Initialize coordinates. Meaning as follows */
/* blk_x : x coordinate of the 16x16 blk, in terms of number of blks */
/* blk_y : same as above, y coord. */
/* num_blks_in_this_ctb : number of blks in this given ctb that starts */
/* at i4_ctb_x, i4_ctb_y. This may not be 16 at picture boundaries. */
/* i4_ctb_x, i4_ctb_y: pixel coordinate of the ctb relative to top left */
/* corner of the picture. Always multiple of 64. */
/* blk_id_in_ctb : encode order id of the blk in the ctb. */
/*************************************************************************/
blk_y = 0;
blk_id_in_ctb = 0;
GET_NUM_BLKS_IN_PIC(i4_pic_wd, i4_pic_ht, blk_size_shift, num_blks_in_row, num_blks_in_pic);
/* Get the number of sync units in a row based on encode/non encode layer */
num_sync_units_in_row = num_blks_in_row;
/*************************************************************************/
/* Picture limit on all 4 sides. This will be used to set mv limits for */
/* every block given its coordinate. Note this assumes that the min amt */
/* of padding to right of pic is equal to the blk size. If we go all the */
/* way upto 64x64, then the min padding on right side of picture should */
/* be 64, and also on bottom side of picture. */
/*************************************************************************/
SET_PIC_LIMIT(
s_pic_limit_inp,
ps_curr_layer->i4_pad_x_inp,
ps_curr_layer->i4_pad_y_inp,
ps_curr_layer->i4_wd,
ps_curr_layer->i4_ht,
s_search_prms_blk.i4_num_steps_post_refine);
SET_PIC_LIMIT(
s_pic_limit_rec,
ps_curr_layer->i4_pad_x_rec,
ps_curr_layer->i4_pad_y_rec,
ps_curr_layer->i4_wd,
ps_curr_layer->i4_ht,
s_search_prms_blk.i4_num_steps_post_refine);
/*************************************************************************/
/* set the MV limit per ref. pic. */
/* - P pic. : Based on the config params. */
/* - B/b pic: Based on the Max/Min MV from prev. P and config. param. */
/*************************************************************************/
{
WORD32 ref_ctr;
/* Only for B/b pic. */
if(1 == ps_ctxt->s_frm_prms.bidir_enabled)
{
WORD16 i2_mv_y_per_poc, i2_max_mv_y;
WORD32 cur_poc, ref_poc, abs_poc_diff;
cur_poc = ps_ctxt->i4_curr_poc;
/* Get abs MAX for symmetric search */
i2_mv_y_per_poc = MAX(
ps_ctxt->s_coarse_dyn_range_prms.i2_dyn_max_y_per_poc[ps_refine_prms->i4_layer_id],
(ABS(ps_ctxt->s_coarse_dyn_range_prms
.i2_dyn_min_y_per_poc[ps_refine_prms->i4_layer_id])));
for(ref_ctr = 0; ref_ctr < i4_num_ref_fpel; ref_ctr++)
{
ref_poc = ps_ctxt->ai4_ref_idx_to_poc_lc[ref_ctr];
abs_poc_diff = ABS((cur_poc - ref_poc));
/* Get the cur. max MV based on POC distance */
i2_max_mv_y = i2_mv_y_per_poc * abs_poc_diff;
i2_max_mv_y = MIN(i2_max_mv_y, ps_curr_layer->i2_max_mv_y);
as_mv_limit[ref_ctr].i2_min_x = -ps_curr_layer->i2_max_mv_x;
as_mv_limit[ref_ctr].i2_min_y = -i2_max_mv_y;
as_mv_limit[ref_ctr].i2_max_x = ps_curr_layer->i2_max_mv_x;
as_mv_limit[ref_ctr].i2_max_y = i2_max_mv_y;
}
}
else
{
/* Set the Config. File Params for P pic. */
for(ref_ctr = 0; ref_ctr < i4_num_ref_fpel; ref_ctr++)
{
as_mv_limit[ref_ctr].i2_min_x = -ps_curr_layer->i2_max_mv_x;
as_mv_limit[ref_ctr].i2_min_y = -ps_curr_layer->i2_max_mv_y;
as_mv_limit[ref_ctr].i2_max_x = ps_curr_layer->i2_max_mv_x;
as_mv_limit[ref_ctr].i2_max_y = ps_curr_layer->i2_max_mv_y;
}
}
}
/* EIID: Calculate threshold based on quality preset and/or temporal layers */
if(e_me_quality_presets == ME_MEDIUM_SPEED)
{
i4_threshold_multiplier = 1;
i4_threshold_divider = 4;
}
else if(e_me_quality_presets == ME_HIGH_SPEED)
{
i4_threshold_multiplier = 1;
i4_threshold_divider = 2;
}
else if((e_me_quality_presets == ME_XTREME_SPEED) || (e_me_quality_presets == ME_XTREME_SPEED_25))
{
#if OLD_XTREME_SPEED
/* Hard coding the temporal ID value to 1, if it is older xtreme speed */
i4_temporal_layer = 1;
#endif
if(i4_temporal_layer == 0)
{
i4_threshold_multiplier = 3;
i4_threshold_divider = 4;
}
else if(i4_temporal_layer == 1)
{
i4_threshold_multiplier = 3;
i4_threshold_divider = 4;
}
else if(i4_temporal_layer == 2)
{
i4_threshold_multiplier = 1;
i4_threshold_divider = 1;
}
else
{
i4_threshold_multiplier = 5;
i4_threshold_divider = 4;
}
}
else if(e_me_quality_presets == ME_HIGH_QUALITY)
{
i4_threshold_multiplier = 1;
i4_threshold_divider = 1;
}
/*************************************************************************/
/*************************************************************************/
/*************************************************************************/
/* START OF THE CORE LOOP */
/* If Encode is 0, then we just loop over each blk */
/*************************************************************************/
/*************************************************************************/
/*************************************************************************/
while(0 == end_of_frame)
{
job_queue_t *ps_job;
ihevce_ed_blk_t *ps_ed_blk_ctxt_curr_row; //EIID
WORD32 i4_ctb_row_ctr; //counter to calculate CTB row counter. It's (row_ctr /4)
WORD32 i4_num_ctbs_in_row = (num_blks_in_row + 3) / 4; //calculations verified for L1 only
//+3 to get ceil values when divided by 4
WORD32 i4_num_4x4_blocks_in_ctb_at_l1 =
8 * 8; //considering CTB size 32x32 at L1. hardcoded for now
//if there is variable for ctb size use that and this variable can be derived
WORD32 offset_val, check_dep_pos, set_dep_pos;
void *pv_hme_dep_mngr;
ihevce_ed_ctb_l1_t *ps_ed_ctb_l1_row;
/* Get the current layer HME Dep Mngr */
/* Note : Use layer_id - 1 in HME layers */
pv_hme_dep_mngr = ppv_dep_mngr_hme_sync[ps_refine_prms->i4_layer_id - 1];
/* Get the current row from the job queue */
ps_job = (job_queue_t *)ihevce_pre_enc_grp_get_next_job(
ps_multi_thrd_ctxt, lyr_job_type, 1, i4_ping_pong);
/* If all rows are done, set the end of process flag to 1, */
/* and the current row to -1 */
if(NULL == ps_job)
{
blk_y = -1;
end_of_frame = 1;
continue;
}
if(1 == ps_ctxt->s_frm_prms.is_i_pic)
{
/* set the output dependency of current row */
ihevce_pre_enc_grp_job_set_out_dep(ps_multi_thrd_ctxt, ps_job, i4_ping_pong);
continue;
}
blk_y = ps_job->s_job_info.s_me_job_info.i4_vert_unit_row_no;
blk_x = 0;
i4_ctb_x = 0;
/* wait for Corresponding Pre intra Job to be completed */
if(1 == ps_refine_prms->i4_layer_id)
{
volatile UWORD32 i4_l1_done;
volatile UWORD32 *pi4_l1_done;
pi4_l1_done = (volatile UWORD32 *)&ps_multi_thrd_ctxt
->aai4_l1_pre_intra_done[i4_ping_pong][blk_y >> 2];
i4_l1_done = *pi4_l1_done;
while(!i4_l1_done)
{
i4_l1_done = *pi4_l1_done;
}
}
/* Set Variables for Dep. Checking and Setting */
set_dep_pos = blk_y + 1;
if(blk_y > 0)
{
offset_val = 2;
check_dep_pos = blk_y - 1;
}
else
{
/* First row should run without waiting */
offset_val = -1;
check_dep_pos = 0;
}
/* EIID: calculate ed_blk_ctxt pointer for current row */
/* valid only for layer-1; not verified or used for other layers */
i4_ctb_row_ctr = blk_y / 4;
ps_ed_blk_ctxt_curr_row =
ps_ctxt->ps_ed_blk + (i4_ctb_row_ctr * i4_num_ctbs_in_row *
i4_num_4x4_blocks_in_ctb_at_l1); //valid for L1 only
ps_ed_ctb_l1_row = ps_ctxt->ps_ed_ctb_l1 + (i4_ctb_row_ctr * i4_num_ctbs_in_row);
/* if non-encode layer then i4_ctb_x will be same as blk_x */
/* loop over all the units in a row */
for(; i4_ctb_x < num_sync_units_in_row; i4_ctb_x++)
{
ihevce_ed_blk_t *ps_ed_blk_ctxt_curr_ctb; //EIDD
ihevce_ed_ctb_l1_t *ps_ed_ctb_l1_curr;
WORD32 i4_ctb_blk_ctr = i4_ctb_x / 4;
/* Wait till top row block is processed */
/* Currently checking till top right block*/
/* Disabled since all candidates, except for */
/* L and C, are projected from the coarser layer, */
/* only in ME_HIGH_SPEED mode */
if((ME_MEDIUM_SPEED > e_me_quality_presets))
{
if(i4_ctb_x < (num_sync_units_in_row - 1))
{
ihevce_dmgr_chk_row_row_sync(
pv_hme_dep_mngr,
i4_ctb_x,
offset_val,
check_dep_pos,
0, /* Col Tile No. : Not supported in PreEnc*/
ps_ctxt->thrd_id);
}
}
{
/* for non encoder layer only one block is processed */
num_blks_in_this_ctb = 1;
}
/* EIID: derive ed_ctxt ptr for current CTB */
ps_ed_blk_ctxt_curr_ctb =
ps_ed_blk_ctxt_curr_row +
(i4_ctb_blk_ctr *
i4_num_4x4_blocks_in_ctb_at_l1); //currently valid for l1 layer only
ps_ed_ctb_l1_curr = ps_ed_ctb_l1_row + i4_ctb_blk_ctr;
/* loop over all the blocks in the CTB; the block count is always 1 here */
for(blk_id_in_ctb = 0; blk_id_in_ctb < num_blks_in_this_ctb; blk_id_in_ctb++)
{
{
/* non encode layer */
blk_x = i4_ctb_x;
blk_id_in_full_ctb = 0;
s_search_prms_blk.i4_cu_x_off = s_search_prms_blk.i4_cu_y_off = 0;
}
/* get the current input blk point */
pos_x = blk_x << blk_size_shift;
pos_y = blk_y << blk_size_shift;
pu1_inp = ps_curr_layer->pu1_inp + pos_x + (pos_y * i4_inp_stride);
/*********************************************************************/
/* replicate the inp buffer at blk or ctb level for each ref id, */
/* Instead of searching with wk * ref(k), we search with Ik = I / wk */
/* thereby avoiding a bloat up of memory. If we did all references */
/* weighted pred, we will end up with a duplicate copy of each ref */
/* at each layer, since we need to preserve the original reference. */
/* ToDo: Need to observe performance with this mechanism and compare */
/* with case where ref is weighted. */
/*********************************************************************/
if(blk_id_in_ctb == 0)
{
fp_get_wt_inp(
ps_curr_layer,
&ps_ctxt->s_wt_pred,
unit_size,
pos_x,
pos_y,
unit_size,
ps_ctxt->num_ref_future + ps_ctxt->num_ref_past,
ps_ctxt->i4_wt_pred_enable_flag);
}
s_search_prms_blk.i4_x_off = blk_x << blk_size_shift;
s_search_prms_blk.i4_y_off = blk_y << blk_size_shift;
/* Select search results from a suitable search result in the context */
{
ps_search_results = &ps_ctxt->s_search_results_8x8;
}
s_search_prms_blk.ps_search_results = ps_search_results;
/* RESET ALL SEARCH RESULTS FOR THE NEW BLK */
hme_reset_search_results(
ps_search_results, s_search_prms_blk.i4_part_mask, MV_RES_FPEL);
/* Loop across different Ref IDx */
for(i1_ref_idx = 0; i1_ref_idx < i4_num_ref_fpel; i1_ref_idx++)
{
S32 next_blk_offset = (e_search_blk_size == BLK_16x16) ? 22 : 12;
S32 prev_blk_offset = 6;
S32 resultid;
/*********************************************************************/
/* For every blk in the picture, the search range needs to be derived*/
/* Any blk can have any mv, but practical search constraints are */
/* imposed by the picture boundary and amt of padding. */
/*********************************************************************/
/* MV limit is different based on ref. PIC */
hme_derive_search_range(
&s_range_prms_inp,
&s_pic_limit_inp,
&as_mv_limit[i1_ref_idx],
pos_x,
pos_y,
blk_wd,
blk_ht);
hme_derive_search_range(
&s_range_prms_rec,
&s_pic_limit_rec,
&as_mv_limit[i1_ref_idx],
pos_x,
pos_y,
blk_wd,
blk_ht);
s_search_prms_blk.i1_ref_idx = i1_ref_idx;
ps_candt_zeromv->i1_ref_idx = i1_ref_idx;
i4_num_srch_cands = 1;
if(1 != ps_refine_prms->i4_layer_id)
{
S32 x, y;
x = gau1_encode_to_raster_x[blk_id_in_full_ctb];
y = gau1_encode_to_raster_y[blk_id_in_full_ctb];
if(ME_MEDIUM_SPEED > e_me_quality_presets)
{
hme_get_spatial_candt(
ps_curr_layer,
e_search_blk_size,
blk_x,
blk_y,
i1_ref_idx,
&as_top_neighbours[0],
&as_left_neighbours[0],
0,
((ps_refine_prms->i4_encode) ? gau1_cu_tr_valid[y][x] : 1),
0,
ps_refine_prms->i4_encode);
*ps_candt_tr = as_top_neighbours[3];
*ps_candt_t = as_top_neighbours[1];
*ps_candt_tl = as_top_neighbours[0];
i4_num_srch_cands += 3;
}
else
{
layer_mv_t *ps_layer_mvbank = ps_curr_layer->ps_layer_mvbank;
S32 i4_blk_size1 = gau1_blk_size_to_wd[ps_layer_mvbank->e_blk_size];
S32 i4_blk_size2 = gau1_blk_size_to_wd[e_search_blk_size];
search_node_t *ps_search_node;
S32 i4_offset, blk_x_temp = blk_x, blk_y_temp = blk_y;
hme_mv_t *ps_mv, *ps_mv_base;
S08 *pi1_ref_idx, *pi1_ref_idx_base;
S32 jump = 1, mvs_in_blk, mvs_in_row;
S32 shift = (ps_refine_prms->i4_encode ? 2 : 0);
if(i4_blk_size1 != i4_blk_size2)
{
blk_x_temp <<= 1;
blk_y_temp <<= 1;
jump = 2;
if((i4_blk_size1 << 2) == i4_blk_size2)
{
blk_x_temp <<= 1;
blk_y_temp <<= 1;
jump = 4;
}
}
mvs_in_blk = ps_layer_mvbank->i4_num_mvs_per_blk;
mvs_in_row = ps_layer_mvbank->i4_num_mvs_per_row;
/* Adjust the blk coord to point to top left locn */
blk_x_temp -= 1;
blk_y_temp -= 1;
/* Pick up the mvs from the location */
i4_offset = (blk_x_temp * ps_layer_mvbank->i4_num_mvs_per_blk);
i4_offset += (ps_layer_mvbank->i4_num_mvs_per_row * blk_y_temp);
ps_mv = ps_layer_mvbank->ps_mv + i4_offset;
pi1_ref_idx = ps_layer_mvbank->pi1_ref_idx + i4_offset;
ps_mv += (i1_ref_idx * ps_layer_mvbank->i4_num_mvs_per_ref);
pi1_ref_idx += (i1_ref_idx * ps_layer_mvbank->i4_num_mvs_per_ref);
ps_mv_base = ps_mv;
pi1_ref_idx_base = pi1_ref_idx;
ps_search_node = &as_left_neighbours[0];
ps_mv = ps_mv_base + mvs_in_row;
pi1_ref_idx = pi1_ref_idx_base + mvs_in_row;
COPY_MV_TO_SEARCH_NODE(
ps_search_node, ps_mv, pi1_ref_idx, i1_ref_idx, shift);
i4_num_srch_cands++;
}
}
else
{
S32 x, y;
x = gau1_encode_to_raster_x[blk_id_in_full_ctb];
y = gau1_encode_to_raster_y[blk_id_in_full_ctb];
if(ME_MEDIUM_SPEED > e_me_quality_presets)
{
hme_get_spatial_candt_in_l1_me(
ps_curr_layer,
e_search_blk_size,
blk_x,
blk_y,
i1_ref_idx,
!ps_search_results->pu1_is_past[i1_ref_idx],
&as_top_neighbours[0],
&as_left_neighbours[0],
0,
((ps_refine_prms->i4_encode) ? gau1_cu_tr_valid[y][x] : 1),
0,
ps_ctxt->s_frm_prms.u1_num_active_ref_l0,
ps_ctxt->s_frm_prms.u1_num_active_ref_l1);
*ps_candt_tr = as_top_neighbours[3];
*ps_candt_t = as_top_neighbours[1];
*ps_candt_tl = as_top_neighbours[0];
i4_num_srch_cands += 3;
}
else
{
layer_mv_t *ps_layer_mvbank = ps_curr_layer->ps_layer_mvbank;
S32 i4_blk_size1 = gau1_blk_size_to_wd[ps_layer_mvbank->e_blk_size];
S32 i4_blk_size2 = gau1_blk_size_to_wd[e_search_blk_size];
S32 i4_mv_pos_in_implicit_array;
search_node_t *ps_search_node;
S32 i4_offset, blk_x_temp = blk_x, blk_y_temp = blk_y;
hme_mv_t *ps_mv, *ps_mv_base;
S08 *pi1_ref_idx, *pi1_ref_idx_base;
S32 jump = 1, mvs_in_blk, mvs_in_row;
S32 shift = (ps_refine_prms->i4_encode ? 2 : 0);
U08 u1_pred_dir = !ps_search_results->pu1_is_past[i1_ref_idx];
S32 i4_num_results_in_given_dir =
((u1_pred_dir == 1) ? (ps_layer_mvbank->i4_num_mvs_per_ref *
ps_ctxt->s_frm_prms.u1_num_active_ref_l1)
: (ps_layer_mvbank->i4_num_mvs_per_ref *
ps_ctxt->s_frm_prms.u1_num_active_ref_l0));
if(i4_blk_size1 != i4_blk_size2)
{
blk_x_temp <<= 1;
blk_y_temp <<= 1;
jump = 2;
if((i4_blk_size1 << 2) == i4_blk_size2)
{
blk_x_temp <<= 1;
blk_y_temp <<= 1;
jump = 4;
}
}
mvs_in_blk = ps_layer_mvbank->i4_num_mvs_per_blk;
mvs_in_row = ps_layer_mvbank->i4_num_mvs_per_row;
/* Adjust the blk coord to point to top left locn */
blk_x_temp -= 1;
blk_y_temp -= 1;
/* Pick up the mvs from the location */
i4_offset = (blk_x_temp * ps_layer_mvbank->i4_num_mvs_per_blk);
i4_offset += (ps_layer_mvbank->i4_num_mvs_per_row * blk_y_temp);
i4_offset +=
((u1_pred_dir == 1) ? (ps_layer_mvbank->i4_num_mvs_per_ref *
ps_ctxt->s_frm_prms.u1_num_active_ref_l0)
: 0);
ps_mv = ps_layer_mvbank->ps_mv + i4_offset;
pi1_ref_idx = ps_layer_mvbank->pi1_ref_idx + i4_offset;
ps_mv_base = ps_mv;
pi1_ref_idx_base = pi1_ref_idx;
{
/* ps_mv and pi1_ref_idx now point to the top left locn */
ps_search_node = &as_left_neighbours[0];
ps_mv = ps_mv_base + mvs_in_row;
pi1_ref_idx = pi1_ref_idx_base + mvs_in_row;
i4_mv_pos_in_implicit_array =
hme_find_pos_of_implicitly_stored_ref_id(
pi1_ref_idx, i1_ref_idx, 0, i4_num_results_in_given_dir);
if(-1 != i4_mv_pos_in_implicit_array)
{
COPY_MV_TO_SEARCH_NODE(
ps_search_node,
&ps_mv[i4_mv_pos_in_implicit_array],
&pi1_ref_idx[i4_mv_pos_in_implicit_array],
i1_ref_idx,
shift);
}
else
{
ps_search_node->u1_is_avail = 0;
ps_search_node->s_mv.i2_mvx = 0;
ps_search_node->s_mv.i2_mvy = 0;
ps_search_node->i1_ref_idx = i1_ref_idx;
}
i4_num_srch_cands++;
}
}
}
*ps_candt_l = as_left_neighbours[0];
/* when 16x16 is searched in an encode layer, and the prev layer */
/* stores results for 4x4 blks, we project 5 candts corresponding */
/* to (2,2), (2,14), (14,2), (14,14) and 2nd best of (2,2) */
/* However in other cases, only 2,2 best and 2nd best reqd */
resultid = 0;
pf_hme_project_coloc_candt(
ps_candt_prj_coloc[0],
ps_curr_layer,
ps_coarse_layer,
pos_x + 2,
pos_y + 2,
i1_ref_idx,
resultid);
i4_num_srch_cands++;
resultid = 1;
if(num_results_prev_layer > 1)
{
pf_hme_project_coloc_candt(
ps_candt_prj_coloc[1],
ps_curr_layer,
ps_coarse_layer,
pos_x + 2,
pos_y + 2,
i1_ref_idx,
resultid);
i4_num_srch_cands++;
}
resultid = 0;
if(ME_MEDIUM_SPEED <= e_me_quality_presets)
{
pf_hme_project_coloc_candt(
ps_candt_prj_t[0],
ps_curr_layer,
ps_coarse_layer,
pos_x,
pos_y - prev_blk_offset,
i1_ref_idx,
resultid);
i4_num_srch_cands++;
}
{
pf_hme_project_coloc_candt(
ps_candt_prj_br[0],
ps_curr_layer,
ps_coarse_layer,
pos_x + next_blk_offset,
pos_y + next_blk_offset,
i1_ref_idx,
resultid);
pf_hme_project_coloc_candt(
ps_candt_prj_bl[0],
ps_curr_layer,
ps_coarse_layer,
pos_x - prev_blk_offset,
pos_y + next_blk_offset,
i1_ref_idx,
resultid);
pf_hme_project_coloc_candt(
ps_candt_prj_r[0],
ps_curr_layer,
ps_coarse_layer,
pos_x + next_blk_offset,
pos_y,
i1_ref_idx,
resultid);
pf_hme_project_coloc_candt(
ps_candt_prj_b[0],
ps_curr_layer,
ps_coarse_layer,
pos_x,
pos_y + next_blk_offset,
i1_ref_idx,
resultid);
i4_num_srch_cands += 4;
if(ME_MEDIUM_SPEED <= e_me_quality_presets)
{
pf_hme_project_coloc_candt(
ps_candt_prj_tr[0],
ps_curr_layer,
ps_coarse_layer,
pos_x + next_blk_offset,
pos_y - prev_blk_offset,
i1_ref_idx,
resultid);
pf_hme_project_coloc_candt(
ps_candt_prj_tl[0],
ps_curr_layer,
ps_coarse_layer,
pos_x - prev_blk_offset,
pos_y - prev_blk_offset,
i1_ref_idx,
resultid);
i4_num_srch_cands += 2;
}
}
if((num_results_prev_layer > 1) && (e_search_complexity >= SEARCH_CX_MED))
{
resultid = 1;
pf_hme_project_coloc_candt(
ps_candt_prj_br[1],
ps_curr_layer,
ps_coarse_layer,
pos_x + next_blk_offset,
pos_y + next_blk_offset,
i1_ref_idx,
resultid);
pf_hme_project_coloc_candt(
ps_candt_prj_bl[1],
ps_curr_layer,
ps_coarse_layer,
pos_x - prev_blk_offset,
pos_y + next_blk_offset,
i1_ref_idx,
resultid);
pf_hme_project_coloc_candt(
ps_candt_prj_r[1],
ps_curr_layer,
ps_coarse_layer,
pos_x + next_blk_offset,
pos_y,
i1_ref_idx,
resultid);
pf_hme_project_coloc_candt(
ps_candt_prj_b[1],
ps_curr_layer,
ps_coarse_layer,
pos_x,
pos_y + next_blk_offset,
i1_ref_idx,
resultid);
i4_num_srch_cands += 4;
pf_hme_project_coloc_candt(
ps_candt_prj_tr[1],
ps_curr_layer,
ps_coarse_layer,
pos_x + next_blk_offset,
pos_y - prev_blk_offset,
i1_ref_idx,
resultid);
pf_hme_project_coloc_candt(
ps_candt_prj_tl[1],
ps_curr_layer,
ps_coarse_layer,
pos_x - prev_blk_offset,
pos_y - prev_blk_offset,
i1_ref_idx,
resultid);
pf_hme_project_coloc_candt(
ps_candt_prj_t[1],
ps_curr_layer,
ps_coarse_layer,
pos_x,
pos_y - prev_blk_offset,
i1_ref_idx,
resultid);
i4_num_srch_cands += 3;
}
/* Note this block also clips the MV range for all candidates */
#ifdef _DEBUG
{
S32 candt;
range_prms_t *ps_range_prms;
S32 num_ref_valid = ps_ctxt->num_ref_future + ps_ctxt->num_ref_past;
for(candt = 0; candt < i4_num_srch_cands; candt++)
{
search_node_t *ps_search_node;
ps_search_node =
s_search_prms_blk.ps_search_candts[candt].ps_search_node;
ps_range_prms = s_search_prms_blk.aps_mv_range[0];
if((ps_search_node->i1_ref_idx >= num_ref_valid) ||
(ps_search_node->i1_ref_idx < 0))
{
ASSERT(0);
}
}
}
#endif
{
S32 srch_cand;
S32 num_unique_nodes = 0;
S32 num_nodes_searched = 0;
S32 num_best_cand = 0;
S08 i1_grid_enable = 0;
search_node_t as_best_two_proj_node[TOT_NUM_PARTS * 2];
/* has list of valid partition to search terminated by -1 */
S32 ai4_valid_part_ids[TOT_NUM_PARTS + 1];
S32 center_x;
S32 center_y;
/* indicates if the centre point of grid needs to be explicitly added for search */
S32 add_centre = 0;
memset(au4_unique_node_map, 0, sizeof(au4_unique_node_map));
center_x = ps_candt_prj_coloc[0]->s_mv.i2_mvx;
center_y = ps_candt_prj_coloc[0]->s_mv.i2_mvy;
for(srch_cand = 0;
(srch_cand < i4_num_srch_cands) &&
(num_unique_nodes <= s_search_prms_blk.i4_num_init_candts);
srch_cand++)
{
search_node_t s_search_node_temp =
s_search_prms_blk.ps_search_candts[srch_cand].ps_search_node[0];
s_search_node_temp.i1_ref_idx = i1_ref_idx; //TEMP FIX;
/* Clip the motion vectors as well here since after clipping
two candidates can become same and they will be removed during deduplication */
CLIP_MV_WITHIN_RANGE(
s_search_node_temp.s_mv.i2_mvx,
s_search_node_temp.s_mv.i2_mvy,
s_search_prms_blk.aps_mv_range[0],
ps_refine_prms->i4_num_steps_fpel_refine,
ps_refine_prms->i4_num_steps_hpel_refine,
ps_refine_prms->i4_num_steps_qpel_refine);
/* PT_C */
INSERT_NEW_NODE(
as_unique_search_nodes,
num_unique_nodes,
s_search_node_temp,
0,
au4_unique_node_map,
center_x,
center_y,
1);
num_nodes_searched += 1;
}
num_unique_nodes =
MIN(num_unique_nodes, s_search_prms_blk.i4_num_init_candts);
/* If number of candidates projected/number of candidates to be refined are more than 2,
then filter out and choose the best two here */
if(num_unique_nodes >= 2)
{
S32 num_results;
S32 cnt;
S32 *pi4_valid_part_ids;
s_search_prms_blk.ps_search_nodes = &as_unique_search_nodes[0];
s_search_prms_blk.i4_num_search_nodes = num_unique_nodes;
pi4_valid_part_ids = &ai4_valid_part_ids[0];
/* pi4_valid_part_ids is updated inside */
hme_pred_search_no_encode(
&s_search_prms_blk,
ps_curr_layer,
&ps_ctxt->s_wt_pred,
pi4_valid_part_ids,
1,
e_me_quality_presets,
i1_grid_enable,
(ihevce_me_optimised_function_list_t *)
ps_ctxt->pv_me_optimised_function_list
);
num_best_cand = 0;
cnt = 0;
num_results = ps_search_results->u1_num_results_per_part;
while((id = pi4_valid_part_ids[cnt++]) >= 0)
{
num_results =
MIN(ps_refine_prms->pu1_num_best_results[id], num_results);
for(i = 0; i < num_results; i++)
{
search_node_t s_search_node_temp;
s_search_node_temp =
*(ps_search_results->aps_part_results[i1_ref_idx][id] + i);
if(s_search_node_temp.i1_ref_idx >= 0)
{
INSERT_NEW_NODE_NOMAP(
as_best_two_proj_node,
num_best_cand,
s_search_node_temp,
0);
}
}
}
}
else
{
add_centre = 1;
num_best_cand = num_unique_nodes;
as_best_two_proj_node[0] = as_unique_search_nodes[0];
}
num_unique_nodes = 0;
num_nodes_searched = 0;
if(1 == num_best_cand)
{
search_node_t s_search_node_temp = as_best_two_proj_node[0];
S16 i2_mv_x = s_search_node_temp.s_mv.i2_mvx;
S16 i2_mv_y = s_search_node_temp.s_mv.i2_mvy;
S08 i1_ref_idx = s_search_node_temp.i1_ref_idx;
i1_grid_enable = 1;
as_unique_search_nodes[num_unique_nodes].s_mv.i2_mvx = i2_mv_x - 1;
as_unique_search_nodes[num_unique_nodes].s_mv.i2_mvy = i2_mv_y - 1;
as_unique_search_nodes[num_unique_nodes++].i1_ref_idx = i1_ref_idx;
as_unique_search_nodes[num_unique_nodes].s_mv.i2_mvx = i2_mv_x;
as_unique_search_nodes[num_unique_nodes].s_mv.i2_mvy = i2_mv_y - 1;
as_unique_search_nodes[num_unique_nodes++].i1_ref_idx = i1_ref_idx;
as_unique_search_nodes[num_unique_nodes].s_mv.i2_mvx = i2_mv_x + 1;
as_unique_search_nodes[num_unique_nodes].s_mv.i2_mvy = i2_mv_y - 1;
as_unique_search_nodes[num_unique_nodes++].i1_ref_idx = i1_ref_idx;
as_unique_search_nodes[num_unique_nodes].s_mv.i2_mvx = i2_mv_x - 1;
as_unique_search_nodes[num_unique_nodes].s_mv.i2_mvy = i2_mv_y;
as_unique_search_nodes[num_unique_nodes++].i1_ref_idx = i1_ref_idx;
as_unique_search_nodes[num_unique_nodes].s_mv.i2_mvx = i2_mv_x + 1;
as_unique_search_nodes[num_unique_nodes].s_mv.i2_mvy = i2_mv_y;
as_unique_search_nodes[num_unique_nodes++].i1_ref_idx = i1_ref_idx;
as_unique_search_nodes[num_unique_nodes].s_mv.i2_mvx = i2_mv_x - 1;
as_unique_search_nodes[num_unique_nodes].s_mv.i2_mvy = i2_mv_y + 1;
as_unique_search_nodes[num_unique_nodes++].i1_ref_idx = i1_ref_idx;
as_unique_search_nodes[num_unique_nodes].s_mv.i2_mvx = i2_mv_x;
as_unique_search_nodes[num_unique_nodes].s_mv.i2_mvy = i2_mv_y + 1;
as_unique_search_nodes[num_unique_nodes++].i1_ref_idx = i1_ref_idx;
as_unique_search_nodes[num_unique_nodes].s_mv.i2_mvx = i2_mv_x + 1;
as_unique_search_nodes[num_unique_nodes].s_mv.i2_mvy = i2_mv_y + 1;
as_unique_search_nodes[num_unique_nodes++].i1_ref_idx = i1_ref_idx;
if(add_centre)
{
as_unique_search_nodes[num_unique_nodes].s_mv.i2_mvx = i2_mv_x;
as_unique_search_nodes[num_unique_nodes].s_mv.i2_mvy = i2_mv_y;
as_unique_search_nodes[num_unique_nodes++].i1_ref_idx = i1_ref_idx;
}
}
else
{
/* For the candidates where refinement was required, choose the best two */
for(srch_cand = 0; srch_cand < num_best_cand; srch_cand++)
{
search_node_t s_search_node_temp = as_best_two_proj_node[srch_cand];
WORD32 mv_x = s_search_node_temp.s_mv.i2_mvx;
WORD32 mv_y = s_search_node_temp.s_mv.i2_mvy;
/* Because there may not be two best unique candidates (because of clipping),
second best candidate can be uninitialized, ignore that */
if(s_search_node_temp.s_mv.i2_mvx == INTRA_MV ||
s_search_node_temp.i1_ref_idx < 0)
{
num_nodes_searched++;
continue;
}
/* PT_C */
/* Since the center point has already been evaluated and best results are persistent,
it will not be evaluated again */
if(add_centre) /* centre point added explicitly again if search results is not updated */
{
INSERT_NEW_NODE(
as_unique_search_nodes,
num_unique_nodes,
s_search_node_temp,
0,
au4_unique_node_map,
center_x,
center_y,
1);
}
/* PT_L */
s_search_node_temp.s_mv.i2_mvx = mv_x - 1;
s_search_node_temp.s_mv.i2_mvy = mv_y;
INSERT_NEW_NODE(
as_unique_search_nodes,
num_unique_nodes,
s_search_node_temp,
0,
au4_unique_node_map,
center_x,
center_y,
1);
/* PT_T */
s_search_node_temp.s_mv.i2_mvx = mv_x;
s_search_node_temp.s_mv.i2_mvy = mv_y - 1;
INSERT_NEW_NODE(
as_unique_search_nodes,
num_unique_nodes,
s_search_node_temp,
0,
au4_unique_node_map,
center_x,
center_y,
1);
/* PT_R */
s_search_node_temp.s_mv.i2_mvx = mv_x + 1;
s_search_node_temp.s_mv.i2_mvy = mv_y;
INSERT_NEW_NODE(
as_unique_search_nodes,
num_unique_nodes,
s_search_node_temp,
0,
au4_unique_node_map,
center_x,
center_y,
1);
/* PT_B */
s_search_node_temp.s_mv.i2_mvx = mv_x;
s_search_node_temp.s_mv.i2_mvy = mv_y + 1;
INSERT_NEW_NODE(
as_unique_search_nodes,
num_unique_nodes,
s_search_node_temp,
0,
au4_unique_node_map,
center_x,
center_y,
1);
/* PT_TL */
s_search_node_temp.s_mv.i2_mvx = mv_x - 1;
s_search_node_temp.s_mv.i2_mvy = mv_y - 1;
INSERT_NEW_NODE(
as_unique_search_nodes,
num_unique_nodes,
s_search_node_temp,
0,
au4_unique_node_map,
center_x,
center_y,
1);
/* PT_TR */
s_search_node_temp.s_mv.i2_mvx = mv_x + 1;
s_search_node_temp.s_mv.i2_mvy = mv_y - 1;
INSERT_NEW_NODE(
as_unique_search_nodes,
num_unique_nodes,
s_search_node_temp,
0,
au4_unique_node_map,
center_x,
center_y,
1);
/* PT_BL */
s_search_node_temp.s_mv.i2_mvx = mv_x - 1;
s_search_node_temp.s_mv.i2_mvy = mv_y + 1;
INSERT_NEW_NODE(
as_unique_search_nodes,
num_unique_nodes,
s_search_node_temp,
0,
au4_unique_node_map,
center_x,
center_y,
1);
/* PT_BR */
s_search_node_temp.s_mv.i2_mvx = mv_x + 1;
s_search_node_temp.s_mv.i2_mvy = mv_y + 1;
INSERT_NEW_NODE(
as_unique_search_nodes,
num_unique_nodes,
s_search_node_temp,
0,
au4_unique_node_map,
center_x,
center_y,
1);
}
}
s_search_prms_blk.ps_search_nodes = &as_unique_search_nodes[0];
s_search_prms_blk.i4_num_search_nodes = num_unique_nodes;
/*****************************************************************/
/* Call the search algorithm, this includes: */
/* Pre-Search-Refinement (for coarse candts) */
/* Search on each candidate */
/* Post Search Refinement on winners/other new candidates */
/*****************************************************************/
hme_pred_search_no_encode(
&s_search_prms_blk,
ps_curr_layer,
&ps_ctxt->s_wt_pred,
ai4_valid_part_ids,
0,
e_me_quality_presets,
i1_grid_enable,
(ihevce_me_optimised_function_list_t *)
ps_ctxt->pv_me_optimised_function_list);
i1_grid_enable = 0;
}
}
/* for non encode layer update MV and end processing for block */
{
WORD32 i4_ref_id, min_cost = 0x7fffffff, min_sad = 0;
search_node_t *ps_search_node;
/* now update the reqd results back to the layer mv bank. */
if(1 == ps_refine_prms->i4_layer_id)
{
hme_update_mv_bank_in_l1_me(
ps_search_results,
ps_curr_layer->ps_layer_mvbank,
blk_x,
blk_y,
&s_mv_update_prms);
}
else
{
hme_update_mv_bank_noencode(
ps_search_results,
ps_curr_layer->ps_layer_mvbank,
blk_x,
blk_y,
&s_mv_update_prms);
}
/* UPDATE the MIN and MAX MVs for Dynamical Search Range for each ref. pic. */
/* Only for P pic. For P, both are 0, I&B has them mut. exclusive */
if(ps_ctxt->s_frm_prms.is_i_pic == ps_ctxt->s_frm_prms.bidir_enabled)
{
WORD32 i4_j;
layer_mv_t *ps_layer_mv = ps_curr_layer->ps_layer_mvbank;
//if (ps_layer_mv->e_blk_size == s_mv_update_prms.e_search_blk_size)
/* Not considering this for Dyn. Search Update */
{
for(i4_ref_id = 0; i4_ref_id < (S32)s_mv_update_prms.i4_num_ref;
i4_ref_id++)
{
ps_search_node =
ps_search_results->aps_part_results[i4_ref_id][PART_ID_2Nx2N];
for(i4_j = 0; i4_j < ps_layer_mv->i4_num_mvs_per_ref; i4_j++)
{
hme_update_dynamic_search_params(
&ps_ctxt->s_coarse_dyn_range_prms
.as_dyn_range_prms[ps_refine_prms->i4_layer_id]
[i4_ref_id],
ps_search_node->s_mv.i2_mvy);
ps_search_node++;
}
}
}
}
if(1 == ps_refine_prms->i4_layer_id)
{
WORD32 wt_pred_val, log_wt_pred_val;
WORD32 ref_id_of_nearest_poc = 0;
WORD32 max_val = 0x7fffffff;
WORD32 max_l0_val = 0x7fffffff;
WORD32 max_l1_val = 0x7fffffff;
WORD32 cur_val;
WORD32 i4_local_weighted_sad, i4_local_cost_weighted_pred;
WORD32 bestl0_sad = 0x7fffffff;
WORD32 bestl1_sad = 0x7fffffff;
search_node_t *ps_best_l0_blk = NULL, *ps_best_l1_blk = NULL;
for(i4_ref_id = 0; i4_ref_id < (S32)s_mv_update_prms.i4_num_ref;
i4_ref_id++)
{
wt_pred_val = ps_ctxt->s_wt_pred.a_wpred_wt[i4_ref_id];
log_wt_pred_val = ps_ctxt->s_wt_pred.wpred_log_wdc;
ps_search_node =
ps_search_results->aps_part_results[i4_ref_id][PART_ID_2Nx2N];
i4_local_weighted_sad = ((ps_search_node->i4_sad * wt_pred_val) +
((1 << log_wt_pred_val) >> 1)) >>
log_wt_pred_val;
i4_local_cost_weighted_pred =
i4_local_weighted_sad +
(ps_search_node->i4_tot_cost - ps_search_node->i4_sad);
//the loop is redundant as the results are already sorted based on total cost
//for (i4_j = 0; i4_j < ps_curr_layer->ps_layer_mvbank->i4_num_mvs_per_ref; i4_j++)
{
if(i4_local_cost_weighted_pred < min_cost)
{
min_cost = i4_local_cost_weighted_pred;
min_sad = i4_local_weighted_sad;
}
}
/* For P frame, calculate the nearest poc which is either P or I frame*/
if(ps_ctxt->s_frm_prms.is_i_pic == ps_ctxt->s_frm_prms.bidir_enabled)
{
if(-1 != ps_coarse_layer->ai4_ref_id_to_poc_lc[i4_ref_id])
{
cur_val =
ABS(ps_ctxt->i4_curr_poc -
ps_coarse_layer->ai4_ref_id_to_poc_lc[i4_ref_id]);
if(cur_val < max_val)
{
max_val = cur_val;
ref_id_of_nearest_poc = i4_ref_id;
}
}
}
}
/* Store ME cost w.r.t. past frame only for P frame */
if(ps_ctxt->s_frm_prms.is_i_pic == ps_ctxt->s_frm_prms.bidir_enabled)
{
if(-1 != ps_coarse_layer->ai4_ref_id_to_poc_lc[ref_id_of_nearest_poc])
{
WORD16 i2_mvx, i2_mvy;
WORD32 i4_diff_col_ctr = blk_x - (i4_ctb_blk_ctr * 4);
WORD32 i4_diff_row_ctr = blk_y - (i4_ctb_row_ctr * 4);
WORD32 z_scan_idx =
gau1_raster_scan_to_ctb[i4_diff_row_ctr][i4_diff_col_ctr];
WORD32 wt, log_wt;
/*ASSERT((ps_ctxt->i4_curr_poc - ps_coarse_layer->ai4_ref_id_to_poc_lc[ref_id_of_nearest_poc])
<= (1 + ps_ctxt->num_b_frms));*/
/*obtain mvx and mvy */
i2_mvx =
ps_search_results
->aps_part_results[ref_id_of_nearest_poc][PART_ID_2Nx2N]
->s_mv.i2_mvx;
i2_mvy =
ps_search_results
->aps_part_results[ref_id_of_nearest_poc][PART_ID_2Nx2N]
->s_mv.i2_mvy;
/*register the min cost for l1 me in blk context */
wt = ps_ctxt->s_wt_pred.a_wpred_wt[ref_id_of_nearest_poc];
log_wt = ps_ctxt->s_wt_pred.wpred_log_wdc;
/*register the min cost for l1 me in blk context */
ps_ed_ctb_l1_curr->i4_sad_me_for_ref[z_scan_idx >> 2] =
((ps_search_results
->aps_part_results[ref_id_of_nearest_poc][PART_ID_2Nx2N]
->i4_sad *
wt) +
((1 << log_wt) >> 1)) >>
log_wt;
ps_ed_ctb_l1_curr->i4_sad_cost_me_for_ref[z_scan_idx >> 2] =
ps_ed_ctb_l1_curr->i4_sad_me_for_ref[z_scan_idx >> 2] +
(ps_search_results
->aps_part_results[ref_id_of_nearest_poc][PART_ID_2Nx2N]
->i4_tot_cost -
ps_search_results
->aps_part_results[ref_id_of_nearest_poc][PART_ID_2Nx2N]
->i4_sad);
/*for complexity change detection*/
ps_ctxt->i4_num_blks++;
if(ps_ed_ctb_l1_curr->i4_sad_cost_me_for_ref[z_scan_idx >> 2] >
(8 /*blk width*/ * 8 /*blk height*/ * (1 + ps_ctxt->num_b_frms)))
{
ps_ctxt->i4_num_blks_high_sad++;
}
}
}
}
/* EIID: Early inter intra decisions */
/* tap L1 level SAD for inter intra decisions */
if((e_me_quality_presets >= ME_MEDIUM_SPEED) &&
(!ps_ctxt->s_frm_prms
.is_i_pic)) //for high-quality preset->disable early decisions
{
if(1 == ps_refine_prms->i4_layer_id)
{
WORD32 i4_min_sad_cost_8x8_block = min_cost;
ihevce_ed_blk_t *ps_curr_ed_blk_ctxt;
WORD32 i4_diff_col_ctr = blk_x - (i4_ctb_blk_ctr * 4);
WORD32 i4_diff_row_ctr = blk_y - (i4_ctb_row_ctr * 4);
WORD32 z_scan_idx =
gau1_raster_scan_to_ctb[i4_diff_row_ctr][i4_diff_col_ctr];
ps_curr_ed_blk_ctxt = ps_ed_blk_ctxt_curr_ctb + z_scan_idx;
/*register the min cost for l1 me in blk context */
ps_ed_ctb_l1_curr->i4_best_sad_cost_8x8_l1_me[z_scan_idx >> 2] =
i4_min_sad_cost_8x8_block;
i4_num_comparisions++;
/* take early inter-intra decision here */
ps_curr_ed_blk_ctxt->intra_or_inter = 3; /*init saying eval both */
#if DISABLE_INTRA_IN_BPICS
if((e_me_quality_presets == ME_XTREME_SPEED_25) &&
(ps_ctxt->s_frm_prms.i4_temporal_layer_id > TEMPORAL_LAYER_DISABLE))
{
ps_curr_ed_blk_ctxt->intra_or_inter =
2; /*eval only inter if inter cost is less */
i4_num_inter_wins++;
}
else
#endif
{
if(ps_ed_ctb_l1_curr->i4_best_sad_cost_8x8_l1_me[z_scan_idx >> 2] <
((ps_ed_ctb_l1_curr->i4_best_sad_cost_8x8_l1_ipe[z_scan_idx >> 2] *
i4_threshold_multiplier) /
i4_threshold_divider))
{
ps_curr_ed_blk_ctxt->intra_or_inter =
2; /*eval only inter if inter cost is less */
i4_num_inter_wins++;
}
}
//{
// DBG_PRINTF ("(blk x, blk y):(%d, %d)\t me:(ctb_x, ctb_y):(%d, %d)\t intra_SAD_COST: %d\tInter_SAD_COST: %d\n",
// blk_x,blk_y,
// i4_ctb_blk_ctr, i4_ctb_row_ctr,
// ps_curr_ed_blk_ctxt->i4_best_sad_8x8_l1_ipe,
// i4_min_sad_cost_8x8_block
// );
//}
} //end of layer-1
} //end of if (e_me_quality_presets >= ME_MEDIUM_SPEED)
else
{
if(1 == ps_refine_prms->i4_layer_id)
{
WORD32 i4_min_sad_cost_8x8_block = min_cost;
WORD32 i4_diff_col_ctr = blk_x - (i4_ctb_blk_ctr * 4);
WORD32 i4_diff_row_ctr = blk_y - (i4_ctb_row_ctr * 4);
WORD32 z_scan_idx =
gau1_raster_scan_to_ctb[i4_diff_row_ctr][i4_diff_col_ctr];
/*register the min cost for l1 me in blk context */
ps_ed_ctb_l1_curr->i4_best_sad_cost_8x8_l1_me[z_scan_idx >> 2] =
i4_min_sad_cost_8x8_block;
}
}
if(1 == ps_refine_prms->i4_layer_id)
{
WORD32 i4_diff_col_ctr = blk_x - (i4_ctb_blk_ctr * 4);
WORD32 i4_diff_row_ctr = blk_y - (i4_ctb_row_ctr * 4);
WORD32 z_scan_idx =
gau1_raster_scan_to_ctb[i4_diff_row_ctr][i4_diff_col_ctr];
ps_ed_ctb_l1_curr->i4_best_sad_8x8_l1_me_for_decide[z_scan_idx >> 2] =
min_sad;
if(min_cost <
ps_ed_ctb_l1_curr->i4_best_sad_cost_8x8_l1_ipe[z_scan_idx >> 2])
{
ps_ctxt->i4_L1_hme_best_cost += min_cost;
ps_ctxt->i4_L1_hme_sad += min_sad;
ps_ed_ctb_l1_curr->i4_best_sad_8x8_l1_me[z_scan_idx >> 2] = min_sad;
}
else
{
ps_ctxt->i4_L1_hme_best_cost +=
ps_ed_ctb_l1_curr->i4_best_sad_cost_8x8_l1_ipe[z_scan_idx >> 2];
ps_ctxt->i4_L1_hme_sad +=
ps_ed_ctb_l1_curr->i4_best_sad_8x8_l1_ipe[z_scan_idx >> 2];
ps_ed_ctb_l1_curr->i4_best_sad_8x8_l1_me[z_scan_idx >> 2] =
ps_ed_ctb_l1_curr->i4_best_sad_8x8_l1_ipe[z_scan_idx >> 2];
}
}
}
}
/* Update the number of blocks processed in the current row */
if((ME_MEDIUM_SPEED > e_me_quality_presets))
{
ihevce_dmgr_set_row_row_sync(
pv_hme_dep_mngr,
(i4_ctb_x + 1),
blk_y,
0 /* Col Tile No. : Not supported in PreEnc*/);
}
}
/* set the output dependency after completion of row */
ihevce_pre_enc_grp_job_set_out_dep(ps_multi_thrd_ctxt, ps_job, i4_ping_pong);
}
}