C++程序  |  10668行  |  416.87 KB

/******************************************************************************
 *
 * Copyright (C) 2018 The Android Open Source Project
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at:
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 *
 *****************************************************************************
 * Originally developed and contributed by Ittiam Systems Pvt. Ltd, Bangalore
*/
/**
******************************************************************************
* @file hme_refine.c
*
* @brief
*    Contains the implementation of the refinement layer searches and related
*    functionality like CU merge.
*
* @author
*    Ittiam
*
*
* List of Functions
*
*
******************************************************************************
*/

/*****************************************************************************/
/* File Includes                                                             */
/*****************************************************************************/
/* System include files */
#include <stdio.h>
#include <string.h>
#include <stdlib.h>
#include <assert.h>
#include <stdarg.h>
#include <math.h>
#include <limits.h>

/* User include files */
#include "ihevc_typedefs.h"
#include "itt_video_api.h"
#include "ihevce_api.h"

#include "rc_cntrl_param.h"
#include "rc_frame_info_collector.h"
#include "rc_look_ahead_params.h"

#include "ihevc_defs.h"
#include "ihevc_structs.h"
#include "ihevc_platform_macros.h"
#include "ihevc_deblk.h"
#include "ihevc_itrans_recon.h"
#include "ihevc_chroma_itrans_recon.h"
#include "ihevc_chroma_intra_pred.h"
#include "ihevc_intra_pred.h"
#include "ihevc_inter_pred.h"
#include "ihevc_mem_fns.h"
#include "ihevc_padding.h"
#include "ihevc_weighted_pred.h"
#include "ihevc_sao.h"
#include "ihevc_resi_trans.h"
#include "ihevc_quant_iquant_ssd.h"
#include "ihevc_cabac_tables.h"

#include "ihevce_defs.h"
#include "ihevce_lap_enc_structs.h"
#include "ihevce_multi_thrd_structs.h"
#include "ihevce_multi_thrd_funcs.h"
#include "ihevce_me_common_defs.h"
#include "ihevce_had_satd.h"
#include "ihevce_error_codes.h"
#include "ihevce_bitstream.h"
#include "ihevce_cabac.h"
#include "ihevce_rdoq_macros.h"
#include "ihevce_function_selector.h"
#include "ihevce_enc_structs.h"
#include "ihevce_entropy_structs.h"
#include "ihevce_cmn_utils_instr_set_router.h"
#include "ihevce_enc_loop_structs.h"
#include "ihevce_bs_compute_ctb.h"
#include "ihevce_global_tables.h"
#include "ihevce_dep_mngr_interface.h"
#include "hme_datatype.h"
#include "hme_interface.h"
#include "hme_common_defs.h"
#include "hme_defs.h"
#include "ihevce_me_instr_set_router.h"
#include "hme_globals.h"
#include "hme_utils.h"
#include "hme_coarse.h"
#include "hme_fullpel.h"
#include "hme_subpel.h"
#include "hme_refine.h"
#include "hme_err_compute.h"
#include "hme_common_utils.h"
#include "hme_search_algo.h"
#include "ihevce_stasino_helpers.h"
#include "ihevce_common_utils.h"

/*****************************************************************************/
/* Globals                                                                   */
/*****************************************************************************/

/* brief: mapping buffer to convert raster scan indices into z-scan order in a ctb. */
/* The table is a 4x4 grid and every entry is a multiple of 4 between 0 and 60.    */
/* NOTE(review): presumably indexed as [row][col] of a 16x16 block inside a 64x64  */
/* CTB, yielding that block's z-scan offset in 8x8 units - confirm with the users  */
/* of this table.                                                                  */
UWORD8 gau1_raster_scan_to_ctb[4][4] = {
    { 0, 4, 16, 20 }, { 8, 12, 24, 28 }, { 32, 36, 48, 52 }, { 40, 44, 56, 60 }
};

/*****************************************************************************/
/* Extern Function declaration                                               */
/*****************************************************************************/
/* Defined outside this file. Takes a CTB start position, the picture        */
/* dimensions and the ME frame context and returns a pointer to a            */
/* ctb_boundary_attrs_t.                                                     */
extern ctb_boundary_attrs_t *
    get_ctb_attrs(S32 ctb_start_x, S32 ctb_start_y, S32 pic_wd, S32 pic_ht, me_frm_ctxt_t *ps_ctxt);

/* Function pointer type for routines that fill a search node from the       */
/* current layer, a coarse layer, a position, a reference id and a result id.*/
/* NOTE(review): going by the name, these project a colocated candidate from */
/* the coarser layer - confirm against the implementations.                  */
typedef void (*PF_HME_PROJECT_COLOC_CANDT_FXN)(
    search_node_t *ps_search_node,
    layer_ctxt_t *ps_curr_layer,
    layer_ctxt_t *ps_coarse_layer,
    S32 i4_pos_x,
    S32 i4_pos_y,
    S08 i1_ref_id,
    S32 i4_result_id);

/* Variant of the above type that, instead of a single reference id, takes   */
/* the number of active L0 references, a prediction direction and a default  */
/* reference id.                                                             */
/* NOTE(review): the "L0_ME" in the name suggests use in the layer-0 ME pass */
/* - confirm.                                                                */
typedef void (*PF_HME_PROJECT_COLOC_CANDT_L0_ME_FXN)(
    search_node_t *ps_search_node,
    layer_ctxt_t *ps_curr_layer,
    layer_ctxt_t *ps_coarse_layer,
    S32 i4_pos_x,
    S32 i4_pos_y,
    S32 i4_num_act_ref_l0,
    U08 u1_pred_dir,
    U08 u1_default_ref_id,
    S32 i4_result_id);

/*****************************************************************************/
/* Function Definitions                                                      */
/*****************************************************************************/

/**
********************************************************************************
*  @fn     ihevce_no_wt_copy
*
*  @brief  Copies a temp_stride x temp_stride block of samples from one of the
*          layer's input planes into a prediction buffer. The source block is
*          displaced by the PU's motion vector; no weighting is applied.
*
*  @param[in]  ps_ctxt : not referenced by this function
*
*  @param[in]  ps_curr_layer : layer context supplying the per-reference input
*              planes (ppu1_list_inp) and their stride (i4_inp_stride)
*
*  @param[in]  ps_pu : PU whose pred mode selects the L0 or the L1 mv / ref idx
*
*  @param[out] pu1_temp_pred : destination buffer
*
*  @param[in]  temp_stride : destination stride; also used as both the width
*              and the height of the copied block
*
*  @param[in]  blk_x, blk_y : block position; multiplied by 8 to obtain the
*              sample position in the source plane
*
*  @return None
********************************************************************************
*/
void ihevce_no_wt_copy(
    coarse_me_ctxt_t *ps_ctxt,
    layer_ctxt_t *ps_curr_layer,
    pu_t *ps_pu,
    UWORD8 *pu1_temp_pred,
    WORD32 temp_stride,
    WORD32 blk_x,
    WORD32 blk_y)
{
    UWORD8 *pu1_src;
    WORD32 src_stride, src_offset;
    WORD32 mv_x, mv_y;
    WORD32 y, x, i4_pel;
    WORD8 i1_ref_idx;

    ASSERT((ps_pu->b2_pred_mode == PRED_L0) || (ps_pu->b2_pred_mode == PRED_L1));

    /* Any pred mode other than PRED_L0 is served from the L1 fields */
    if(ps_pu->b2_pred_mode == PRED_L0)
    {
        i1_ref_idx = ps_pu->mv.i1_l0_ref_idx;
        mv_x = ps_pu->mv.s_l0_mv.i2_mvx;
        mv_y = ps_pu->mv.s_l0_mv.i2_mvy;
    }
    else
    {
        i1_ref_idx = ps_pu->mv.i1_l1_ref_idx;
        mv_x = ps_pu->mv.s_l1_mv.i2_mvx;
        mv_y = ps_pu->mv.s_l1_mv.i2_mvy;
    }

    src_stride = ps_curr_layer->i4_inp_stride;

    /* The mv components are added directly as whole-sample displacements */
    src_offset = ((blk_y << 3) + mv_y) * src_stride;
    src_offset += (blk_x << 3) + mv_x;

    pu1_src = ps_curr_layer->ppu1_list_inp[i1_ref_idx] + src_offset;

    /* Row by row copy; the destination rows are packed temp_stride apart */
    for(y = temp_stride; y > 0; y--)
    {
        for(x = 0; x < temp_stride; x++)
        {
            i4_pel = pu1_src[x];
            pu1_temp_pred[x] = CLIP_U8(i4_pel);
        }

        pu1_src += src_stride;
        pu1_temp_pred += temp_stride;
    }
}

/**
********************************************************************************
*  @fn     hme_add_clustered_mvs_as_merge_cands
*
*  @brief  Fills the merge candidate list, starting at index 0, with the MVs
*          held in the given clusters. Only clusters whose ref id maps to the
*          requested prediction direction contribute. Each MV is clipped to
*          the MV range of its reference and is kept only if no identical
*          (mvx, mvy, ref id) candidate is already in the list.
*
*  @param[in]  ps_cluster_base : array of i4_num_clusters clusters
*
*  @param[out] ps_merge_cand : candidate list to fill. At most
*              MAX_MERGE_CANDTS entries are written (the size of the array
*              handed in by hme_pick_eval_merge_candts)
*
*  @param[in]  pps_range_prms : MV range per ref id, used for clipping
*
*  @param[in]  pu1_refid_to_pred_dir_list : per ref id flag; a cluster is used
*              when the logical negation of its flag equals u1_pred_dir
*
*  @param[in]  i4_num_clusters : number of entries in ps_cluster_base
*
*  @param[in]  u1_pred_dir : prediction direction being populated
*
*  @return Number of candidates written to ps_merge_cand
********************************************************************************
*/
static WORD32 hme_add_clustered_mvs_as_merge_cands(
    cluster_data_t *ps_cluster_base,
    search_node_t *ps_merge_cand,
    range_prms_t **pps_range_prms,
    U08 *pu1_refid_to_pred_dir_list,
    WORD32 i4_num_clusters,
    U08 u1_pred_dir)
{
    WORD32 i, j, k;
    WORD32 i4_num_cands_added = 0;
    WORD32 i4_num_mvs_in_cluster;

    for(i = 0; i < i4_num_clusters; i++)
    {
        cluster_data_t *ps_data = &ps_cluster_base[i];

        if(u1_pred_dir == !pu1_refid_to_pred_dir_list[ps_data->ref_id])
        {
            i4_num_mvs_in_cluster = ps_data->num_mvs;

            for(j = 0; j < i4_num_mvs_in_cluster; j++)
            {
                /* The slot at i4_num_cands_added is used as scratch below, */
                /* so stop before touching a slot beyond the list capacity  */
                if(i4_num_cands_added >= MAX_MERGE_CANDTS)
                {
                    return i4_num_cands_added;
                }

                ps_merge_cand[i4_num_cands_added].s_mv.i2_mvx = ps_data->as_mv[j].mvx;
                ps_merge_cand[i4_num_cands_added].s_mv.i2_mvy = ps_data->as_mv[j].mvy;
                ps_merge_cand[i4_num_cands_added].i1_ref_idx = ps_data->ref_id;

                CLIP_MV_WITHIN_RANGE(
                    ps_merge_cand[i4_num_cands_added].s_mv.i2_mvx,
                    ps_merge_cand[i4_num_cands_added].s_mv.i2_mvy,
                    pps_range_prms[ps_data->ref_id],
                    0,
                    0,
                    0);

                /* Duplicate check is done on the clipped MV, since that is */
                /* what the list stores. Comparing against the unclipped MV */
                /* would let two MVs that clip to the same value both in.   */
                for(k = 0; k < i4_num_cands_added; k++)
                {
                    if((ps_merge_cand[k].s_mv.i2_mvx ==
                        ps_merge_cand[i4_num_cands_added].s_mv.i2_mvx) &&
                       (ps_merge_cand[k].s_mv.i2_mvy ==
                        ps_merge_cand[i4_num_cands_added].s_mv.i2_mvy) &&
                       (ps_merge_cand[k].i1_ref_idx == ps_data->ref_id))
                    {
                        break;
                    }
                }

                /* Commit the scratch slot only when it is a new candidate */
                if(k == i4_num_cands_added)
                {
                    i4_num_cands_added++;
                }
            }
        }
    }

    return i4_num_cands_added;
}

/**
********************************************************************************
*  @fn     hme_add_me_best_as_merge_cands
*
*  @brief  Appends the MVs of the best ME results of the 4 children (or, for a
*          child that is split, of its 4 grandchildren) to the merge candidate
*          list. Results are visited rank by rank (result id 0..3) across all
*          children. Each MV is clipped to the MV range of its reference and
*          is kept only if no identical (mvx, mvy, ref id) candidate is
*          already in the list. Stops as soon as the preset dependent maximum
*          number of candidates is reached.
*
*  @param[in]  pps_child_data_array : search results of the 4 children
*
*  @param[in]  ps_8x8cu_results : grandchild results; the 4 grandchildren of
*              child i start at index (i << 2)
*
*  @param[in,out] ps_merge_cand : candidate list; new entries are appended
*              after the first i4_num_cands_added entries
*
*  @param[in]  pps_range_prms : MV range per ref id, used for clipping
*
*  @param[in]  pu1_refid_to_pred_dir_list : not referenced by this function
*
*  @param[in]  pi1_past_list : maps a PU's L0 ref idx to the ref id stored in
*              the candidate; an entry of -1 makes the PU be skipped
*
*  @param[in]  pi1_future_list : same as pi1_past_list, for L1
*
*  @param[in]  e_blk_size : only used in the sanity ASSERTs
*
*  @param[in]  e_quality_preset : selects the maximum number of candidates
*
*  @param[in]  i4_num_cands_added : number of candidates already in the list
*
*  @param[in]  u1_pred_dir : 0 selects the L0 mv / past list, non-zero selects
*              the L1 mv / future list
*
*  @return Total number of candidates in ps_merge_cand
********************************************************************************
*/
static WORD32 hme_add_me_best_as_merge_cands(
    search_results_t **pps_child_data_array,
    inter_cu_results_t *ps_8x8cu_results,
    search_node_t *ps_merge_cand,
    range_prms_t **pps_range_prms,
    U08 *pu1_refid_to_pred_dir_list,
    S08 *pi1_past_list,
    S08 *pi1_future_list,
    BLK_SIZE_T e_blk_size,
    ME_QUALITY_PRESETS_T e_quality_preset,
    S32 i4_num_cands_added,
    U08 u1_pred_dir)
{
    WORD32 i, j, k;
    WORD32 i4_max_cands_to_add;

    WORD32 i4_result_id = 0;

    ASSERT(!pps_child_data_array[0]->u1_split_flag || (BLK_64x64 != e_blk_size));
    ASSERT(!pps_child_data_array[1]->u1_split_flag || (BLK_64x64 != e_blk_size));
    ASSERT(!pps_child_data_array[2]->u1_split_flag || (BLK_64x64 != e_blk_size));
    ASSERT(!pps_child_data_array[3]->u1_split_flag || (BLK_64x64 != e_blk_size));

    switch(e_quality_preset)
    {
    case ME_PRISTINE_QUALITY:
    {
        i4_max_cands_to_add = MAX_MERGE_CANDTS;

        break;
    }
    case ME_HIGH_QUALITY:
    {
        /* All 4 children are split and each grandchild contributes an MV */
        /* and 2 best results per grandchild */
        i4_max_cands_to_add = 4 * 4 * 2;

        break;
    }
    case ME_MEDIUM_SPEED:
    {
        i4_max_cands_to_add = 4 * 2 * 2;

        break;
    }
    case ME_HIGH_SPEED:
    case ME_XTREME_SPEED:
    case ME_XTREME_SPEED_25:
    default:
    {
        /* Presets not listed above get the smallest budget, so that the */
        /* limit is never read uninitialised                              */
        i4_max_cands_to_add = 4 * 2 * 1;

        break;
    }
    }

    /* The slot at i4_num_cands_added is used as scratch below. If the list */
    /* is already full on entry, that slot must not be written to.          */
    if(i4_max_cands_to_add <= i4_num_cands_added)
    {
        return i4_num_cands_added;
    }

    while(i4_result_id < 4)
    {
        for(i = 0; i < 4; i++)
        {
            inter_cu_results_t *ps_child_data = pps_child_data_array[i]->ps_cu_results;
            inter_cu_results_t *ps_grandchild_data = &ps_8x8cu_results[i << 2];

            if(!pps_child_data_array[i]->u1_split_flag)
            {
                part_type_results_t *ps_data = &ps_child_data->ps_best_results[i4_result_id];

                if(ps_child_data->u1_num_best_results <= i4_result_id)
                {
                    continue;
                }

                if(ps_data->as_pu_results->pu.b1_intra_flag)
                {
                    continue;
                }

                /* One PU for 2Nx2N, two PUs for every other part type */
                for(j = 0; j <= (ps_data->u1_part_type != PRT_2Nx2N); j++)
                {
                    mv_t *ps_mv;

                    S08 i1_ref_idx;

                    pu_t *ps_pu = &ps_data->as_pu_results[j].pu;

                    /* A PU with pred mode 2 is accepted for either direction; */
                    /* otherwise its pred mode has to equal u1_pred_dir        */
                    if(u1_pred_dir !=
                       ((ps_pu->b2_pred_mode == 2) ? u1_pred_dir : ps_pu->b2_pred_mode))
                    {
                        continue;
                    }

                    if(u1_pred_dir)
                    {
                        ps_mv = &ps_pu->mv.s_l1_mv;
                        i1_ref_idx = pi1_future_list[ps_pu->mv.i1_l1_ref_idx];
                    }
                    else
                    {
                        ps_mv = &ps_pu->mv.s_l0_mv;
                        i1_ref_idx = pi1_past_list[ps_pu->mv.i1_l0_ref_idx];
                    }

                    if(-1 == i1_ref_idx)
                    {
                        continue;
                    }

                    ps_merge_cand[i4_num_cands_added].s_mv.i2_mvx = ps_mv->i2_mvx;
                    ps_merge_cand[i4_num_cands_added].s_mv.i2_mvy = ps_mv->i2_mvy;
                    ps_merge_cand[i4_num_cands_added].i1_ref_idx = i1_ref_idx;

                    CLIP_MV_WITHIN_RANGE(
                        ps_merge_cand[i4_num_cands_added].s_mv.i2_mvx,
                        ps_merge_cand[i4_num_cands_added].s_mv.i2_mvy,
                        pps_range_prms[i1_ref_idx],
                        0,
                        0,
                        0);

                    /* Duplicate check on the clipped MV, which is what the */
                    /* list actually stores                                 */
                    for(k = 0; k < i4_num_cands_added; k++)
                    {
                        if((ps_merge_cand[k].s_mv.i2_mvx ==
                            ps_merge_cand[i4_num_cands_added].s_mv.i2_mvx) &&
                           (ps_merge_cand[k].s_mv.i2_mvy ==
                            ps_merge_cand[i4_num_cands_added].s_mv.i2_mvy) &&
                           (ps_merge_cand[k].i1_ref_idx == i1_ref_idx))
                        {
                            break;
                        }
                    }

                    if(k == i4_num_cands_added)
                    {
                        i4_num_cands_added++;

                        if(i4_max_cands_to_add <= i4_num_cands_added)
                        {
                            return i4_num_cands_added;
                        }
                    }
                }
            }
            else
            {
                for(j = 0; j < 4; j++)
                {
                    mv_t *ps_mv;

                    S08 i1_ref_idx;

                    /* NOTE(review): this always looks at the first best result */
                    /* of the grandchild, independent of i4_result_id - confirm */
                    /* that this is intended.                                   */
                    part_type_results_t *ps_data = ps_grandchild_data[j].ps_best_results;
                    pu_t *ps_pu = &ps_data->as_pu_results[0].pu;

                    ASSERT(ps_data->u1_part_type == PRT_2Nx2N);

                    if(ps_grandchild_data[j].u1_num_best_results <= i4_result_id)
                    {
                        continue;
                    }

                    if(ps_data->as_pu_results->pu.b1_intra_flag)
                    {
                        continue;
                    }

                    if(u1_pred_dir !=
                       ((ps_pu->b2_pred_mode == 2) ? u1_pred_dir : ps_pu->b2_pred_mode))
                    {
                        continue;
                    }

                    if(u1_pred_dir)
                    {
                        ps_mv = &ps_pu->mv.s_l1_mv;
                        i1_ref_idx = pi1_future_list[ps_pu->mv.i1_l1_ref_idx];
                    }
                    else
                    {
                        ps_mv = &ps_pu->mv.s_l0_mv;
                        i1_ref_idx = pi1_past_list[ps_pu->mv.i1_l0_ref_idx];
                    }

                    /* Same guard as for the unsplit child: a ref idx of -1 */
                    /* must not be used to index pps_range_prms             */
                    if(-1 == i1_ref_idx)
                    {
                        continue;
                    }

                    ps_merge_cand[i4_num_cands_added].s_mv.i2_mvx = ps_mv->i2_mvx;
                    ps_merge_cand[i4_num_cands_added].s_mv.i2_mvy = ps_mv->i2_mvy;
                    ps_merge_cand[i4_num_cands_added].i1_ref_idx = i1_ref_idx;

                    CLIP_MV_WITHIN_RANGE(
                        ps_merge_cand[i4_num_cands_added].s_mv.i2_mvx,
                        ps_merge_cand[i4_num_cands_added].s_mv.i2_mvy,
                        pps_range_prms[i1_ref_idx],
                        0,
                        0,
                        0);

                    /* Duplicate check on the clipped MV, which is what the */
                    /* list actually stores                                 */
                    for(k = 0; k < i4_num_cands_added; k++)
                    {
                        if((ps_merge_cand[k].s_mv.i2_mvx ==
                            ps_merge_cand[i4_num_cands_added].s_mv.i2_mvx) &&
                           (ps_merge_cand[k].s_mv.i2_mvy ==
                            ps_merge_cand[i4_num_cands_added].s_mv.i2_mvy) &&
                           (ps_merge_cand[k].i1_ref_idx == i1_ref_idx))
                        {
                            break;
                        }
                    }

                    if(k == i4_num_cands_added)
                    {
                        i4_num_cands_added++;

                        if(i4_max_cands_to_add <= i4_num_cands_added)
                        {
                            return i4_num_cands_added;
                        }
                    }
                }
            }
        }

        i4_result_id++;
    }

    return i4_num_cands_added;
}

/**
********************************************************************************
*  @fn     hme_add_cands_for_merge_eval
*
*  @brief  Builds the list of merge candidates for one prediction direction.
*          For the pristine quality preset the list is first seeded with the
*          clustered MVs of the block; for every preset the best ME results of
*          the children are then appended.
*
*  @param[in]  ps_cluster_info : cluster data of the CTB; read only for the
*              pristine quality preset
*
*  @param[in]  pps_child_data_array : search results of the 4 children
*
*  @param[in]  ps_8x8cu_results : grandchild results
*
*  @param[in]  pps_range_prms : MV range per ref id
*
*  @param[out] ps_merge_cand : candidate list to fill
*
*  @param[in]  pu1_refid_to_pred_dir_list : per ref id direction flags
*
*  @param[in]  pi1_past_list, pi1_future_list : ref idx mapping lists
*
*  @param[in]  e_quality_preset : ME quality preset
*
*  @param[in]  e_blk_size : BLK_32x32 selects the 32x32 cluster block given by
*              u1_blk_id; any other value selects the 64x64 cluster block
*
*  @param[in]  u1_pred_dir : prediction direction being populated
*
*  @param[in]  u1_blk_id : index of the 32x32 block within the CTB
*
*  @return Number of candidates in ps_merge_cand
********************************************************************************
*/
WORD32 hme_add_cands_for_merge_eval(
    ctb_cluster_info_t *ps_cluster_info,
    search_results_t **pps_child_data_array,
    inter_cu_results_t *ps_8x8cu_results,
    range_prms_t **pps_range_prms,
    search_node_t *ps_merge_cand,
    U08 *pu1_refid_to_pred_dir_list,
    S08 *pi1_past_list,
    S08 *pi1_future_list,
    ME_QUALITY_PRESETS_T e_quality_preset,
    BLK_SIZE_T e_blk_size,
    U08 u1_pred_dir,
    U08 u1_blk_id)
{
    WORD32 i4_cand_count = 0;

    /* Clustered MVs are used as seeds only for the pristine quality preset */
    if(ME_PRISTINE_QUALITY == e_quality_preset)
    {
        cluster_data_t *ps_clusters;
        WORD32 i4_cluster_count;

        if(BLK_32x32 != e_blk_size)
        {
            ps_clusters = ps_cluster_info->ps_64x64_blk->as_cluster_data;
            i4_cluster_count = ps_cluster_info->ps_64x64_blk->num_clusters;
        }
        else
        {
            ps_clusters = ps_cluster_info->ps_32x32_blk[u1_blk_id].as_cluster_data;
            i4_cluster_count = ps_cluster_info->ps_32x32_blk[u1_blk_id].num_clusters;
        }

        i4_cand_count = hme_add_clustered_mvs_as_merge_cands(
            ps_clusters,
            ps_merge_cand,
            pps_range_prms,
            pu1_refid_to_pred_dir_list,
            i4_cluster_count,
            u1_pred_dir);
    }

    /* Best ME results are appended after whatever is in the list so far */
    return hme_add_me_best_as_merge_cands(
        pps_child_data_array,
        ps_8x8cu_results,
        ps_merge_cand,
        pps_range_prms,
        pu1_refid_to_pred_dir_list,
        pi1_past_list,
        pi1_future_list,
        e_blk_size,
        e_quality_preset,
        i4_cand_count,
        u1_pred_dir);
}

/**
********************************************************************************
*  @fn   hme_pick_eval_merge_candts
*
*  @brief  Collects the unique MV candidates of the 4 constituent CUs (or of
*          their children) of a 32x32 / 64x64 merge CU, evaluates each of them
*          as a single point on the merged CU and updates the best results of
*          the merged CU across its active partitions. Finally the mv cost and
*          the total cost of the stored results are filled in.
*
*  @param[in,out] ps_merge_prms : Parameters sent from higher layers. Out of
*                  these params, the search result structure is also derived and
*                 updated during the search
*
*  @param[in] ps_subpel_prms : Supplies the input stride, the SATD/SAD switch,
*              the working memory for interpolation, the qpel interpolation
*              function and the noisy CU flag
*
*  @param[in] i4_search_idx : ID of the buffer within the search results to
*              update. Also used as the prediction direction (pred_lx) and as
*              the index of the prediction context
*
*  @param[in] i4_best_part_type : not referenced in this function body
*
*  @param[in] i4_is_vert : not referenced in this function body
*
*  @param[in] ps_wt_inp_prms : Weighted input planes and the inverse weights /
*              shifts per reference
*
*  @param[in] i4_frm_qstep : not referenced in this function body
*
*  @param[in] ps_cmn_utils_optimised_function_list : Stored in the error
*              params handed to the error compute function
*
*  @param[in] ps_me_optimised_function_list : Supplies the stim injected
*              distortion function used for noisy CUs
*
*  @return Number of merge candidates evaluated (0 if none was found)
********************************************************************************
*/
WORD32 hme_pick_eval_merge_candts(
    hme_merge_prms_t *ps_merge_prms,
    hme_subpel_prms_t *ps_subpel_prms,
    S32 i4_search_idx,
    S32 i4_best_part_type,
    S32 i4_is_vert,
    wgt_pred_ctxt_t *ps_wt_inp_prms,
    S32 i4_frm_qstep,
    ihevce_cmn_opt_func_t *ps_cmn_utils_optimised_function_list,
    ihevce_me_optimised_function_list_t *ps_me_optimised_function_list)
{
    S32 x_off, y_off;
    search_node_t *ps_search_node;
    S32 ai4_valid_part_ids[TOT_NUM_PARTS + 1];
    S32 i4_num_valid_parts;
    pred_ctxt_t *ps_pred_ctxt;

    search_node_t as_merge_unique_node[MAX_MERGE_CANDTS];
    S32 num_unique_nodes_cu_merge = 0;

    search_results_t *ps_search_results = ps_merge_prms->ps_results_merge;
    CU_SIZE_T e_cu_size = ps_search_results->e_cu_size;
    S32 i4_part_mask = ps_search_results->i4_part_mask;

    search_results_t *aps_child_results[4];
    layer_ctxt_t *ps_curr_layer = ps_merge_prms->ps_layer_ctxt;

    S32 i4_ref_stride, i, j;
    result_upd_prms_t s_result_prms;

    BLK_SIZE_T e_blk_size = ge_cu_size_to_blk_size[e_cu_size];
    S32 i4_offset;

    /*************************************************************************/
    /* Function pointer for SAD/SATD, array and prms structure to pass to    */
    /* This function                                                         */
    /*************************************************************************/
    PF_SAD_FXN_T pf_err_compute;
    S32 ai4_sad_grid[9][17];
    err_prms_t s_err_prms;

    /*************************************************************************/
    /* Allowed MV RANGE                                                      */
    /*************************************************************************/
    range_prms_t **pps_range_prms = ps_merge_prms->aps_mv_range;
    PF_INTERP_FXN_T pf_qpel_interp;
    PF_MV_COST_FXN pf_mv_cost_compute;
    WORD32 pred_lx;
    U08 *apu1_hpel_ref[4];

    interp_prms_t s_interp_prms;
    S32 i4_interp_buf_id;

    S32 i4_ctb_x_off = ps_merge_prms->i4_ctb_x_off;
    S32 i4_ctb_y_off = ps_merge_prms->i4_ctb_y_off;

    /* Sanity checks */
    ASSERT((e_blk_size == BLK_64x64) || (e_blk_size == BLK_32x32));

    s_err_prms.ps_cmn_utils_optimised_function_list = ps_cmn_utils_optimised_function_list;

    /* Initialize all the ptrs to child CUs for merge decision */
    aps_child_results[0] = ps_merge_prms->ps_results_tl;
    aps_child_results[1] = ps_merge_prms->ps_results_tr;
    aps_child_results[2] = ps_merge_prms->ps_results_bl;
    aps_child_results[3] = ps_merge_prms->ps_results_br;

    num_unique_nodes_cu_merge = 0;

    pf_mv_cost_compute = compute_mv_cost_implicit_high_speed;

    /* Pristine preset: the candidate list is built by                       */
    /* hme_add_cands_for_merge_eval. pu1_is_past is handed in as the ref id  */
    /* to pred dir list, i4_search_idx as the prediction direction, and the  */
    /* block id is formed as (x_off >> 5) + (y_off >> 4) of the merged CU.   */
    if(ME_PRISTINE_QUALITY == ps_merge_prms->e_quality_preset)
    {
        num_unique_nodes_cu_merge = hme_add_cands_for_merge_eval(
            ps_merge_prms->ps_cluster_info,
            aps_child_results,
            ps_merge_prms->ps_8x8_cu_results,
            pps_range_prms,
            as_merge_unique_node,
            ps_search_results->pu1_is_past,
            ps_merge_prms->pi1_past_list,
            ps_merge_prms->pi1_future_list,
            ps_merge_prms->e_quality_preset,
            e_blk_size,
            i4_search_idx,
            (ps_merge_prms->ps_results_merge->u1_x_off >> 5) +
                (ps_merge_prms->ps_results_merge->u1_y_off >> 4));
    }
    else
    {
        /*************************************************************************/
        /* Populate the list of unique search nodes in the child CUs for merge   */
        /* evaluation                                                            */
        /*************************************************************************/
        for(i = 0; i < 4; i++)
        {
            search_node_t s_search_node;

            PART_TYPE_T e_part_type;
            PART_ID_T e_part_id;

            WORD32 part_num;

            search_results_t *ps_child = aps_child_results[i];

            /* Child has results of its own: take the MVs of the partitions  */
            /* of its best result, unless its only result is an intra one    */
            if(ps_child->ps_cu_results->u1_num_best_results)
            {
                if(!((ps_child->ps_cu_results->ps_best_results->as_pu_results->pu.b1_intra_flag) &&
                     (1 == ps_child->ps_cu_results->u1_num_best_results)))
                {
                    e_part_type =
                        (PART_TYPE_T)ps_child->ps_cu_results->ps_best_results[0].u1_part_type;

                    ASSERT(num_unique_nodes_cu_merge < MAX_MERGE_CANDTS);

                    /* Insert mvs of NxN partitions. */
                    for(part_num = 0; part_num < gau1_num_parts_in_part_type[((S32)e_part_type)];
                        part_num++)
                    {
                        e_part_id = ge_part_type_to_part_id[e_part_type][part_num];

                        if(ps_child->aps_part_results[i4_search_idx][e_part_id]->i1_ref_idx != -1)
                        {
                            /* Work on a copy so that clipping does not alter */
                            /* the child's stored result                      */
                            s_search_node = *ps_child->aps_part_results[i4_search_idx][e_part_id];
                            if(s_search_node.s_mv.i2_mvx != INTRA_MV)
                            {
                                CLIP_MV_WITHIN_RANGE(
                                    s_search_node.s_mv.i2_mvx,
                                    s_search_node.s_mv.i2_mvy,
                                    pps_range_prms[s_search_node.i1_ref_idx],
                                    0,
                                    0,
                                    0);

                                INSERT_NEW_NODE_NOMAP(
                                    as_merge_unique_node,
                                    num_unique_nodes_cu_merge,
                                    s_search_node,
                                    1);
                            }
                        }
                    }
                }
            }
            /* Child has no results: fall back to its 4 grandchildren, which */
            /* start at index (i << 2). The "only result is intra" test is   */
            /* applied to the first of those grandchildren only.             */
            else if(!((ps_merge_prms->ps_results_grandchild[(i << 2)]
                           .ps_cu_results->ps_best_results->as_pu_results->pu.b1_intra_flag) &&
                      (1 == ps_merge_prms->ps_results_grandchild[(i << 2)]
                                .ps_cu_results->u1_num_best_results)))
            {
                search_results_t *ps_results_root = &ps_merge_prms->ps_results_grandchild[(i << 2)];

                for(j = 0; j < 4; j++)
                {
                    e_part_type = (PART_TYPE_T)ps_results_root[j]
                                      .ps_cu_results->ps_best_results[0]
                                      .u1_part_type;

                    ASSERT(num_unique_nodes_cu_merge < MAX_MERGE_CANDTS);

                    /* Insert mvs of NxN partitions. */
                    for(part_num = 0; part_num < gau1_num_parts_in_part_type[((S32)e_part_type)];
                        part_num++)
                    {
                        e_part_id = ge_part_type_to_part_id[e_part_type][part_num];

                        /* NOTE(review): the intra flag tested here is that of */
                        /* ps_child's first best result, although this branch  */
                        /* is only reached when ps_child reports zero best     */
                        /* results - confirm that the grandchild's flag was    */
                        /* not the one intended.                               */
                        if((ps_results_root[j]
                                .aps_part_results[i4_search_idx][e_part_id]
                                ->i1_ref_idx != -1) &&
                           (!ps_child->ps_cu_results->ps_best_results->as_pu_results->pu
                                 .b1_intra_flag))
                        {
                            s_search_node =
                                *ps_results_root[j].aps_part_results[i4_search_idx][e_part_id];
                            if(s_search_node.s_mv.i2_mvx != INTRA_MV)
                            {
                                CLIP_MV_WITHIN_RANGE(
                                    s_search_node.s_mv.i2_mvx,
                                    s_search_node.s_mv.i2_mvy,
                                    pps_range_prms[s_search_node.i1_ref_idx],
                                    0,
                                    0,
                                    0);

                                INSERT_NEW_NODE_NOMAP(
                                    as_merge_unique_node,
                                    num_unique_nodes_cu_merge,
                                    s_search_node,
                                    1);
                            }
                        }
                    }
                }
            }
        }
    }

    /* Nothing to evaluate */
    if(0 == num_unique_nodes_cu_merge)
    {
        return 0;
    }

    /*************************************************************************/
    /* Appropriate Err compute fxn, depends on SAD/SATD, blk size and remains*/
    /* fixed through this subpel refinement for this partition.              */
    /* Note, we do not enable grid sads since one pt is evaluated per node   */
    /* Hence, part mask is also nearly dont care and we use 2Nx2N enabled.   */
    /*************************************************************************/
    /* Same value as the one read into i4_part_mask at its declaration */
    i4_part_mask = ps_search_results->i4_part_mask;

    /* Need to add the corresponding SAD functions for EXTREME SPEED : Lokesh */
    if(ps_subpel_prms->i4_use_satd)
    {
        if(BLK_32x32 == e_blk_size)
        {
            pf_err_compute = hme_evalsatd_pt_pu_32x32;
        }
        else
        {
            pf_err_compute = hme_evalsatd_pt_pu_64x64;
        }
    }
    else
    {
        pf_err_compute = (PF_SAD_FXN_T)hme_evalsad_grid_pu_MxM;
    }

    i4_ref_stride = ps_curr_layer->i4_rec_stride;

    /* Position of the merged CU is taken from its top left child; the       */
    /* reference offset additionally includes the CTB position.              */
    x_off = ps_merge_prms->ps_results_tl->u1_x_off;
    y_off = ps_merge_prms->ps_results_tl->u1_y_off;
    i4_offset = x_off + i4_ctb_x_off + ((y_off + i4_ctb_y_off) * i4_ref_stride);

    /*************************************************************************/
    /* This array stores the ids of the partitions whose                     */
    /* SADs are updated. Since the partitions whose SADs are updated may not */
    /* be in contiguous order, we supply another level of indirection.       */
    /*************************************************************************/
    i4_num_valid_parts = hme_create_valid_part_ids(i4_part_mask, ai4_valid_part_ids);

    /* Initialize result params used for partition update */
    /* No mv cost function is passed here; the mv cost is added in the       */
    /* final loop of this function instead.                                  */
    s_result_prms.pf_mv_cost_compute = NULL;
    s_result_prms.ps_search_results = ps_search_results;
    s_result_prms.pi4_valid_part_ids = ai4_valid_part_ids;
    s_result_prms.i1_ref_idx = i4_search_idx;
    s_result_prms.i4_part_mask = i4_part_mask;
    s_result_prms.pi4_sad_grid = &ai4_sad_grid[0][0];
    s_result_prms.i4_grid_mask = 1;

    /* One time Initialization of error params used for SAD/SATD compute */
    s_err_prms.i4_inp_stride = ps_subpel_prms->i4_inp_stride;
    s_err_prms.i4_ref_stride = i4_ref_stride;
    s_err_prms.i4_part_mask = (ENABLE_2Nx2N);
    s_err_prms.i4_grid_mask = 1;
    s_err_prms.pi4_sad_grid = &ai4_sad_grid[0][0];
    s_err_prms.i4_blk_wd = gau1_blk_size_to_wd[e_blk_size];
    s_err_prms.i4_blk_ht = gau1_blk_size_to_ht[e_blk_size];
    s_err_prms.i4_step = 1;

    /*************************************************************************/
    /* One time preparation of non changing interpolation params.            */
    /*************************************************************************/
    s_interp_prms.i4_ref_stride = i4_ref_stride;
    s_interp_prms.i4_blk_wd = gau1_blk_size_to_wd[e_blk_size];
    s_interp_prms.i4_blk_ht = gau1_blk_size_to_ht[e_blk_size];
    s_interp_prms.apu1_interp_out[0] = ps_subpel_prms->pu1_wkg_mem;
    s_interp_prms.i4_out_stride = gau1_blk_size_to_wd[e_blk_size];
    i4_interp_buf_id = 0;

    pf_qpel_interp = ps_subpel_prms->pf_qpel_interp;

    /***************************************************************************/
    /* Compute SATD/SAD for all unique nodes of children CUs to get best merge */
    /* results                                                                 */
    /***************************************************************************/
    for(i = 0; i < num_unique_nodes_cu_merge; i++)
    {
        WORD8 i1_ref_idx;
        ps_search_node = &as_merge_unique_node[i];

        /*********************************************************************/
        /* Compute the base pointer for input, interpolated buffers          */
        /* The base pointers are stored in the following order:              */
        /* [0] fx, fy : 0, 0     [1] hx, fy : 0.5, 0                         */
        /* [2] fx, hy : 0, 0.5   [3] hx, hy : 0.5, 0.5                       */
        /* To these, we need to add the offset of the current node           */
        /*********************************************************************/
        i1_ref_idx = ps_search_node->i1_ref_idx;
        apu1_hpel_ref[0] = ps_curr_layer->ppu1_list_rec_fxfy[i1_ref_idx] + i4_offset;
        apu1_hpel_ref[1] = ps_curr_layer->ppu1_list_rec_hxfy[i1_ref_idx] + i4_offset;
        apu1_hpel_ref[2] = ps_curr_layer->ppu1_list_rec_fxhy[i1_ref_idx] + i4_offset;
        apu1_hpel_ref[3] = ps_curr_layer->ppu1_list_rec_hxhy[i1_ref_idx] + i4_offset;

        s_interp_prms.ppu1_ref = &apu1_hpel_ref[0];

        pf_qpel_interp(
            &s_interp_prms,
            ps_search_node->s_mv.i2_mvx,
            ps_search_node->s_mv.i2_mvy,
            i4_interp_buf_id);

        pred_lx = i4_search_idx;
        ps_pred_ctxt = &ps_search_results->as_pred_ctxt[pred_lx];

        s_result_prms.u1_pred_lx = pred_lx;
        s_result_prms.ps_search_node_base = ps_search_node;
        s_err_prms.pu1_inp =
            ps_wt_inp_prms->apu1_wt_inp[i1_ref_idx] + x_off + y_off * ps_subpel_prms->i4_inp_stride;
        /* The prediction and its stride are read back from s_interp_prms;   */
        /* presumably both were filled in by pf_qpel_interp above - confirm. */
        s_err_prms.pu1_ref = s_interp_prms.pu1_final_out;
        s_err_prms.i4_ref_stride = s_interp_prms.i4_final_out_stride;

        /* Carry out the SAD/SATD. This call also does the TU RECURSION.
        Here the tu recursion logic is restricted with the size of the PU*/
        pf_err_compute(&s_err_prms);

        /* For noisy CUs with a non-zero stim multiplier, the distortions in */
        /* the sad grid are additionally passed through the stim injection   */
        /* function for all valid partitions.                                */
        if(ps_subpel_prms->u1_is_cu_noisy &&
           ps_merge_prms->ps_inter_ctb_prms->i4_alpha_stim_multiplier)
        {
            ps_me_optimised_function_list->pf_compute_stim_injected_distortion_for_all_parts(
                s_err_prms.pu1_ref,
                s_err_prms.i4_ref_stride,
                ai4_valid_part_ids,
                ps_merge_prms->ps_inter_ctb_prms->pu8_part_src_sigmaX,
                ps_merge_prms->ps_inter_ctb_prms->pu8_part_src_sigmaXSquared,
                s_err_prms.pi4_sad_grid,
                ps_merge_prms->ps_inter_ctb_prms->i4_alpha_stim_multiplier,
                ps_wt_inp_prms->a_inv_wpred_wt[i1_ref_idx],
                ps_wt_inp_prms->ai4_shift_val[i1_ref_idx],
                i4_num_valid_parts,
                ps_wt_inp_prms->wpred_log_wdc,
                (BLK_32x32 == e_blk_size) ? 32 : 64);
        }

        /* Update the mv's */
        s_result_prms.i2_mv_x = ps_search_node->s_mv.i2_mvx;
        s_result_prms.i2_mv_y = ps_search_node->s_mv.i2_mvy;

        /* Update best results */
        hme_update_results_pt_pu_best1_subpel_hs(&s_err_prms, &s_result_prms);
    }

    /************************************************************************/
    /* Update mv cost and total cost for each valid partition in the CU     */
    /************************************************************************/
    for(i = 0; i < TOT_NUM_PARTS; i++)
    {
        if(i4_part_mask & (1 << i))
        {
            WORD32 j;
            WORD32 i4_mv_cost;

            ps_search_node = ps_search_results->aps_part_results[i4_search_idx][i];

            /* At most one result per evaluated candidate can exist. The     */
            /* node pointer only advances past results with a valid ref idx, */
            /* so once an invalid one is met the remaining iterations keep   */
            /* looking at that same node.                                    */
            for(j = 0;
                j < MIN(ps_search_results->u1_num_results_per_part, num_unique_nodes_cu_merge);
                j++)
            {
                if(ps_search_node->i1_ref_idx != -1)
                {
                    pred_lx = i4_search_idx;
                    ps_pred_ctxt = &ps_search_results->as_pred_ctxt[pred_lx];

                    /* Prediction context should now deal with qpel units */
                    HME_SET_MVPRED_RES(ps_pred_ctxt, MV_RES_QPEL);

                    ps_search_node->u1_subpel_done = 1;
                    ps_search_node->u1_is_avail = 1;

                    i4_mv_cost =
                        pf_mv_cost_compute(ps_search_node, ps_pred_ctxt, (PART_ID_T)i, MV_RES_QPEL);

                    ps_search_node->i4_tot_cost = i4_mv_cost + ps_search_node->i4_sad;
                    ps_search_node->i4_mv_cost = i4_mv_cost;

                    ps_search_node++;
                }
            }
        }
    }

    return num_unique_nodes_cu_merge;
}

/* Threshold on the count of intra parts found in the children CUs before a  */
/* merge. hme_try_merge_high_speed compares its intra part count against     */
/* this value as one of the conditions for its early intra merge/split exit  */
#define CU_MERGE_MAX_INTRA_PARTS 4

/**
********************************************************************************
*  @fn     hme_try_merge_high_speed
*
*  @brief  Attempts to merge 4 NxN candts to a 2Nx2N candt, either as a single
*          entity or with partitions, for the high speed preset
*
*  @param[in]  ps_thrd_ctxt : thread level ME ctxt; the optimised function
*              lists handed to the helper functions are read from it
*
*  @param[in,out]  ps_ctxt : frame level ME ctxt; frame qstep, weighted pred
*              params, frame params, 8x8 CU results and the working memory
*              manager are accessed through it
*
*  @param[in]  ps_cur_ipe_ctb : IPE analysis of the current CTB; intra split
*              flags and best 32x32/64x64 intra costs are read from it
*
*  @param[in]  ps_subpel_prms : subpel params; u1_is_cu_noisy is read here and
*              the struct is forwarded to the helper functions
*
*  @param[in,out]  ps_merge_prms : Params for CU merge
*
*  @param[in,out]  ps_pu_results : forwarded to hme_populate_pus and
*              hme_decide_part_types
*
*  @param[in,out]  ps_pu_result : forwarded to hme_populate_pus
*
*  @return CU_MERGE_RESULT_T type result of merge (CU_MERGED/CU_SPLIT)
********************************************************************************
*/
CU_MERGE_RESULT_T hme_try_merge_high_speed(
    me_ctxt_t *ps_thrd_ctxt,
    me_frm_ctxt_t *ps_ctxt,
    ipe_l0_ctb_analyse_for_me_t *ps_cur_ipe_ctb,
    hme_subpel_prms_t *ps_subpel_prms,
    hme_merge_prms_t *ps_merge_prms,
    inter_pu_results_t *ps_pu_results,
    pu_result_t *ps_pu_result)
{
    search_results_t *ps_results_tl, *ps_results_tr;
    search_results_t *ps_results_bl, *ps_results_br;

    S32 i;
    S32 i4_search_idx;
    S32 i4_cost_parent;
    S32 intra_cu_size;
    /* Per partition source sigma accumulators, filled only for noisy CUs */
    ULWORD64 au8_final_src_sigmaX[17], au8_final_src_sigmaXSquared[17];

    search_results_t *ps_results_merge = ps_merge_prms->ps_results_merge;
    wgt_pred_ctxt_t *ps_wt_inp_prms = &ps_ctxt->s_wt_pred;

    /* Start with every partition type other than NxN enabled */
    S32 i4_part_mask = ENABLE_ALL_PARTS - ENABLE_NxN;
    S32 is_vert = 0, i4_best_part_type = -1;
    S32 i4_intra_parts = 0; /* Keeps track of intra percentage before merge */
    S32 i4_cost_children = 0;
    S32 i4_frm_qstep = ps_ctxt->frm_qstep;
    S32 i4_num_merge_cands_evaluated = 0;
    U08 u1_x_off = ps_results_merge->u1_x_off;
    U08 u1_y_off = ps_results_merge->u1_y_off;
    /* For offsets of 0 or 32 this evaluates to 0..3 (x contributes 0/1, y */
    /* contributes 0/2); it is used to index the per-32x32 IPE arrays      */
    S32 i4_32x32_id = (u1_y_off >> 4) + (u1_x_off >> 5);

    ihevce_cmn_opt_func_t *ps_cmn_utils_optimised_function_list =
        ps_thrd_ctxt->ps_cmn_utils_optimised_function_list;
    ihevce_me_optimised_function_list_t *ps_me_optimised_function_list =
        ((ihevce_me_optimised_function_list_t *)ps_thrd_ctxt->pv_me_optimised_function_list);
    ps_results_tl = ps_merge_prms->ps_results_tl;
    ps_results_tr = ps_merge_prms->ps_results_tr;
    ps_results_bl = ps_merge_prms->ps_results_bl;
    ps_results_br = ps_merge_prms->ps_results_br;

    /* Faster presets evaluate fewer partition types at the merged level */
    if(ps_merge_prms->e_quality_preset == ME_XTREME_SPEED)
    {
        i4_part_mask &= ~ENABLE_AMP;
    }

    if(ps_merge_prms->e_quality_preset == ME_XTREME_SPEED_25)
    {
        i4_part_mask &= ~ENABLE_AMP;

        i4_part_mask &= ~ENABLE_SMP;
    }

    ps_merge_prms->i4_num_pred_dir_actual = 0;

    /*************************************************************************/
    /* The logic for High speed CU merge goes as follows:                    */
    /*                                                                       */
    /* 1. Early exit with CU_SPLIT if sum of best partitions of children CUs */
    /*    exceed 7                                                           */
    /* 2. Early exit with CU_MERGE if mvs of best partitions of children CUs */
    /*    are identical                                                      */
    /* 3. Find the all unique mvs of best partitions of children CUs and     */
    /*    evaluate partial SATDs (all 17 partitions) for each unique mv. If  */
    /*    best parent cost is lower than sum of the best children costs      */
    /*    return CU_MERGE after seeding the best results else return CU_SPLIT*/
    /*                                                                       */
    /* NOTE(review): no identical-mv early exit (step 2) is performed in     */
    /* this function body itself; confirm whether a callee handles it        */
    /*************************************************************************/

    /* Count the number of best partitions in child CUs, early exit if > 7 */
    /* The sum of the children's best costs is accumulated alongside       */
    if((ps_merge_prms->e_quality_preset != ME_PRISTINE_QUALITY) ||
       (CU_32x32 == ps_results_merge->e_cu_size))
    {
        S32 num_parts_in_32x32 = 0;
        WORD32 i4_part_type;

        /* A split child counts as 4 parts and contributes the cost of its */
        /* four 8x8 CU results; an unsplit child contributes the part      */
        /* count and cost of its own best result                           */
        if(ps_results_tl->u1_split_flag)
        {
            num_parts_in_32x32 += 4;

/* NOTE(review): macro defined inside the function body and not referenced */
/* anywhere within this function                                           */
#define COST_INTERCHANGE 0
            i4_cost_children = ps_merge_prms->ps_8x8_cu_results[0].ps_best_results->i4_tot_cost +
                               ps_merge_prms->ps_8x8_cu_results[1].ps_best_results->i4_tot_cost +
                               ps_merge_prms->ps_8x8_cu_results[2].ps_best_results->i4_tot_cost +
                               ps_merge_prms->ps_8x8_cu_results[3].ps_best_results->i4_tot_cost;
        }
        else
        {
            i4_part_type = ps_results_tl->ps_cu_results->ps_best_results[0].u1_part_type;
            num_parts_in_32x32 += gau1_num_parts_in_part_type[i4_part_type];
            i4_cost_children = ps_results_tl->ps_cu_results->ps_best_results[0].i4_tot_cost;
        }

        if(ps_results_tr->u1_split_flag)
        {
            num_parts_in_32x32 += 4;

            i4_cost_children += ps_merge_prms->ps_8x8_cu_results[4].ps_best_results->i4_tot_cost +
                                ps_merge_prms->ps_8x8_cu_results[5].ps_best_results->i4_tot_cost +
                                ps_merge_prms->ps_8x8_cu_results[6].ps_best_results->i4_tot_cost +
                                ps_merge_prms->ps_8x8_cu_results[7].ps_best_results->i4_tot_cost;
        }
        else
        {
            i4_part_type = ps_results_tr->ps_cu_results->ps_best_results[0].u1_part_type;
            num_parts_in_32x32 += gau1_num_parts_in_part_type[i4_part_type];
            i4_cost_children += ps_results_tr->ps_cu_results->ps_best_results[0].i4_tot_cost;
        }

        if(ps_results_bl->u1_split_flag)
        {
            num_parts_in_32x32 += 4;

            i4_cost_children += ps_merge_prms->ps_8x8_cu_results[8].ps_best_results->i4_tot_cost +
                                ps_merge_prms->ps_8x8_cu_results[9].ps_best_results->i4_tot_cost +
                                ps_merge_prms->ps_8x8_cu_results[10].ps_best_results->i4_tot_cost +
                                ps_merge_prms->ps_8x8_cu_results[11].ps_best_results->i4_tot_cost;
        }
        else
        {
            i4_part_type = ps_results_bl->ps_cu_results->ps_best_results[0].u1_part_type;
            num_parts_in_32x32 += gau1_num_parts_in_part_type[i4_part_type];
            i4_cost_children += ps_results_bl->ps_cu_results->ps_best_results[0].i4_tot_cost;
        }

        if(ps_results_br->u1_split_flag)
        {
            num_parts_in_32x32 += 4;

            i4_cost_children += ps_merge_prms->ps_8x8_cu_results[12].ps_best_results->i4_tot_cost +
                                ps_merge_prms->ps_8x8_cu_results[13].ps_best_results->i4_tot_cost +
                                ps_merge_prms->ps_8x8_cu_results[14].ps_best_results->i4_tot_cost +
                                ps_merge_prms->ps_8x8_cu_results[15].ps_best_results->i4_tot_cost;
        }
        else
        {
            i4_part_type = ps_results_br->ps_cu_results->ps_best_results[0].u1_part_type;
            num_parts_in_32x32 += gau1_num_parts_in_part_type[i4_part_type];
            i4_cost_children += ps_results_br->ps_cu_results->ps_best_results[0].i4_tot_cost;
        }

        /* Too many distinct partitions in the children: do not merge */
        if((num_parts_in_32x32 > 7) && (ps_merge_prms->e_quality_preset != ME_PRISTINE_QUALITY))
        {
            return CU_SPLIT;
        }

        /* The XS25 preset applies its own partition count limit as well */
        if((num_parts_in_32x32 > MAX_NUM_CONSTITUENT_MVS_TO_ENABLE_32MERGE_IN_XS25) &&
           (ps_merge_prms->e_quality_preset == ME_XTREME_SPEED_25))
        {
            return CU_SPLIT;
        }
    }

    /* Accumulate intra percentage before merge for early CU_SPLIT decision     */
    /* Note : Each intra part represent a NxN unit of the children CUs          */
    /* This is essentially 1/16th of the CUsize under consideration for merge   */
    if(ME_PRISTINE_QUALITY == ps_merge_prms->e_quality_preset)
    {
        /* Pristine quality: the count comes from the cluster info CU tree.    */
        /* It is 16 when inter evaluation is disabled for the node, else the   */
        /* node's u1_intra_eval_enable value is used as the count              */
        if(CU_64x64 == ps_results_merge->e_cu_size)
        {
            i4_intra_parts =
                (!ps_merge_prms->ps_cluster_info->ps_cu_tree_root->u1_inter_eval_enable)
                    ? 16
                    : ps_merge_prms->ps_cluster_info->ps_cu_tree_root->u1_intra_eval_enable;
        }
        else
        {
            /* Same expression as i4_32x32_id; selects the tl/tr/bl/br child   */
            /* node of the tree root. Values outside 0..3 leave the count at 0 */
            switch((ps_results_merge->u1_x_off >> 5) + ((ps_results_merge->u1_y_off >> 4)))
            {
            case 0:
            {
                i4_intra_parts = (!ps_merge_prms->ps_cluster_info->ps_cu_tree_root->ps_child_node_tl
                                       ->u1_inter_eval_enable)
                                     ? 16
                                     : (ps_merge_prms->ps_cluster_info->ps_cu_tree_root
                                            ->ps_child_node_tl->u1_intra_eval_enable);

                break;
            }
            case 1:
            {
                i4_intra_parts = (!ps_merge_prms->ps_cluster_info->ps_cu_tree_root->ps_child_node_tr
                                       ->u1_inter_eval_enable)
                                     ? 16
                                     : (ps_merge_prms->ps_cluster_info->ps_cu_tree_root
                                            ->ps_child_node_tr->u1_intra_eval_enable);

                break;
            }
            case 2:
            {
                i4_intra_parts = (!ps_merge_prms->ps_cluster_info->ps_cu_tree_root->ps_child_node_bl
                                       ->u1_inter_eval_enable)
                                     ? 16
                                     : (ps_merge_prms->ps_cluster_info->ps_cu_tree_root
                                            ->ps_child_node_bl->u1_intra_eval_enable);

                break;
            }
            case 3:
            {
                i4_intra_parts = (!ps_merge_prms->ps_cluster_info->ps_cu_tree_root->ps_child_node_br
                                       ->u1_inter_eval_enable)
                                     ? 16
                                     : (ps_merge_prms->ps_cluster_info->ps_cu_tree_root
                                            ->ps_child_node_br->u1_intra_eval_enable);

                break;
            }
            }
        }
    }
    else
    {
        /* Other presets: inspect the best result of each of the 4 children.   */
        /* An unsplit intra child adds 4 parts; for a split child each of its  */
        /* four 8x8 CUs whose best result is intra adds 1 part                 */
        for(i = 0; i < 4; i++)
        {
            search_results_t *ps_results =
                (i == 0) ? ps_results_tl
                         : ((i == 1) ? ps_results_tr : ((i == 2) ? ps_results_bl : ps_results_br));

            part_type_results_t *ps_best_res = &ps_results->ps_cu_results->ps_best_results[0];

            if(ps_results->u1_split_flag)
            {
                U08 u1_x_off = ps_results->u1_x_off;
                U08 u1_y_off = ps_results->u1_y_off;
                /* Index of the first 8x8 CU result of this child, derived     */
                /* from the raster to zscan table                              */
                U08 u1_8x8_zscan_id = gau1_ctb_raster_to_zscan[(u1_x_off >> 2) + (u1_y_off << 2)] >>
                                      2;

                /* Special case to handle 8x8 CUs when 16x16 is split */
                ASSERT(ps_results->e_cu_size == CU_16x16);

                ps_best_res = &ps_ctxt->as_cu8x8_results[u1_8x8_zscan_id].ps_best_results[0];

                if(ps_best_res->as_pu_results[0].pu.b1_intra_flag)
                    i4_intra_parts += 1;

                ps_best_res = &ps_ctxt->as_cu8x8_results[u1_8x8_zscan_id + 1].ps_best_results[0];

                if(ps_best_res->as_pu_results[0].pu.b1_intra_flag)
                    i4_intra_parts += 1;

                ps_best_res = &ps_ctxt->as_cu8x8_results[u1_8x8_zscan_id + 2].ps_best_results[0];

                if(ps_best_res->as_pu_results[0].pu.b1_intra_flag)
                    i4_intra_parts += 1;

                ps_best_res = &ps_ctxt->as_cu8x8_results[u1_8x8_zscan_id + 3].ps_best_results[0];

                if(ps_best_res->as_pu_results[0].pu.b1_intra_flag)
                    i4_intra_parts += 1;
            }
            else if(ps_best_res[0].as_pu_results[0].pu.b1_intra_flag)
            {
                i4_intra_parts += 4;
            }
        }
    }

    /* Determine the max intra CU size indicated by IPE */
    intra_cu_size = CU_64x64;
    if(ps_cur_ipe_ctb->u1_split_flag)
    {
        intra_cu_size = CU_32x32;
        if(ps_cur_ipe_ctb->as_intra32_analyse[i4_32x32_id].b1_split_flag)
        {
            intra_cu_size = CU_16x16;
        }
    }

    /* Early intra decision: taken when all 16 parts are intra, or (outside  */
    /* pristine quality) when the intra part count exceeds the threshold and */
    /* the IPE intra CU size is smaller than the CU being merged             */
    if(((i4_intra_parts > CU_MERGE_MAX_INTRA_PARTS) &&
        (intra_cu_size < ps_results_merge->e_cu_size) &&
        (ME_PRISTINE_QUALITY != ps_merge_prms->e_quality_preset)) ||
       (i4_intra_parts == 16))
    {
        S32 i4_merge_outcome;

        /* Merge as intra only if IPE did not split at this CU size ... */
        i4_merge_outcome = (CU_32x32 == ps_results_merge->e_cu_size)
                               ? (!ps_cur_ipe_ctb->as_intra32_analyse[i4_32x32_id].b1_split_flag &&
                                  ps_cur_ipe_ctb->as_intra32_analyse[i4_32x32_id].b1_valid_cu)
                               : (!ps_cur_ipe_ctb->u1_split_flag);

        /* ... or unconditionally in pristine quality ... */
        i4_merge_outcome = i4_merge_outcome ||
                           (ME_PRISTINE_QUALITY == ps_merge_prms->e_quality_preset);

        /* ... but never for a noisy CU when DISABLE_INTRA_WHEN_NOISY is set */
        i4_merge_outcome = i4_merge_outcome &&
                           !(ps_subpel_prms->u1_is_cu_noisy && DISABLE_INTRA_WHEN_NOISY);

        if(i4_merge_outcome)
        {
            inter_cu_results_t *ps_cu_results = ps_results_merge->ps_cu_results;
            part_type_results_t *ps_best_result = ps_cu_results->ps_best_results;
            pu_t *ps_pu = &ps_best_result->as_pu_results->pu;

            /* Seed the merged CU with a single 2Nx2N intra result whose   */
            /* cost is the IPE intra cost for this CU size                  */
            ps_cu_results->u1_num_best_results = 1;
            ps_cu_results->u1_cu_size = ps_results_merge->e_cu_size;
            ps_cu_results->u1_x_off = u1_x_off;
            ps_cu_results->u1_y_off = u1_y_off;

            ps_best_result->u1_part_type = PRT_2Nx2N;
            ps_best_result->ai4_tu_split_flag[0] = 0;
            ps_best_result->ai4_tu_split_flag[1] = 0;
            ps_best_result->ai4_tu_split_flag[2] = 0;
            ps_best_result->ai4_tu_split_flag[3] = 0;
            ps_best_result->i4_tot_cost =
                (CU_64x64 == ps_results_merge->e_cu_size)
                    ? ps_cur_ipe_ctb->i4_best64x64_intra_cost
                    : ps_cur_ipe_ctb->ai4_best32x32_intra_cost[i4_32x32_id];

            ps_pu->b1_intra_flag = 1;
            ps_pu->b4_pos_x = u1_x_off >> 2;
            ps_pu->b4_pos_y = u1_y_off >> 2;
            /* NOTE(review): presumably the CU width in 4 pel units minus 1; */
            /* depends on the numeric values of the CU size enum - confirm   */
            ps_pu->b4_wd = (1 << (ps_results_merge->e_cu_size + 1)) - 1;
            ps_pu->b4_ht = ps_pu->b4_wd;
            ps_pu->mv.i1_l0_ref_idx = -1;
            ps_pu->mv.i1_l1_ref_idx = -1;
            ps_pu->mv.s_l0_mv.i2_mvx = INTRA_MV;
            ps_pu->mv.s_l0_mv.i2_mvy = INTRA_MV;
            ps_pu->mv.s_l1_mv.i2_mvx = INTRA_MV;
            ps_pu->mv.s_l1_mv.i2_mvy = INTRA_MV;

            return CU_MERGED;
        }
        else
        {
            return CU_SPLIT;
        }
    }

    /* Any intra part in the children restricts the merge search to 2Nx2N */
    if(i4_intra_parts)
    {
        i4_part_mask = ENABLE_2Nx2N;
    }

    /* The active ref count is set to 1 or 2 (bidir) for the reset call and */
    /* then overwritten with the merge params' ref count right after it     */
    ps_results_merge->u1_num_active_ref = (ps_ctxt->s_frm_prms.bidir_enabled) ? 2 : 1;

    hme_reset_search_results(ps_results_merge, i4_part_mask, MV_RES_QPEL);

    ps_results_merge->u1_num_active_ref = ps_merge_prms->i4_num_ref;
    /* Already zeroed near the top of this function; cleared here again */
    ps_merge_prms->i4_num_pred_dir_actual = 0;

    /* For noisy CUs with a non-zero alpha stim multiplier, compute the      */
    /* source sigmaX / sigmaX-squared for every enabled partition            */
    if(ps_subpel_prms->u1_is_cu_noisy && ps_merge_prms->ps_inter_ctb_prms->i4_alpha_stim_multiplier)
    {
        S32 ai4_valid_part_ids[TOT_NUM_PARTS + 1];
        S32 i4_num_valid_parts;
        S32 i4_sigma_array_offset;

        i4_num_valid_parts = hme_create_valid_part_ids(i4_part_mask, ai4_valid_part_ids);

        /*********************************************************************************************************************************************/
        /* i4_sigma_array_offset : takes care of pointing to the appropriate 4x4 block's sigmaX and sigmaX-squared value in a CTB out of 256 values  */
        /* Logic is x/4 + ((y/4) x 16) : every 4 pixel increase in x equals one 4x4 block increment, every 4 pixel increase in y equals 16 4x4 block */
        /* increment as there will be 256 4x4 blocks in a CTB                                                                                        */
        /*********************************************************************************************************************************************/
        i4_sigma_array_offset = (ps_merge_prms->ps_results_merge->u1_x_off / 4) +
                                (ps_merge_prms->ps_results_merge->u1_y_off * 4);

        for(i = 0; i < i4_num_valid_parts; i++)
        {
            S32 i4_part_id = ai4_valid_part_ids[i];

            hme_compute_final_sigma_of_pu_from_base_blocks(
                ps_ctxt->au4_4x4_src_sigmaX + i4_sigma_array_offset,
                ps_ctxt->au4_4x4_src_sigmaXSquared + i4_sigma_array_offset,
                au8_final_src_sigmaX,
                au8_final_src_sigmaXSquared,
                (CU_32x32 == ps_results_merge->e_cu_size) ? 32 : 64,
                4,
                i4_part_id,
                16);
        }

        /* NOTE(review): these pointers refer to arrays local to this        */
        /* function, so they must not be dereferenced after it returns       */
        ps_merge_prms->ps_inter_ctb_prms->pu8_part_src_sigmaX = au8_final_src_sigmaX;
        ps_merge_prms->ps_inter_ctb_prms->pu8_part_src_sigmaXSquared = au8_final_src_sigmaXSquared;
    }

    /*************************************************************************/
    /* Loop through all ref idx and pick the merge candts and refine based   */
    /* on the active partitions. At this stage num ref will be 1 or 2        */
    /*************************************************************************/
    for(i4_search_idx = 0; i4_search_idx < ps_merge_prms->i4_num_ref; i4_search_idx++)
    {
        S32 i4_cands;
        U08 u1_pred_dir = 0;

        /* With 2 refs, or without bidir, the search index is the pred dir. */
        /* With a single ref and bidir enabled, pick the direction that     */
        /* still has active references                                       */
        if((2 == ps_merge_prms->i4_num_ref) || (!ps_ctxt->s_frm_prms.bidir_enabled))
        {
            u1_pred_dir = i4_search_idx;
        }
        else if(ps_ctxt->s_frm_prms.u1_num_active_ref_l0 == 0)
        {
            u1_pred_dir = 1;
        }
        else if(ps_ctxt->s_frm_prms.u1_num_active_ref_l1 == 0)
        {
            u1_pred_dir = 0;
        }
        else
        {
            ASSERT(0);
        }

        /* call the function to pick and evaluate the merge candts, given */
        /* a ref id and a part mask.                                      */
        i4_cands = hme_pick_eval_merge_candts(
            ps_merge_prms,
            ps_subpel_prms,
            u1_pred_dir,
            i4_best_part_type,
            is_vert,
            ps_wt_inp_prms,
            i4_frm_qstep,
            ps_cmn_utils_optimised_function_list,
            ps_me_optimised_function_list);

        /* Record only the directions that produced at least one candidate */
        if(i4_cands)
        {
            ps_merge_prms->au1_pred_dir_searched[ps_merge_prms->i4_num_pred_dir_actual] =
                u1_pred_dir;
            ps_merge_prms->i4_num_pred_dir_actual++;
        }

        i4_num_merge_cands_evaluated += i4_cands;
    }

    /* Call the decide_part_types function here */
    /* Populate the new PU struct with the results post subpel refinement*/
    if(i4_num_merge_cands_evaluated)
    {
        inter_cu_results_t *ps_cu_results = ps_results_merge->ps_cu_results;

        hme_reset_wkg_mem(&ps_ctxt->s_buf_mgr);

        ps_merge_prms->ps_inter_ctb_prms->i4_ctb_x_off = ps_merge_prms->i4_ctb_x_off;
        ps_merge_prms->ps_inter_ctb_prms->i4_ctb_y_off = ps_merge_prms->i4_ctb_y_off;

        hme_populate_pus(
            ps_thrd_ctxt,
            ps_ctxt,
            ps_subpel_prms,
            ps_results_merge,
            ps_cu_results,
            ps_pu_results,
            ps_pu_result,
            ps_merge_prms->ps_inter_ctb_prms,
            &ps_ctxt->s_wt_pred,
            ps_merge_prms->ps_layer_ctxt,
            ps_merge_prms->au1_pred_dir_searched,
            ps_merge_prms->i4_num_pred_dir_actual);

        /* Offset of the CU within the CTB, using a row stride of 64 */
        ps_cu_results->i4_inp_offset = (ps_cu_results->u1_x_off) + (ps_cu_results->u1_y_off * 64);

        hme_decide_part_types(
            ps_cu_results,
            ps_pu_results,
            ps_merge_prms->ps_inter_ctb_prms,
            ps_ctxt,
            ps_cmn_utils_optimised_function_list,
            ps_me_optimised_function_list

        );

        /*****************************************************************/
        /* INSERT INTRA RESULTS AT 32x32/64x64 LEVEL.                    */
        /*****************************************************************/
#if DISABLE_INTRA_IN_BPICS
        if(1 != ((ME_XTREME_SPEED_25 == ps_merge_prms->e_quality_preset) &&
                 (ps_ctxt->s_frm_prms.i4_temporal_layer_id > TEMPORAL_LAYER_DISABLE)))
#endif
        {
            if(!(DISABLE_INTRA_WHEN_NOISY && ps_merge_prms->ps_inter_ctb_prms->u1_is_cu_noisy))
            {
                hme_insert_intra_nodes_post_bipred(
                    ps_cu_results, ps_cur_ipe_ctb, ps_ctxt->frm_qstep);
            }
        }
    }
    else
    {
        /* No merge candidate was evaluated in any direction */
        return CU_SPLIT;
    }

    /* We check the best result of ref idx 0 and compare for parent vs child */
    if((ps_merge_prms->e_quality_preset != ME_PRISTINE_QUALITY) ||
       (CU_32x32 == ps_results_merge->e_cu_size))
    {
        i4_cost_parent = ps_results_merge->ps_cu_results->ps_best_results[0].i4_tot_cost;
        /*********************************************************************/
        /* Add the cost of signaling the CU tree bits.                       */
        /* Assuming parent is not split, then we signal 1 bit for this parent*/
        /* CU. If split, then 1 bit for parent CU + 4 bits for each child CU */
        /* So, 4*lambda is extra for children cost. :Lokesh                  */
        /*********************************************************************/
        {
            pred_ctxt_t *ps_pred_ctxt = &ps_results_merge->as_pred_ctxt[0];

            i4_cost_children += ((4 * ps_pred_ctxt->lambda) >> (ps_pred_ctxt->lambda_q_shift));
        }

        /* Merge only when the parent is strictly cheaper than the children */
        if(i4_cost_parent < i4_cost_children)
        {
            return CU_MERGED;
        }

        return CU_SPLIT;
    }
    else
    {
        /* Pristine quality at a size other than 32x32: no cost comparison */
        return CU_MERGED;
    }
}

/**
********************************************************************************
*  @def    COPY_SEARCH_RESULT(ps_mv, pi1_ref_idx, ps_search_node, shift)
*
*  @brief  Copies one search node into an mv bank slot: both mv components of
*          the node are right shifted by 'shift' and stored in *ps_mv, and the
*          node's ref idx is stored in *pi1_ref_idx.
*
*  The body is wrapped in do { } while(0) so that an invocation followed by
*  ';' forms exactly one statement and remains valid inside an unbraced
*  if/else. Note that ps_mv and ps_search_node are evaluated more than once,
*  so arguments with side effects must not be passed.
********************************************************************************
*/
#define COPY_SEARCH_RESULT(ps_mv, pi1_ref_idx, ps_search_node, shift)                              \
    do                                                                                             \
    {                                                                                              \
        (ps_mv)->i2_mv_x = (ps_search_node)->s_mv.i2_mvx >> (shift);                               \
        (ps_mv)->i2_mv_y = (ps_search_node)->s_mv.i2_mvy >> (shift);                               \
        *(pi1_ref_idx) = (ps_search_node)->i1_ref_idx;                                             \
    } while(0)

/**
********************************************************************************
*  @fn     hme_update_mv_bank_noencode(search_results_t *ps_search_results,
*                               layer_mv_t *ps_layer_mv,
*                               S32 i4_search_blk_x,
*                               S32 i4_search_blk_y,
*                               mvbank_update_prms_t *ps_prms)
*
*  @brief  Updates the mv bank in case there is no further encoding to be done
*
*  @param[in]  ps_search_results: contains results for the block just searched
*
*  @param[in,out]  ps_layer_mv : Has pointer to mv bank amongst other things
*
*  @param[in] i4_search_blk_x  : col num of blk being searched
*
*  @param[in] i4_search_blk_y : row num of blk being searched
*
*  @param[in] ps_prms : contains certain parameters which govern how the
*                       update is done
*
*  @return None
********************************************************************************
*/

void hme_update_mv_bank_noencode(
    search_results_t *ps_search_results,
    layer_mv_t *ps_layer_mv,
    S32 i4_search_blk_x,
    S32 i4_search_blk_y,
    mvbank_update_prms_t *ps_prms)
{
    /* NxN partition ids, in the order in which the four 4x4 blks are written */
    const S32 ai4_nxn_part_id[4] = {
        PART_ID_NxN_TL, PART_ID_NxN_TR, PART_ID_NxN_BL, PART_ID_NxN_BR
    };

    hme_mv_t *aps_bank_mv[4];
    S08 *api1_bank_ref_idx[4];
    search_node_t *aps_node_4x4[4];
    search_node_t *ps_node_2Nx2N;
    S32 i4_col, i4_row, i4_bank_offset;
    S32 i4_ref, i4_res, i4_quad;

    /* Locate the entry of this blk in the mv bank and the ref idx buf */
    i4_col = i4_search_blk_x << ps_prms->i4_shift;
    i4_row = i4_search_blk_y << ps_prms->i4_shift;
    i4_bank_offset = i4_col + i4_row * ps_layer_mv->i4_num_blks_per_row;
    i4_bank_offset *= ps_layer_mv->i4_num_mvs_per_blk;

    aps_bank_mv[0] = ps_layer_mv->ps_mv + i4_bank_offset;
    api1_bank_ref_idx[0] = ps_layer_mv->pi1_ref_idx + i4_bank_offset;

    /*************************************************************************/
    /* Stored blk size equals searched blk size: 1-1 correspondence, so the  */
    /* 2Nx2N results of every ref are copied out back to back and we are done*/
    /*************************************************************************/
    if(ps_layer_mv->e_blk_size == ps_prms->e_search_blk_size)
    {
        hme_mv_t *ps_dst_mv = aps_bank_mv[0];
        S08 *pi1_dst_ref_idx = api1_bank_ref_idx[0];

        for(i4_ref = 0; i4_ref < (S32)ps_prms->i4_num_ref; i4_ref++)
        {
            ps_node_2Nx2N = ps_search_results->aps_part_results[i4_ref][PART_ID_2Nx2N];

            for(i4_res = 0; i4_res < ps_layer_mv->i4_num_mvs_per_ref; i4_res++)
            {
                COPY_SEARCH_RESULT(ps_dst_mv, pi1_dst_ref_idx, ps_node_2Nx2N, 0);
                ps_dst_mv++;
                pi1_dst_ref_idx++;
                ps_node_2Nx2N++;
            }
        }
        return;
    }

    /*************************************************************************/
    /* Otherwise the search was on 8x8 blks while the bank holds 4x4 blks.   */
    /* This needs the NxN partitions to have been searched: every 4x4 blk    */
    /* gets the results of its own NxN partition plus the 8x8 (2Nx2N) result */
    /*************************************************************************/
    ASSERT(ps_layer_mv->e_blk_size == BLK_4x4);
    ASSERT(ps_prms->e_search_blk_size == BLK_8x8);
    ASSERT((ps_search_results->i4_part_mask & (ENABLE_NxN)) == (ENABLE_NxN));

    /* Per ref, a 4x4 blk stores its 4x4 results and one 8x8 result */
    ASSERT(ps_layer_mv->i4_num_mvs_per_ref <= ps_search_results->u1_num_results_per_part + 1);

    /* Bank entries of the four 4x4 blks: TL, TR (next blk), BL (next row)  */
    /* and BR (next row, next blk)                                          */
    aps_bank_mv[1] = aps_bank_mv[0] + ps_layer_mv->i4_num_mvs_per_blk;
    aps_bank_mv[2] = aps_bank_mv[0] + (ps_layer_mv->i4_num_mvs_per_row);
    aps_bank_mv[3] = aps_bank_mv[2] + (ps_layer_mv->i4_num_mvs_per_blk);
    api1_bank_ref_idx[1] = api1_bank_ref_idx[0] + ps_layer_mv->i4_num_mvs_per_blk;
    api1_bank_ref_idx[2] = api1_bank_ref_idx[0] + (ps_layer_mv->i4_num_mvs_per_row);
    api1_bank_ref_idx[3] = api1_bank_ref_idx[2] + (ps_layer_mv->i4_num_mvs_per_blk);

    for(i4_ref = 0; i4_ref < (S32)ps_search_results->u1_num_active_ref; i4_ref++)
    {
        ps_node_2Nx2N = ps_search_results->aps_part_results[i4_ref][PART_ID_2Nx2N];

        for(i4_quad = 0; i4_quad < 4; i4_quad++)
        {
            aps_node_4x4[i4_quad] =
                ps_search_results->aps_part_results[i4_ref][ai4_nxn_part_id[i4_quad]];
        }

        /* First slot of each 4x4 blk: best result of its NxN partition */
        for(i4_quad = 0; i4_quad < 4; i4_quad++)
        {
            COPY_SEARCH_RESULT(
                aps_bank_mv[i4_quad], api1_bank_ref_idx[i4_quad], aps_node_4x4[i4_quad], 0);
            aps_bank_mv[i4_quad]++;
            api1_bank_ref_idx[i4_quad]++;
            aps_node_4x4[i4_quad]++;
        }

        /* Second slot, when present: best 8x8 result, shared by all four */
        if(ps_layer_mv->i4_num_mvs_per_ref > 1)
        {
            for(i4_quad = 0; i4_quad < 4; i4_quad++)
            {
                COPY_SEARCH_RESULT(
                    aps_bank_mv[i4_quad], api1_bank_ref_idx[i4_quad], ps_node_2Nx2N, 0);
                aps_bank_mv[i4_quad]++;
                api1_bank_ref_idx[i4_quad]++;
            }
        }

        /* Remaining slots: the subsequent results of each NxN partition */
        for(i4_res = 2; i4_res < ps_layer_mv->i4_num_mvs_per_ref; i4_res++)
        {
            for(i4_quad = 0; i4_quad < 4; i4_quad++)
            {
                COPY_SEARCH_RESULT(
                    aps_bank_mv[i4_quad], api1_bank_ref_idx[i4_quad], aps_node_4x4[i4_quad], 0);
                aps_bank_mv[i4_quad]++;
                api1_bank_ref_idx[i4_quad]++;
                aps_node_4x4[i4_quad]++;
            }
        }
    }
}

/**
********************************************************************************
*  @fn     hme_update_mv_bank_encode(search_results_t *ps_search_results,
*                               layer_mv_t *ps_layer_mv,
*                               S32 i4_search_blk_x,
*                               S32 i4_search_blk_y,
*                               mvbank_update_prms_t *ps_prms,
*                               U08 *pu1_pred_dir_searched,
*                               S32 i4_num_act_ref_l0)
*
*  @brief  Copies the partition results of a searched 16x16 blk into the four
*          8x8 entries (TL, TR, BL, BR) of the layer mv bank that it covers
*
*  @param[in]  ps_search_results: contains results for the block just searched
*
*  @param[in,out]  ps_layer_mv : Has pointer to mv bank amongst other things
*
*  @param[in] i4_search_blk_x  : col num of blk being searched
*
*  @param[in] i4_search_blk_y : row num of blk being searched
*
*  @param[in] ps_prms : shift, number of refs and number of results to store
*
*  @param[in] pu1_pred_dir_searched : for every ref processed, the first index
*                                     used into aps_part_results
*
*  @param[in] i4_num_act_ref_l0 : not referenced by this function
*
*  @return None
********************************************************************************
*/
void hme_update_mv_bank_encode(
    search_results_t *ps_search_results,
    layer_mv_t *ps_layer_mv,
    S32 i4_search_blk_x,
    S32 i4_search_blk_y,
    mvbank_update_prms_t *ps_prms,
    U08 *pu1_pred_dir_searched,
    S32 i4_num_act_ref_l0)
{
    /* Per quadrant (0 = TL, 1 = TR, 2 = BL, 3 = BR) bank roots and part ids */
    hme_mv_t *aps_mv_base[4];
    S08 *api1_ref_idx_base[4];
    S32 ai4_quad_part_id[4];

    S32 i4_bank_ofst, i4_wr_idx, i4_num_passes;
    S32 i4_ref, i4_res, i4_pass, i4_quad;
    S32 i4_num_parts, i4_first_part_id;
    WORD32 i4_part_type = ps_search_results->ps_cu_results->ps_best_results[0].u1_part_type;

    /* Locate the entry of the top left 8x8 blk in the mv bank */
    i4_bank_ofst = (i4_search_blk_x << ps_prms->i4_shift) +
                   (i4_search_blk_y << ps_prms->i4_shift) * ps_layer_mv->i4_num_blks_per_row;
    i4_bank_ofst *= ps_layer_mv->i4_num_mvs_per_blk;

    ASSERT(ps_layer_mv->e_blk_size == BLK_8x8);
    ASSERT(ps_prms->e_search_blk_size == BLK_16x16);
    ASSERT(ps_layer_mv->i4_num_mvs_per_ref <= ps_search_results->u1_num_best_results);

    /* TR is one blk to the right of TL; BL/BR are one bank row further down */
    aps_mv_base[0] = ps_layer_mv->ps_mv + i4_bank_ofst;
    aps_mv_base[1] = aps_mv_base[0] + ps_layer_mv->i4_num_mvs_per_blk;
    aps_mv_base[2] = aps_mv_base[0] + ps_layer_mv->i4_num_mvs_per_row;
    aps_mv_base[3] = aps_mv_base[2] + ps_layer_mv->i4_num_mvs_per_blk;

    api1_ref_idx_base[0] = ps_layer_mv->pi1_ref_idx + i4_bank_ofst;
    api1_ref_idx_base[1] = api1_ref_idx_base[0] + ps_layer_mv->i4_num_mvs_per_blk;
    api1_ref_idx_base[2] = api1_ref_idx_base[0] + ps_layer_mv->i4_num_mvs_per_row;
    api1_ref_idx_base[3] = api1_ref_idx_base[2] + ps_layer_mv->i4_num_mvs_per_blk;

    /* A split 16x16 CU with NxN enabled is stored as NxN */
    if((CU_16x16 == ps_search_results->e_cu_size) && (ps_search_results->u1_split_flag) &&
       (ps_search_results->i4_part_mask & ENABLE_NxN))
    {
        i4_part_type = PRT_NxN;
    }

    i4_first_part_id = ge_part_type_to_part_id[i4_part_type][0];
    i4_num_parts = gau1_num_parts_in_part_type[i4_part_type];

    /*************************************************************************/
    /* Decide which partition of the chosen part type feeds each 8x8 blk.    */
    /* 1 partition : all four blks share it.                                 */
    /* 2 partitions: vertically oriented -> TL, BL take the first and        */
    /*               TR, BR the second; horizontally oriented -> TL, TR take */
    /*               the first and BL, BR the second. For AMP this means 2   */
    /*               of the 8x8 blks carry an ambiguous result.              */
    /* otherwise   : 4 unique results.                                       */
    /*************************************************************************/
    ai4_quad_part_id[0] = i4_first_part_id;

    if(1 == i4_num_parts)
    {
        ai4_quad_part_id[1] = i4_first_part_id;
        ai4_quad_part_id[2] = i4_first_part_id;
        ai4_quad_part_id[3] = i4_first_part_id;
    }
    else if(2 == i4_num_parts)
    {
        if(gau1_is_vert_part[i4_part_type])
        {
            ai4_quad_part_id[1] = i4_first_part_id + 1;
            ai4_quad_part_id[2] = i4_first_part_id;
        }
        else
        {
            ai4_quad_part_id[1] = i4_first_part_id;
            ai4_quad_part_id[2] = i4_first_part_id + 1;
        }
        ai4_quad_part_id[3] = i4_first_part_id + 1;
    }
    else
    {
        ai4_quad_part_id[1] = i4_first_part_id + 1;
        ai4_quad_part_id[2] = i4_first_part_id + 2;
        ai4_quad_part_id[3] = i4_first_part_id + 3;
    }

    /* Best result always stored; second best only if more than 1 requested */
    i4_num_passes = (ps_prms->i4_num_results_to_store > 1) ? 2 : 1;

    /* All four quadrants advance in lockstep, so one write index suffices */
    i4_wr_idx = 0;

    for(i4_ref = 0; i4_ref < ps_prms->i4_num_ref; i4_ref++)
    {
        /* NOTE(review): i4_res does not select a different result; every   */
        /* iteration stores the same node(s) again. Kept as is - confirm.   */
        for(i4_res = 0; i4_res < ps_layer_mv->i4_num_mvs_per_ref; i4_res++)
        {
            for(i4_pass = 0; i4_pass < i4_num_passes; i4_pass++)
            {
                for(i4_quad = 0; i4_quad < 4; i4_quad++)
                {
                    search_node_t *ps_node =
                        &ps_search_results->aps_part_results[pu1_pred_dir_searched[i4_ref]]
                                                            [ai4_quad_part_id[i4_quad]][i4_pass];
                    hme_mv_t *ps_dst_mv = aps_mv_base[i4_quad] + i4_wr_idx;
                    S08 *pi1_dst_ref_idx = api1_ref_idx_base[i4_quad] + i4_wr_idx;

                    /* A node marked intra is skipped in favour of the next one */
                    if(INTRA_MV == ps_node->s_mv.i2_mvx)
                    {
                        ps_node++;
                    }

                    COPY_SEARCH_RESULT(ps_dst_mv, pi1_dst_ref_idx, ps_node, 0);
                }

                i4_wr_idx++;
            }
        }
    }
}

/**
********************************************************************************
*  @fn     hme_update_mv_bank_in_l1_me(search_results_t *ps_search_results,
*                               layer_mv_t *ps_layer_mv,
*                               S32 i4_search_blk_x,
*                               S32 i4_search_blk_y,
*                               mvbank_update_prms_t *ps_prms)
*
*  @brief  Updates the mv bank with the results of the blk just searched.
*          For every bank entry the L0 results are stored first, followed by
*          the L1 results; within each list the nodes are ordered through
*          hme_add_new_node_to_a_sorted_array()
*
*  @param[in]  ps_search_results: contains results for the block just searched
*
*  @param[in,out]  ps_layer_mv : Has pointer to mv bank amongst other things
*
*  @param[in] i4_search_blk_x  : col num of blk being searched
*
*  @param[in] i4_search_blk_y : row num of blk being searched
*
*  @param[in] ps_prms : contains certain parameters which govern how the update
*                       is done (shift, search blk size, number of references)
*
*  @return None
********************************************************************************
*/

void hme_update_mv_bank_in_l1_me(
    search_results_t *ps_search_results,
    layer_mv_t *ps_layer_mv,
    S32 i4_search_blk_x,
    S32 i4_search_blk_y,
    mvbank_update_prms_t *ps_prms)
{
    hme_mv_t *ps_mv;
    S08 *pi1_ref_idx;
    S32 i4_blk_x, i4_blk_y, i4_offset;
    S32 i4_l1_root_offset;
    S32 i4_j, i4_ref_id;
    search_node_t *ps_search_node;
    search_node_t *ps_search_node_8x8, *ps_search_node_4x4;

    i4_blk_x = i4_search_blk_x << ps_prms->i4_shift;
    i4_blk_y = i4_search_blk_y << ps_prms->i4_shift;
    i4_offset = i4_blk_x + i4_blk_y * ps_layer_mv->i4_num_blks_per_row;

    i4_offset *= ps_layer_mv->i4_num_mvs_per_blk;

    /* Identify the correct offset in the mvbank and the reference id buf */
    ps_mv = ps_layer_mv->ps_mv + i4_offset;
    pi1_ref_idx = ps_layer_mv->pi1_ref_idx + i4_offset;

    /* Within a bank entry, L1 results start after the slots of all active */
    /* L0 references                                                       */
    i4_l1_root_offset = ps_prms->i4_num_active_ref_l0 * ps_layer_mv->i4_num_mvs_per_ref;

    /*************************************************************************/
    /* Supposing we store the mvs in the same blk size as we searched (e.g.  */
    /* we searched 8x8 blks and store results for 8x8 blks), then we can     */
    /* do a straightforward single update of results. This will have a 1-1   */
    /* correspondence.                                                       */
    /*************************************************************************/
    if(ps_layer_mv->e_blk_size == ps_prms->e_search_blk_size)
    {
        search_node_t *aps_result_nodes_sorted[2][MAX_NUM_REF * 2];

        hme_mv_t *ps_mv_l0_root = ps_mv;
        hme_mv_t *ps_mv_l1_root = ps_mv + i4_l1_root_offset;

        U32 u4_num_l0_results_updated = 0;
        U32 u4_num_l1_results_updated = 0;

        S08 *pi1_ref_idx_l0_root = pi1_ref_idx;
        S08 *pi1_ref_idx_l1_root = pi1_ref_idx + i4_l1_root_offset;

        for(i4_ref_id = 0; i4_ref_id < (S32)ps_prms->i4_num_ref; i4_ref_id++)
        {
            U32 *pu4_num_results_updated;
            search_node_t **pps_result_nodes;

            /* 0 => reference is in the past (L0 list), 1 => L1 list */
            U08 u1_pred_dir_of_cur_ref = !ps_search_results->pu1_is_past[i4_ref_id];

            if(u1_pred_dir_of_cur_ref)
            {
                pu4_num_results_updated = &u4_num_l1_results_updated;
                pps_result_nodes = &aps_result_nodes_sorted[1][0];
            }
            else
            {
                pu4_num_results_updated = &u4_num_l0_results_updated;
                pps_result_nodes = &aps_result_nodes_sorted[0][0];
            }

            ps_search_node = ps_search_results->aps_part_results[i4_ref_id][PART_ID_2Nx2N];

            for(i4_j = 0; i4_j < ps_layer_mv->i4_num_mvs_per_ref; i4_j++)
            {
                hme_add_new_node_to_a_sorted_array(
                    &ps_search_node[i4_j], pps_result_nodes, NULL, *pu4_num_results_updated, 0);

                ASSERT(ps_search_node[i4_j].i1_ref_idx == i4_ref_id);
                (*pu4_num_results_updated)++;
            }
        }

        for(i4_j = 0; i4_j < (S32)u4_num_l0_results_updated; i4_j++)
        {
            COPY_SEARCH_RESULT(
                &ps_mv_l0_root[i4_j],
                &pi1_ref_idx_l0_root[i4_j],
                aps_result_nodes_sorted[0][i4_j],
                0);
        }

        for(i4_j = 0; i4_j < (S32)u4_num_l1_results_updated; i4_j++)
        {
            COPY_SEARCH_RESULT(
                &ps_mv_l1_root[i4_j],
                &pi1_ref_idx_l1_root[i4_j],
                aps_result_nodes_sorted[1][i4_j],
                0);
        }

        return;
    }

    /*************************************************************************/
    /* Case where search blk size is 8x8, but we update 4x4 results. In this */
    /* case, we need to have NxN partitions enabled in search.               */
    /* Further, we update on a 1-1 basis the 4x4 blk mvs from the respective */
    /* NxN partition. We also update the 8x8 result into each of the 4x4 bank*/
    /*************************************************************************/
    ASSERT(ps_layer_mv->e_blk_size == BLK_4x4);
    ASSERT(ps_prms->e_search_blk_size == BLK_8x8);
    ASSERT((ps_search_results->i4_part_mask & (ENABLE_NxN)) == (ENABLE_NxN));

    /*************************************************************************/
    /* For every 4x4 blk we store corresponding 4x4 results and 1 8x8 result */
    /* hence the below check.                                                */
    /*************************************************************************/
    ASSERT(ps_layer_mv->i4_num_mvs_per_ref <= ps_search_results->u1_num_results_per_part + 1);

    {
        /* Per prediction direction (0 = L0, 1 = L1): sorted node pointers and */
        /* the cost shift that was supplied along with each of those nodes     */
        search_node_t *aps_result_nodes_sorted[2][MAX_NUM_REF * 4];
        U08 au1_cost_shifts_for_sorted_node[2][MAX_NUM_REF * 4];

        /* Bank roots of the four 4x4 blks (0 = TL, 1 = TR, 2 = BL, 3 = BR) */
        hme_mv_t *aps_mv_root[4];
        S08 *api1_ref_idx_root[4];

        U32 u4_num_l0_results_updated = 0;
        U32 u4_num_l1_results_updated = 0;

        S32 i;

        aps_mv_root[0] = ps_mv;
        aps_mv_root[1] = aps_mv_root[0] + ps_layer_mv->i4_num_mvs_per_blk;
        aps_mv_root[2] = aps_mv_root[0] + ps_layer_mv->i4_num_mvs_per_row;
        aps_mv_root[3] = aps_mv_root[2] + ps_layer_mv->i4_num_mvs_per_blk;

        api1_ref_idx_root[0] = pi1_ref_idx;
        api1_ref_idx_root[1] = api1_ref_idx_root[0] + ps_layer_mv->i4_num_mvs_per_blk;
        api1_ref_idx_root[2] = api1_ref_idx_root[0] + ps_layer_mv->i4_num_mvs_per_row;
        api1_ref_idx_root[3] = api1_ref_idx_root[2] + ps_layer_mv->i4_num_mvs_per_blk;

        /* NOTE(review): the two result counters are clamped but not reset   */
        /* between 4x4 blks, so nodes sorted for an earlier blk remain in    */
        /* the lists while later blks are processed. Kept as in the original */
        /* code - confirm whether this carry-over is intended.               */
        for(i = 0; i < 4; i++)
        {
            hme_mv_t *ps_mv_l0_root = aps_mv_root[i];
            hme_mv_t *ps_mv_l1_root = aps_mv_root[i] + i4_l1_root_offset;

            S08 *pi1_ref_idx_l0_root = api1_ref_idx_root[i];
            S08 *pi1_ref_idx_l1_root = api1_ref_idx_root[i] + i4_l1_root_offset;

            for(i4_ref_id = 0; i4_ref_id < ps_search_results->u1_num_active_ref; i4_ref_id++)
            {
                U32 *pu4_num_results_updated;
                search_node_t **pps_result_nodes;
                U08 *pu1_cost_shifts_for_sorted_node;

                /* 0 => reference is in the past (L0 list), 1 => L1 list */
                U08 u1_pred_dir_of_cur_ref = !ps_search_results->pu1_is_past[i4_ref_id];

                if(u1_pred_dir_of_cur_ref)
                {
                    pu4_num_results_updated = &u4_num_l1_results_updated;
                    pps_result_nodes = &aps_result_nodes_sorted[1][0];
                    pu1_cost_shifts_for_sorted_node = &au1_cost_shifts_for_sorted_node[1][0];
                }
                else
                {
                    /* The L0 list must use its own cost shift row; sharing  */
                    /* row 1 with the L1 list mixes up the shifts of the two */
                    /* lists whenever both directions are active             */
                    pu4_num_results_updated = &u4_num_l0_results_updated;
                    pps_result_nodes = &aps_result_nodes_sorted[0][0];
                    pu1_cost_shifts_for_sorted_node = &au1_cost_shifts_for_sorted_node[0][0];
                }

                ps_search_node_8x8 = ps_search_results->aps_part_results[i4_ref_id][PART_ID_2Nx2N];

                ps_search_node_4x4 =
                    ps_search_results->aps_part_results[i4_ref_id][PART_ID_NxN_TL + i];

                for(i4_j = 0; i4_j < ps_layer_mv->i4_num_mvs_per_ref; i4_j++)
                {
                    /* 4x4 result of this blk, added with a cost shift of 0 */
                    hme_add_new_node_to_a_sorted_array(
                        &ps_search_node_4x4[i4_j],
                        pps_result_nodes,
                        pu1_cost_shifts_for_sorted_node,
                        *pu4_num_results_updated,
                        0);

                    (*pu4_num_results_updated)++;

                    /* 8x8 result covering this blk, added with a cost shift of 2 */
                    hme_add_new_node_to_a_sorted_array(
                        &ps_search_node_8x8[i4_j],
                        pps_result_nodes,
                        pu1_cost_shifts_for_sorted_node,
                        *pu4_num_results_updated,
                        2);

                    (*pu4_num_results_updated)++;
                }
            }

            /* Never store more than the bank holds for each direction */
            u4_num_l0_results_updated =
                MIN((S32)u4_num_l0_results_updated,
                    ps_prms->i4_num_active_ref_l0 * ps_layer_mv->i4_num_mvs_per_ref);

            u4_num_l1_results_updated =
                MIN((S32)u4_num_l1_results_updated,
                    ps_prms->i4_num_active_ref_l1 * ps_layer_mv->i4_num_mvs_per_ref);

            for(i4_j = 0; i4_j < (S32)u4_num_l0_results_updated; i4_j++)
            {
                COPY_SEARCH_RESULT(
                    &ps_mv_l0_root[i4_j],
                    &pi1_ref_idx_l0_root[i4_j],
                    aps_result_nodes_sorted[0][i4_j],
                    0);
            }

            for(i4_j = 0; i4_j < (S32)u4_num_l1_results_updated; i4_j++)
            {
                COPY_SEARCH_RESULT(
                    &ps_mv_l1_root[i4_j],
                    &pi1_ref_idx_l1_root[i4_j],
                    aps_result_nodes_sorted[1][i4_j],
                    0);
            }
        }
    }
}

/**
******************************************************************************
*  @brief Scales a motion vector component projected from a different layer
*         of the same picture (so no ref id related delta poc scaling
*         required)
*
*  Computes (mvcomp_p * dim_c) / dim_p after adding half of dim_p, multiplied
*  by SIGN(mvcomp_p), to the numerator. SIGN is defined elsewhere; presumably
*  this gives a rounded rather than truncated result - confirm.
*
*  mvcomp_p and dim_p are each expanded more than once, so arguments with
*  side effects must not be passed.
******************************************************************************
*/

#define SCALE_MV_COMP_RES(mvcomp_p, dim_c, dim_p)                                                  \
    ((((mvcomp_p) * (dim_c)) + ((SIGN((mvcomp_p)) * (dim_p)) >> 1)) / (dim_p))
/**
********************************************************************************
*  @fn     hme_project_coloc_candt(search_node_t *ps_search_node,
*                                   layer_ctxt_t *ps_curr_layer,
*                                   layer_ctxt_t *ps_coarse_layer,
*                                   S32 i4_pos_x,
*                                   S32 i4_pos_y,
*                                   S08 i1_ref_id,
*                                   S32 i4_result_id)
*
*  @brief  Fetches, from the mv bank of a coarser layer, the candidate stored
*          for the blk colocated with (i4_pos_x, i4_pos_y) and scales it to the
*          resolution of the current layer. Works for arbitrary (non dyadic)
*          layer size ratios.
*
*  @param[out]  ps_search_node : receives the projected mv and ref idx;
*                                u1_subpel_done is cleared
*
*  @param[in]   ps_curr_layer : current layer context
*
*  @param[in]   ps_coarse_layer  : coarser layer context
*
*  @param[in]   i4_pos_x  : x Position where mv is required (w.r.t. curr layer)
*
*  @param[in]   i4_pos_y  : y Position where mv is required (w.r.t. curr layer)
*
*  @param[in]   i1_ref_id : reference id for which the candidate is required
*
*  @param[in]   i4_result_id : result id for which the candidate is required
*                              (0 : best result, 1 : next best)
*
*  @return None
********************************************************************************
*/

void hme_project_coloc_candt(
    search_node_t *ps_search_node,
    layer_ctxt_t *ps_curr_layer,
    layer_ctxt_t *ps_coarse_layer,
    S32 i4_pos_x,
    S32 i4_pos_y,
    S08 i1_ref_id,
    S32 i4_result_id)
{
    /* Dimensions of the current layer and of the coarser layer */
    S32 i4_cur_wd = ps_curr_layer->i4_wd;
    S32 i4_cur_ht = ps_curr_layer->i4_ht;
    S32 i4_crs_wd = ps_coarse_layer->i4_wd;
    S32 i4_crs_ht = ps_coarse_layer->i4_ht;

    layer_mv_t *ps_bank = ps_coarse_layer->ps_layer_mvbank;
    S32 i4_crs_blk_wd = (S32)gau1_blk_size_to_wd[ps_bank->e_blk_size];

    S32 i4_col, i4_row, i4_entry;
    hme_mv_t *ps_src_mv;

    /* Safety check to avoid uninitialized access across temporal layers */
    i4_pos_x = CLIP3(i4_pos_x, 0, (i4_cur_wd - i4_crs_blk_wd));
    i4_pos_y = CLIP3(i4_pos_y, 0, (i4_cur_ht - i4_crs_blk_wd));

    /* Blk coordinates of the colocated position in the coarser layer */
    /* TODO: convert these to scale factors at pic level */
    i4_col = (i4_pos_x * i4_crs_wd) / (i4_cur_wd * i4_crs_blk_wd);
    i4_row = (i4_pos_y * i4_crs_ht) / (i4_cur_ht * i4_crs_blk_wd);

    /* Bank entry: blk -> group of the requested ref -> requested result */
    i4_entry = (i4_col * ps_bank->i4_num_mvs_per_blk) + (ps_bank->i4_num_mvs_per_row * i4_row) +
               (i1_ref_id * ps_bank->i4_num_mvs_per_ref) + i4_result_id;

    ps_src_mv = ps_bank->ps_mv + i4_entry;

    ps_search_node->s_mv.i2_mvx = SCALE_MV_COMP_RES(ps_src_mv->i2_mv_x, i4_cur_wd, i4_crs_wd);
    ps_search_node->s_mv.i2_mvy = SCALE_MV_COMP_RES(ps_src_mv->i2_mv_y, i4_cur_ht, i4_crs_ht);
    ps_search_node->i1_ref_idx = ps_bank->pi1_ref_idx[i4_entry];
    ps_search_node->u1_subpel_done = 0;

    /* No usable candidate stored: fall back to zero mv on the requested ref */
    if((INTRA_MV == ps_search_node->s_mv.i2_mvx) || (ps_search_node->i1_ref_idx < 0))
    {
        ps_search_node->s_mv.i2_mvx = 0;
        ps_search_node->s_mv.i2_mvy = 0;
        ps_search_node->i1_ref_idx = i1_ref_id;
    }
}

/**
********************************************************************************
*  @fn     hme_project_coloc_candt_dyadic(search_node_t *ps_search_node,
*                                   layer_ctxt_t *ps_curr_layer,
*                                   layer_ctxt_t *ps_coarse_layer,
*                                   S32 i4_pos_x,
*                                   S32 i4_pos_y,
*                                   S08 i1_ref_id,
*                                   S32 i4_result_id)
*
*  @brief  From a coarser layer, projects a candidate situated at "colocated"
*          position in the picture when the ratios are dyadic: the stored mv
*          is doubled and the blk is located with a plain right shift
*
*  @param[out]  ps_search_node : contains the projected result (mv and ref idx)
*
*  @param[in]   ps_curr_layer : current layer context
*
*  @param[in]   ps_coarse_layer  : coarser layer context
*
*  @param[in]   i4_pos_x  : x Position where mv is required (w.r.t. curr layer)
*
*  @param[in]   i4_pos_y  : y Position where mv is required (w.r.t. curr layer)
*
*  @param[in]   i1_ref_id : reference id for which the candidate required
*
*  @param[in]   i4_result_id : result id for which the candidate required
*                              (0 : best result, 1 : next best)
*
*  @return None
********************************************************************************
*/

void hme_project_coloc_candt_dyadic(
    search_node_t *ps_search_node,
    layer_ctxt_t *ps_curr_layer,
    layer_ctxt_t *ps_coarse_layer,
    S32 i4_pos_x,
    S32 i4_pos_y,
    S08 i1_ref_id,
    S32 i4_result_id)
{
    S32 wd_c, ht_c;
    S32 blksize_p, blk_x, blk_y, i4_offset;
    layer_mv_t *ps_layer_mvbank;
    hme_mv_t *ps_mv;
    S08 *pi1_ref_idx;

    /* Width and ht of current layer */
    wd_c = ps_curr_layer->i4_wd;
    ht_c = ps_curr_layer->i4_ht;

    ps_layer_mvbank = ps_coarse_layer->ps_layer_mvbank;
    /* blksize_p = log2(wd) + 1 */
    blksize_p = (S32)gau1_blk_size_to_wd_shift[ps_layer_mvbank->e_blk_size];

    /* ASSERT for valid sizes */
    ASSERT((blksize_p == 3) || (blksize_p == 4) || (blksize_p == 5));

    /* Safety check to avoid uninitialized access across temporal layers */
    /* NOTE(review): the limit subtracts the shift value (3..5), not a blk */
    /* width in pixels as hme_project_coloc_candt() does - confirm intent  */
    i4_pos_x = CLIP3(i4_pos_x, 0, (wd_c - blksize_p));
    i4_pos_y = CLIP3(i4_pos_y, 0, (ht_c - blksize_p));

    /* Project the positions to prev layer */
    /* TODO: convert these to scale factors at pic level */
    blk_x = i4_pos_x >> blksize_p;
    blk_y = i4_pos_y >> blksize_p;

    /* Pick up the mvs from the location */
    i4_offset = (blk_x * ps_layer_mvbank->i4_num_mvs_per_blk);
    i4_offset += (ps_layer_mvbank->i4_num_mvs_per_row * blk_y);

    ps_mv = ps_layer_mvbank->ps_mv + i4_offset;
    pi1_ref_idx = ps_layer_mvbank->pi1_ref_idx + i4_offset;

    /* Move to the group of results stored for the requested reference */
    ps_mv += (i1_ref_id * ps_layer_mvbank->i4_num_mvs_per_ref);
    pi1_ref_idx += (i1_ref_id * ps_layer_mvbank->i4_num_mvs_per_ref);

    /* Doubling is done with a multiply: left shifting a negative mv */
    /* component is undefined behaviour in C                         */
    ps_search_node->s_mv.i2_mvx = ps_mv[i4_result_id].i2_mv_x * 2;
    ps_search_node->s_mv.i2_mvy = ps_mv[i4_result_id].i2_mv_y * 2;
    ps_search_node->i1_ref_idx = pi1_ref_idx[i4_result_id];

    /* NOTE(review): INTRA_MV is compared against the already doubled mv; */
    /* confirm this is the intended check                                 */
    if((ps_search_node->i1_ref_idx < 0) || (ps_search_node->s_mv.i2_mvx == INTRA_MV))
    {
        /* No usable candidate stored: zero mv on the requested reference */
        ps_search_node->i1_ref_idx = i1_ref_id;
        ps_search_node->s_mv.i2_mvx = 0;
        ps_search_node->s_mv.i2_mvy = 0;
    }
}

/**
********************************************************************************
*  @fn     hme_project_coloc_candt_dyadic_implicit(search_node_t *ps_search_node,
*                                   layer_ctxt_t *ps_curr_layer,
*                                   layer_ctxt_t *ps_coarse_layer,
*                                   S32 i4_pos_x,
*                                   S32 i4_pos_y,
*                                   S32 i4_num_act_ref_l0,
*                                   U08 u1_pred_dir,
*                                   U08 u1_default_ref_id,
*                                   S32 i4_result_id)
*
*  @brief  Dyadic projection of a colocated candidate from a coarser layer
*          where the candidate is addressed by prediction direction instead
*          of by reference id: the reference index comes from the mv bank
*
*  @param[out]  ps_search_node : contains the projected result (mv and ref idx)
*
*  @param[in]   ps_curr_layer : current layer context
*
*  @param[in]   ps_coarse_layer  : coarser layer context
*
*  @param[in]   i4_pos_x  : x Position where mv is required (w.r.t. curr layer)
*
*  @param[in]   i4_pos_y  : y Position where mv is required (w.r.t. curr layer)
*
*  @param[in]   i4_num_act_ref_l0 : number of ref groups skipped in a bank
*                                   entry when u1_pred_dir is 1
*
*  @param[in]   u1_pred_dir : 1 selects the results stored after the
*                             i4_num_act_ref_l0 groups; any other value selects
*                             the results at the start of the bank entry
*
*  @param[in]   u1_default_ref_id : ref idx used when the stored candidate is
*                                   not usable
*
*  @param[in]   i4_result_id : result id for which the candidate required
*
*  @return None
********************************************************************************
*/
void hme_project_coloc_candt_dyadic_implicit(
    search_node_t *ps_search_node,
    layer_ctxt_t *ps_curr_layer,
    layer_ctxt_t *ps_coarse_layer,
    S32 i4_pos_x,
    S32 i4_pos_y,
    S32 i4_num_act_ref_l0,
    U08 u1_pred_dir,
    U08 u1_default_ref_id,
    S32 i4_result_id)
{
    S32 wd_c, ht_c;
    S32 blksize_p, blk_x, blk_y, i4_offset;
    layer_mv_t *ps_layer_mvbank;
    hme_mv_t *ps_mv;
    S08 *pi1_ref_idx;

    /* Width and ht of current layer */
    wd_c = ps_curr_layer->i4_wd;
    ht_c = ps_curr_layer->i4_ht;

    ps_layer_mvbank = ps_coarse_layer->ps_layer_mvbank;
    /* Despite its name this is a shift amount, not a size in pixels */
    blksize_p = (S32)gau1_blk_size_to_wd_shift[ps_layer_mvbank->e_blk_size];

    /* ASSERT for valid sizes */
    ASSERT((blksize_p == 3) || (blksize_p == 4) || (blksize_p == 5));

    /* Safety check to avoid uninitialized access across temporal layers */
    /* NOTE(review): the limit subtracts the shift value (3..5), not a blk */
    /* width in pixels as hme_project_coloc_candt() does - confirm intent  */
    i4_pos_x = CLIP3(i4_pos_x, 0, (wd_c - blksize_p));
    i4_pos_y = CLIP3(i4_pos_y, 0, (ht_c - blksize_p));

    /* Project the positions to prev layer */
    /* TODO: convert these to scale factors at pic level */
    blk_x = i4_pos_x >> blksize_p;
    blk_y = i4_pos_y >> blksize_p;

    /* Pick up the mvs from the location */
    i4_offset = (blk_x * ps_layer_mvbank->i4_num_mvs_per_blk);
    i4_offset += (ps_layer_mvbank->i4_num_mvs_per_row * blk_y);

    ps_mv = ps_layer_mvbank->ps_mv + i4_offset;
    pi1_ref_idx = ps_layer_mvbank->pi1_ref_idx + i4_offset;

    /* For pred dir 1, skip over the groups of the i4_num_act_ref_l0 refs */
    if(u1_pred_dir == 1)
    {
        ps_mv += (i4_num_act_ref_l0 * ps_layer_mvbank->i4_num_mvs_per_ref);
        pi1_ref_idx += (i4_num_act_ref_l0 * ps_layer_mvbank->i4_num_mvs_per_ref);
    }

    /* Doubling is done with a multiply: left shifting a negative mv */
    /* component is undefined behaviour in C                         */
    ps_search_node->s_mv.i2_mvx = ps_mv[i4_result_id].i2_mv_x * 2;
    ps_search_node->s_mv.i2_mvy = ps_mv[i4_result_id].i2_mv_y * 2;
    ps_search_node->i1_ref_idx = pi1_ref_idx[i4_result_id];

    /* NOTE(review): INTRA_MV is compared against the already doubled mv; */
    /* confirm this is the intended check                                 */
    if((ps_search_node->i1_ref_idx < 0) || (ps_search_node->s_mv.i2_mvx == INTRA_MV))
    {
        /* No usable candidate stored: zero mv on the default reference */
        ps_search_node->i1_ref_idx = u1_default_ref_id;
        ps_search_node->s_mv.i2_mvx = 0;
        ps_search_node->s_mv.i2_mvy = 0;
    }
}

/**
******************************************************************************
*  @brief Writes the four limits (min/max x and y) of prm2, each multiplied
*         by 2^shift, into prm1. prm1 and prm2 are struct lvalues.
*
*  Every argument is parenthesized so that expressions such as *ps_prms or a
*  conditional shift expand correctly. Scaling uses a multiply by (1 << shift)
*  because left shifting a negative limit is undefined behaviour in C.
*  Each argument is expanded four times; do not pass side effects.
******************************************************************************
*/
#define SCALE_RANGE_PRMS(prm1, prm2, shift)                                                        \
    {                                                                                              \
        (prm1).i2_min_x = (prm2).i2_min_x * (1 << (shift));                                        \
        (prm1).i2_max_x = (prm2).i2_max_x * (1 << (shift));                                        \
        (prm1).i2_min_y = (prm2).i2_min_y * (1 << (shift));                                        \
        (prm1).i2_max_y = (prm2).i2_max_y * (1 << (shift));                                        \
    }

/* Pointer flavour of the range scaling: the four bounds read through prm2 are scaled by         */
/* (1 << shift) and written through prm1. Arguments are parenthesised so that expressions such   */
/* as &s_range or (a ? 1 : 2) expand as intended, and a multiply replaces the left shift since   */
/* shifting a negative bound is undefined behaviour in C.                                        */
#define SCALE_RANGE_PRMS_POINTERS(prm1, prm2, shift)                                               \
    {                                                                                              \
        (prm1)->i2_min_x = (prm2)->i2_min_x * (1 << (shift));                                      \
        (prm1)->i2_max_x = (prm2)->i2_max_x * (1 << (shift));                                      \
        (prm1)->i2_min_y = (prm2)->i2_min_y * (1 << (shift));                                      \
        (prm1)->i2_max_y = (prm2)->i2_max_y * (1 << (shift));                                      \
    }

/**
********************************************************************************
*  @fn   void hme_refine_frm_init(layer_ctxt_t *ps_curr_layer,
*                       refine_prms_t *ps_refine_prms,
*                       layer_ctxt_t *ps_coarse_layer)
*
*  @brief  Frame init of a refinement layer in ME: initialises the mv bank of
*          the current layer via hme_init_mv_bank()
*
*  @param[in,out]  ps_curr_layer : layer ctxt passed on to hme_init_mv_bank()
*
*  @param[in]  ps_refine_prms : refinement layer prms (explicit_ref,
*                               i4_enable_4x4_part, i4_num_mvbank_results and
*                               i4_layer_id are read)
*
*  @param[in]  ps_coarse_layer : layer ctxt whose mv bank supplies i4_num_ref
*
*  @return None
********************************************************************************
*/
void hme_refine_frm_init(
    layer_ctxt_t *ps_curr_layer, refine_prms_t *ps_refine_prms, layer_ctxt_t *ps_coarse_layer)
{
    /* Number of references held by the mv bank of the coarser layer */
    const S32 i4_num_ref_coarse = ps_coarse_layer->ps_layer_mvbank->i4_num_ref;

    /* With explicit_ref set every coarse layer ref is requested, else only 2 */
    S32 i4_num_ref_fpel = ps_refine_prms->explicit_ref ? i4_num_ref_coarse : 2;

    /* Results are stored at 8x8 granularity unless 4x4 partitions are enabled */
    BLK_SIZE_T e_result_blk_size = ps_refine_prms->i4_enable_4x4_part ? BLK_4x4 : BLK_8x8;

    /* Never request more refs than the coarser layer carries */
    if(i4_num_ref_coarse < i4_num_ref_fpel)
    {
        i4_num_ref_fpel = i4_num_ref_coarse;
    }

    /* Last argument is 1 only when i4_layer_id is not greater than 0 */
    hme_init_mv_bank(
        ps_curr_layer,
        e_result_blk_size,
        i4_num_ref_fpel,
        ps_refine_prms->i4_num_mvbank_results,
        !(ps_refine_prms->i4_layer_id > 0));
}

#if 1  //ENABLE_CU_RECURSION || TEST_AND_EVALUATE_CU_RECURSION
/**
********************************************************************************
*  @fn   void hme_init_clusters_16x16
*               (
*                   cluster_16x16_blk_t *ps_cluster_blk_16x16,
*                   S32 bidir_enabled
*               )
*
*  @brief  Initialisations for the structs used in the clustering algorithm
*
*  @param[in/out]  ps_cluster_blk_16x16: pointer to structure containing clusters
*                                        of 16x16 block
*
*  @param[in]  bidir_enabled: non-zero selects MAX_DISTANCE_FROM_CENTROID_16x16_B
*                             as max_dist_from_centroid, zero selects
*                             MAX_DISTANCE_FROM_CENTROID_16x16
*
*  @return None
********************************************************************************
*/
static __inline void
    hme_init_clusters_16x16(cluster_16x16_blk_t *ps_cluster_blk_16x16, S32 bidir_enabled)
{
    /* Same max_dist_from_centroid is given to every cluster of the block */
    const S32 i4_max_dist =
        bidir_enabled ? MAX_DISTANCE_FROM_CENTROID_16x16_B : MAX_DISTANCE_FROM_CENTROID_16x16;
    S32 idx;

    /* Block level fields */
    ps_cluster_blk_16x16->best_inter_cost = 0;
    ps_cluster_blk_16x16->intra_mv_area = 0;
    ps_cluster_blk_16x16->num_clusters = 0;

    /* Per cluster fields */
    idx = 0;
    while(idx < MAX_NUM_CLUSTERS_16x16)
    {
        ps_cluster_blk_16x16->as_cluster_data[idx].is_valid_cluster = 0;
        ps_cluster_blk_16x16->as_cluster_data[idx].max_dist_from_centroid = i4_max_dist;
        ps_cluster_blk_16x16->as_cluster_data[idx].uni_mv_pixel_area = 0;
        ps_cluster_blk_16x16->as_cluster_data[idx].bi_mv_pixel_area = 0;
        idx++;
    }

    /* Per reference cluster counts */
    idx = 0;
    while(idx < MAX_NUM_REF)
    {
        ps_cluster_blk_16x16->au1_num_clusters[idx] = 0;
        idx++;
    }
}

/**
********************************************************************************
*  @fn   void hme_init_clusters_32x32
*               (
*                   cluster_32x32_blk_t *ps_cluster_blk_32x32,
*                   S32 bidir_enabled
*               )
*
*  @brief  Initialisations for the structs used in the clustering algorithm
*
*  @param[in/out]  ps_cluster_blk_32x32: pointer to structure containing clusters
*                                        of 32x32 block
*
*  @param[in]  bidir_enabled: non-zero selects MAX_DISTANCE_FROM_CENTROID_32x32_B
*                             as max_dist_from_centroid, zero selects
*                             MAX_DISTANCE_FROM_CENTROID_32x32
*
*  @return None
********************************************************************************
*/
static __inline void
    hme_init_clusters_32x32(cluster_32x32_blk_t *ps_cluster_blk_32x32, S32 bidir_enabled)
{
    /* Same max_dist_from_centroid is given to every cluster of the block */
    const S32 i4_max_dist =
        bidir_enabled ? MAX_DISTANCE_FROM_CENTROID_32x32_B : MAX_DISTANCE_FROM_CENTROID_32x32;
    S32 idx;

    /* Block level fields; -1 marks the top ref ids as not yet assigned */
    ps_cluster_blk_32x32->best_uni_ref = -1;
    ps_cluster_blk_32x32->best_alt_ref = -1;
    ps_cluster_blk_32x32->best_inter_cost = 0;
    ps_cluster_blk_32x32->intra_mv_area = 0;
    ps_cluster_blk_32x32->num_clusters = 0;
    ps_cluster_blk_32x32->num_clusters_with_weak_sdi_density = 0;

    /* Per cluster fields */
    idx = 0;
    while(idx < MAX_NUM_CLUSTERS_32x32)
    {
        ps_cluster_blk_32x32->as_cluster_data[idx].is_valid_cluster = 0;
        ps_cluster_blk_32x32->as_cluster_data[idx].max_dist_from_centroid = i4_max_dist;
        ps_cluster_blk_32x32->as_cluster_data[idx].uni_mv_pixel_area = 0;
        ps_cluster_blk_32x32->as_cluster_data[idx].bi_mv_pixel_area = 0;
        idx++;
    }

    /* Per reference cluster counts */
    idx = 0;
    while(idx < MAX_NUM_REF)
    {
        ps_cluster_blk_32x32->au1_num_clusters[idx] = 0;
        idx++;
    }
}

/**
********************************************************************************
*  @fn   void hme_init_clusters_64x64
*               (
*                   cluster_64x64_blk_t *ps_cluster_blk_64x64,
*                   S32 bidir_enabled
*               )
*
*  @brief  Initialisations for the structs used in the clustering algorithm
*
*  @param[in/out]  ps_cluster_blk_64x64: pointer to structure containing clusters
*                                        of 64x64 block
*
*  @param[in]  bidir_enabled: non-zero selects MAX_DISTANCE_FROM_CENTROID_64x64_B
*                             as max_dist_from_centroid, zero selects
*                             MAX_DISTANCE_FROM_CENTROID_64x64
*
*  @return None
********************************************************************************
*/
static __inline void
    hme_init_clusters_64x64(cluster_64x64_blk_t *ps_cluster_blk_64x64, S32 bidir_enabled)
{
    /* Same max_dist_from_centroid is given to every cluster of the block */
    const S32 i4_max_dist =
        bidir_enabled ? MAX_DISTANCE_FROM_CENTROID_64x64_B : MAX_DISTANCE_FROM_CENTROID_64x64;
    S32 idx;

    /* Block level fields; -1 marks the top ref ids as not yet assigned */
    ps_cluster_blk_64x64->best_uni_ref = -1;
    ps_cluster_blk_64x64->best_alt_ref = -1;
    ps_cluster_blk_64x64->best_inter_cost = 0;
    ps_cluster_blk_64x64->intra_mv_area = 0;
    ps_cluster_blk_64x64->num_clusters = 0;

    /* Per cluster fields */
    idx = 0;
    while(idx < MAX_NUM_CLUSTERS_64x64)
    {
        ps_cluster_blk_64x64->as_cluster_data[idx].is_valid_cluster = 0;
        ps_cluster_blk_64x64->as_cluster_data[idx].max_dist_from_centroid = i4_max_dist;
        ps_cluster_blk_64x64->as_cluster_data[idx].uni_mv_pixel_area = 0;
        ps_cluster_blk_64x64->as_cluster_data[idx].bi_mv_pixel_area = 0;
        idx++;
    }

    /* Per reference cluster counts */
    idx = 0;
    while(idx < MAX_NUM_REF)
    {
        ps_cluster_blk_64x64->au1_num_clusters[idx] = 0;
        idx++;
    }
}

/**
********************************************************************************
*  @fn   void hme_sort_and_assign_top_ref_ids_areawise
*               (
*                   ctb_cluster_info_t *ps_ctb_cluster_info,
*                   S32 bidir_enabled,
*                   S32 block_width,
*                   CU_POS_T e_cu_pos
*               )
*
*  @brief  Accumulates the uni and bi pixel areas of the valid clusters per
*          ref id and derives best_uni_ref (largest uni area) and
*          best_alt_ref (largest bi area among refs other than best_uni_ref)
*
*  @param[in/out]  ps_ctb_cluster_info: structure that points to ctb data
*
*  @param[in]  bidir_enabled: flag that indicates whether or not bi-pred is
*                             enabled; best_alt_ref stays -1 when it is 0
*
*  @param[in]  block_width: width of the block in pels; 32 selects the 32x32
*                           block at e_cu_pos, any other value the 64x64 block
*
*  @param[in]  e_cu_pos: position of the block within the CTB
*
*  @return None
********************************************************************************
*/
void hme_sort_and_assign_top_ref_ids_areawise(
    ctb_cluster_info_t *ps_ctb_cluster_info, S32 bidir_enabled, S32 block_width, CU_POS_T e_cu_pos)
{
    cluster_32x32_blk_t *ps_32x32 = NULL;
    cluster_64x64_blk_t *ps_64x64 = NULL;
    cluster_data_t *ps_data;

    /* All tables are indexed by ref id until the partial sorts further down */
    S32 ai4_uni_area[MAX_NUM_REF];
    S32 ai4_bi_area[MAX_NUM_REF];
    S32 ai4_ref_id_found[MAX_NUM_REF];
    S32 ai4_ref_id[MAX_NUM_REF];
    S32 ai4_ref_id_temp[MAX_NUM_REF];

    S32 k;
    S32 best_uni_ref;
    S32 best_alt_ref = -1;
    S32 num_ref = 0;
    S32 num_clusters;
    S32 num_clusters_pending;
    S32 is_cur_blk_valid;

    const S32 is_blk_32x32 = (32 == block_width);

    if(is_blk_32x32)
    {
        ps_32x32 = &ps_ctb_cluster_info->ps_32x32_blk[e_cu_pos];
        ps_data = &ps_32x32->as_cluster_data[0];
        num_clusters = ps_32x32->num_clusters;

        /* Block is valid when its bit is set in the 32x32 mask */
        is_cur_blk_valid = (0 != (ps_ctb_cluster_info->blk_32x32_mask & (1 << e_cu_pos)));
    }
    else
    {
        ps_64x64 = ps_ctb_cluster_info->ps_64x64_blk;
        ps_data = &ps_64x64->as_cluster_data[0];
        num_clusters = ps_64x64->num_clusters;

        /* 64x64 is valid only when all four 32x32 bits are set */
        is_cur_blk_valid = (0xf == ps_ctb_cluster_info->blk_32x32_mask);
    }

#if !ENABLE_4CTB_EVALUATION
    if(num_clusters > MAX_NUM_CLUSTERS_IN_VALID_32x32_BLK)
    {
        return;
    }
#endif

    /* Nothing to rank for an empty or invalid block */
    if((0 == num_clusters) || !is_cur_blk_valid)
    {
        return;
    }

    memset(ai4_uni_area, 0, sizeof(ai4_uni_area));
    memset(ai4_bi_area, 0, sizeof(ai4_bi_area));
    memset(ai4_ref_id_found, 0, sizeof(ai4_ref_id_found));
    /* Byte fill of 0xff leaves every entry equal to -1 */
    memset(ai4_ref_id, -1, sizeof(ai4_ref_id));

    /* Walk the cluster array until 'num_clusters' valid entries were seen */
    num_clusters_pending = num_clusters;

    while(num_clusters_pending > 0)
    {
        if(ps_data->is_valid_cluster)
        {
            const S32 ref_id = ps_data->ref_id;

            num_clusters_pending--;

            ai4_uni_area[ref_id] += ps_data->uni_mv_pixel_area;
            ai4_bi_area[ref_id] += ps_data->bi_mv_pixel_area;

            /* Count each ref id only once */
            if(!ai4_ref_id_found[ref_id])
            {
                ai4_ref_id_found[ref_id] = 1;
                ai4_ref_id[ref_id] = ref_id;
                num_ref++;
            }
        }

        ps_data++;
    }

    /* Bring the largest uni area to index 0; a copy of the id table is      */
    /* permuted so that ai4_ref_id stays aligned with ai4_bi_area            */
    memcpy(ai4_ref_id_temp, ai4_ref_id, sizeof(ai4_ref_id_temp));

    for(k = 1; k < MAX_NUM_REF; k++)
    {
        if(ai4_uni_area[k] > ai4_uni_area[0])
        {
            SWAP_HME(ai4_uni_area[k], ai4_uni_area[0], S32);
            SWAP_HME(ai4_ref_id_temp[k], ai4_ref_id_temp[0], S32);
        }
    }

    best_uni_ref = ai4_ref_id_temp[0];

    if(bidir_enabled)
    {
        /* Bring the largest bi area to index 0 */
        for(k = 1; k < MAX_NUM_REF; k++)
        {
            if(ai4_bi_area[k] > ai4_bi_area[0])
            {
                SWAP_HME(ai4_bi_area[k], ai4_bi_area[0], S32);
                SWAP_HME(ai4_ref_id[k], ai4_ref_id[0], S32);
            }
        }

        /* With no bi area at all best_alt_ref keeps its initial -1 */
        if(ai4_bi_area[0])
        {
            if(best_uni_ref != ai4_ref_id[0])
            {
                best_alt_ref = ai4_ref_id[0];
            }
            else
            {
                /* Top bi ref coincides with the uni ref: use the runner-up */
                for(k = 2; k < MAX_NUM_REF; k++)
                {
                    if(ai4_bi_area[k] > ai4_bi_area[1])
                    {
                        SWAP_HME(ai4_bi_area[k], ai4_bi_area[1], S32);
                        SWAP_HME(ai4_ref_id[k], ai4_ref_id[1], S32);
                    }
                }

                best_alt_ref = ai4_ref_id[1];
            }
        }
    }

    if(is_blk_32x32)
    {
        SET_VALUES_FOR_TOP_REF_IDS(ps_32x32, best_uni_ref, best_alt_ref, num_ref);
    }
    else
    {
        SET_VALUES_FOR_TOP_REF_IDS(ps_64x64, best_uni_ref, best_alt_ref, num_ref);
    }
}

/**
********************************************************************************
*  @fn   void hme_find_top_ref_ids
*               (
*                   ctb_cluster_info_t *ps_ctb_cluster_info,
*                   S32 bidir_enabled,
*                   S32 block_width
*               )
*
*  @brief  Finds best_uni_ref and best_alt_ref for the four 32x32 blocks
*          (block_width 32) or for the 64x64 block (block_width 64) by calling
*          hme_sort_and_assign_top_ref_ids_areawise(); any other width is a
*          no-op
*
*  @param[in/out]  ps_ctb_cluster_info: structure that points to ctb data
*
*  @param[in]  bidir_enabled: forwarded unchanged to the area-wise sort
*
*  @param[in]  block_width: width of the block in pels (32 or 64)
*
*  @return None
********************************************************************************
*/
void hme_find_top_ref_ids(
    ctb_cluster_info_t *ps_ctb_cluster_info, S32 bidir_enabled, S32 block_width)
{
    switch(block_width)
    {
    case 32:
    {
        /* One call per 32x32 block position, in increasing position order */
        S32 blk_idx = 0;

        do
        {
            hme_sort_and_assign_top_ref_ids_areawise(
                ps_ctb_cluster_info, bidir_enabled, block_width, (CU_POS_T)blk_idx);
        } while(++blk_idx < 4);

        break;
    }
    case 64:
    {
        hme_sort_and_assign_top_ref_ids_areawise(
            ps_ctb_cluster_info, bidir_enabled, block_width, POS_NA);

        break;
    }
    default:
    {
        break;
    }
    }
}

/**
********************************************************************************
*  @fn   void hme_boot_out_outlier
*               (
*                   ctb_cluster_info_t *ps_ctb_cluster_info,
*                   S32 blk_width
*               )
*
*  @brief  Removes outlier clusters before CU tree population. The removal
*          (BUMP_OUTLIER_CLUSTERS) is attempted only for blocks that hold
*          more than MAX_NUM_CLUSTERS_IN_ONE_REF_IDX clusters
*
*  @param[in/out]  ps_ctb_cluster_info: structure that points to ctb data
*
*  @param[in]  blk_width: 32 processes the four 32x32 blocks, 64 the 64x64
*                         block; any other value is a no-op
*
*  @return None
********************************************************************************
*/
void hme_boot_out_outlier(ctb_cluster_info_t *ps_ctb_cluster_info, S32 blk_width)
{
    cluster_32x32_blk_t *ps_32x32;
    cluster_64x64_blk_t *ps_64x64 = &ps_ctb_cluster_info->ps_64x64_blk[0];

    S32 sdi_threshold = ps_ctb_cluster_info->sdi_threshold;
    S32 i;

    switch(blk_width)
    {
    case 32:
    {
        /* 32x32 clusters */
        for(i = 0; 4 != i; ++i)
        {
            ps_32x32 = &ps_ctb_cluster_info->ps_32x32_blk[i];

            if(MAX_NUM_CLUSTERS_IN_ONE_REF_IDX < ps_32x32->num_clusters)
            {
                BUMP_OUTLIER_CLUSTERS(ps_32x32, sdi_threshold);
            }
        }

        break;
    }
    case 64:
    {
        /* 64x64 clusters */
        if(MAX_NUM_CLUSTERS_IN_ONE_REF_IDX < ps_64x64->num_clusters)
        {
            BUMP_OUTLIER_CLUSTERS(ps_64x64, sdi_threshold);
        }

        break;
    }
    default:
    {
        break;
    }
    }
}

/**
********************************************************************************
*  @fn   void hme_update_cluster_attributes
*               (
*                   cluster_data_t *ps_cluster_data,
*                   S32 mvx,
*                   S32 mvy,
*                   S32 mvdx,
*                   S32 mvdy,
*                   S32 ref_id,
*                   S32 sdi,
*                   U08 is_part_of_bi,
*                   PART_ID_T e_part_id
*               )
*
*  @brief  Adds one motion vector to an existing cluster: extends the min/max
*          extents, appends the mv to as_mv[], recomputes the Q8 centroid as
*          the running mean and accumulates the pixel areas
*
*  @param[in/out]  ps_cluster_data: pointer to the cluster being updated
*
*  @param[in]  mvx : x co-ordinate of the motion vector
*
*  @param[in]  mvy : y co-ordinate of the motion vector
*
*  @param[in]  mvdx : x distance of the mv from the centroid; only its sign is
*                     used (positive: candidate for min_x, negative: candidate
*                     for max_x). hme_find_and_update_clusters passes the
*                     rounded value of (centroid - mv)
*
*  @param[in]  mvdy : y counterpart of mvdx
*
*  @param[in]  ref_id : ref_id of the motion vector (not used by this function)
*
*  @param[in]  sdi : value stored in the sdi field of the new as_mv[] entry
*
*  @param[in]  is_part_of_bi : non-zero adds the partition area to
*                              bi_mv_pixel_area, zero to uni_mv_pixel_area
*
*  @param[in]  e_part_id : partition id of the motion vector; indexes
*                          gai4_partition_area
*
*  @return None
********************************************************************************
*/
static __inline void hme_update_cluster_attributes(
    cluster_data_t *ps_cluster_data,
    S32 mvx,
    S32 mvy,
    S32 mvdx,
    S32 mvdy,
    S32 ref_id,
    S32 sdi,
    U08 is_part_of_bi,
    PART_ID_T e_part_id)
{
    LWORD64 i8_mvx_sum_q8;
    LWORD64 i8_mvy_sum_q8;

    const S32 part_area = gai4_partition_area[e_part_id];
    const S32 num_mvs = ps_cluster_data->num_mvs;

    const S32 centroid_posx_q8 = ps_cluster_data->s_centroid.i4_pos_x_q8;
    const S32 centroid_posy_q8 = ps_cluster_data->s_centroid.i4_pos_y_q8;

    /* Kept in the interface for the callers; nothing here depends on it */
    (void)ref_id;

    /* Extents: the sign of the distance selects which bound may move */
    if((mvdx > 0) && (ps_cluster_data->min_x > mvx))
    {
        ps_cluster_data->min_x = mvx;
    }
    else if((mvdx < 0) && (ps_cluster_data->max_x < mvx))
    {
        ps_cluster_data->max_x = mvx;
    }

    if((mvdy > 0) && (ps_cluster_data->min_y > mvy))
    {
        ps_cluster_data->min_y = mvy;
    }
    else if((mvdy < 0) && (ps_cluster_data->max_y < mvy))
    {
        ps_cluster_data->max_y = mvy;
    }

    /* Append the mv to the list of the cluster.                             */
    /* NOTE(review): no capacity check on as_mv[] is visible here; it is     */
    /* presumably guaranteed by the callers - confirm                        */
    ps_cluster_data->as_mv[num_mvs].pixel_count = part_area;
    ps_cluster_data->as_mv[num_mvs].mvx = mvx;
    ps_cluster_data->as_mv[num_mvs].mvy = mvy;
    ps_cluster_data->as_mv[num_mvs].is_uni = !is_part_of_bi;
    ps_cluster_data->as_mv[num_mvs].sdi = sdi;

    /* Updation of centroid: running mean in Q8. The mv is scaled with a     */
    /* 64 bit multiply because left shifting a negative mv is undefined      */
    /* behaviour in C                                                        */
    i8_mvx_sum_q8 = (LWORD64)centroid_posx_q8 * num_mvs + (LWORD64)mvx * (1 << 8);
    i8_mvy_sum_q8 = (LWORD64)centroid_posy_q8 * num_mvs + (LWORD64)mvy * (1 << 8);

    ps_cluster_data->num_mvs++;

    ps_cluster_data->s_centroid.i4_pos_x_q8 = (WORD32)((i8_mvx_sum_q8) / ps_cluster_data->num_mvs);
    ps_cluster_data->s_centroid.i4_pos_y_q8 = (WORD32)((i8_mvy_sum_q8) / ps_cluster_data->num_mvs);

    /* Area bookkeeping */
    ps_cluster_data->area_in_pixels += part_area;

    if(is_part_of_bi)
    {
        ps_cluster_data->bi_mv_pixel_area += part_area;
    }
    else
    {
        ps_cluster_data->uni_mv_pixel_area += part_area;
    }
}

/**
********************************************************************************
*  @fn   void hme_try_cluster_merge
*               (
*                   cluster_data_t *ps_cluster_data,
*                   U08 *pu1_num_clusters,
*                   S32 idx_of_updated_cluster
*               )
*
*  @brief  Tries to merge the most recently updated cluster with another valid
*          cluster of the same ref_id whose centroid lies within half of the
*          updated cluster's max_dist_from_centroid. After a merge the
*          function calls itself again from the start of the array, so
*          several clusters may be absorbed by a single top level call
*
*  @param[in/out]  ps_cluster_data: pointer to the first element of the
*                                   cluster array
*
*  @param[in/out]  pu1_num_clusters : pointer to number of valid clusters;
*                                     decremented once per merge
*
*  @param[in]  idx_of_updated_cluster : index of the cluster most recently
*                                       updated; it is the one that survives
*                                       a merge
*
*  @return Nothing
********************************************************************************
*/
void hme_try_cluster_merge(
    cluster_data_t *ps_cluster_data, U08 *pu1_num_clusters, S32 idx_of_updated_cluster)
{
    centroid_t *ps_centroid;

    S32 cur_pos_x_q8;
    S32 cur_pos_y_q8;
    S32 i;
    S32 max_dist_from_centroid;
    S32 mvd;
    S32 mvdx_q8;
    S32 mvdx;
    S32 mvdy_q8;
    S32 mvdy;
    S32 num_clusters, num_clusters_evaluated;
    S32 other_pos_x_q8;
    S32 other_pos_y_q8;

    /* ps_root keeps the array start for the recursive call; ps_cluster_data */
    /* itself is advanced by the scan loop below                             */
    cluster_data_t *ps_root = ps_cluster_data;
    cluster_data_t *ps_cur_cluster = &ps_cluster_data[idx_of_updated_cluster];
    centroid_t *ps_cur_centroid = &ps_cur_cluster->s_centroid;

    /* Merge is superfluous if num_clusters is 1 */
    if(*pu1_num_clusters == 1)
    {
        return;
    }

    /* Snapshot of the updated cluster taken before any merge happens */
    cur_pos_x_q8 = ps_cur_centroid->i4_pos_x_q8;
    cur_pos_y_q8 = ps_cur_centroid->i4_pos_y_q8;

    max_dist_from_centroid = ps_cur_cluster->max_dist_from_centroid;

    num_clusters = *pu1_num_clusters;
    num_clusters_evaluated = 0;

    /* The scan ends once 'num_clusters' valid entries were visited; invalid */
    /* entries are stepped over without being counted                        */
    for(i = 0; num_clusters_evaluated < num_clusters; i++, ps_cluster_data++)
    {
        if(!ps_cluster_data->is_valid_cluster)
        {
            continue;
        }
        /* Clusters of another ref and the updated cluster itself are never  */
        /* merge candidates                                                  */
        if((ps_cluster_data->ref_id != ps_cur_cluster->ref_id) || (i == idx_of_updated_cluster))
        {
            num_clusters_evaluated++;
            continue;
        }

        ps_centroid = &ps_cluster_data->s_centroid;

        other_pos_x_q8 = ps_centroid->i4_pos_x_q8;
        other_pos_y_q8 = ps_centroid->i4_pos_y_q8;

        /* Centroid difference, rounded from Q8 to integer units */
        mvdx_q8 = (cur_pos_x_q8 - other_pos_x_q8);
        mvdy_q8 = (cur_pos_y_q8 - other_pos_y_q8);
        mvdx = (mvdx_q8 + (1 << 7)) >> 8;
        mvdy = (mvdy_q8 + (1 << 7)) >> 8;

        /* Sum of absolute differences of the two centroids */
        mvd = ABS(mvdx) + ABS(mvdy);

        /* Merge when the centroids are within half the allowed distance */
        if(mvd <= (max_dist_from_centroid >> 1))
        {
            /* 0 => no updates */
            /* 1 => min updated */
            /* 2 => max updated */
            S32 minmax_x_update_id;
            S32 minmax_y_update_id;

            /* Centroid times mv count gives back the Q8 mv sums of both     */
            /* clusters                                                      */
            LWORD64 i8_mv_x_sum_self = (LWORD64)cur_pos_x_q8 * ps_cur_cluster->num_mvs;
            LWORD64 i8_mv_y_sum_self = (LWORD64)cur_pos_y_q8 * ps_cur_cluster->num_mvs;
            LWORD64 i8_mv_x_sum_cousin = (LWORD64)other_pos_x_q8 * ps_cluster_data->num_mvs;
            LWORD64 i8_mv_y_sum_cousin = (LWORD64)other_pos_y_q8 * ps_cluster_data->num_mvs;

            (*pu1_num_clusters)--;

            /* Only the valid flag of the absorbed cluster is cleared; its   */
            /* other fields keep their values                                */
            ps_cluster_data->is_valid_cluster = 0;

            /* Append the mvs of the absorbed cluster to the survivor.       */
            /* NOTE(review): no capacity check on as_mv[] is visible here -  */
            /* confirm that the combined count always fits                   */
            memcpy(
                &ps_cur_cluster->as_mv[ps_cur_cluster->num_mvs],
                ps_cluster_data->as_mv,
                sizeof(mv_data_t) * ps_cluster_data->num_mvs);

            ps_cur_cluster->num_mvs += ps_cluster_data->num_mvs;
            ps_cur_cluster->area_in_pixels += ps_cluster_data->area_in_pixels;
            ps_cur_cluster->bi_mv_pixel_area += ps_cluster_data->bi_mv_pixel_area;
            ps_cur_cluster->uni_mv_pixel_area += ps_cluster_data->uni_mv_pixel_area;
            i8_mv_x_sum_self += i8_mv_x_sum_cousin;
            i8_mv_y_sum_self += i8_mv_y_sum_cousin;

            /* New centroid of the survivor: mean over the combined mv count */
            ps_cur_centroid->i4_pos_x_q8 = (WORD32)(i8_mv_x_sum_self / ps_cur_cluster->num_mvs);
            ps_cur_centroid->i4_pos_y_q8 = (WORD32)(i8_mv_y_sum_self / ps_cur_cluster->num_mvs);

            /* Only one bound per axis is selected: when the survivor's min  */
            /* is not below the absorbed min the id is 1 and the max is not  */
            /* examined at all                                               */
            minmax_x_update_id = (ps_cur_cluster->min_x < ps_cluster_data->min_x)
                                     ? ((ps_cur_cluster->max_x > ps_cluster_data->max_x) ? 0 : 2)
                                     : 1;
            minmax_y_update_id = (ps_cur_cluster->min_y < ps_cluster_data->min_y)
                                     ? ((ps_cur_cluster->max_y > ps_cluster_data->max_y) ? 0 : 2)
                                     : 1;

            /* Updation of centroid spread */
            /* Switch key = x id + 4 * y id, so 1/2 are x-only, 4/8 y-only   */
            /* and 5/6/9/10 the combined cases. The locals declared inside   */
            /* the cases shadow the function level mvd/mvdx/mvdy.            */
            /* NOTE(review): in every case ps_centroid is the centroid of    */
            /* the absorbed cluster and max_dist_from_centroid is written to */
            /* ps_cluster_data, i.e. the cluster invalidated above, not to   */
            /* ps_cur_cluster - confirm this is intended                     */
            /* NOTE(review): '<< 8' on a negative min/max value is undefined */
            /* behaviour in C                                                */
            switch(minmax_x_update_id + (minmax_y_update_id << 2))
            {
            case 1:
            {
                /* min_x only */
                S32 mvd, mvd_q8;

                ps_cur_cluster->min_x = ps_cluster_data->min_x;

                mvd_q8 = ps_centroid->i4_pos_x_q8 - (ps_cur_cluster->min_x << 8);
                mvd = (mvd_q8 + (1 << 7)) >> 8;

                if(mvd > (max_dist_from_centroid))
                {
                    ps_cluster_data->max_dist_from_centroid = mvd;
                }
                break;
            }
            case 2:
            {
                /* max_x only */
                S32 mvd, mvd_q8;

                ps_cur_cluster->max_x = ps_cluster_data->max_x;

                mvd_q8 = (ps_cur_cluster->max_x << 8) - ps_centroid->i4_pos_x_q8;
                mvd = (mvd_q8 + (1 << 7)) >> 8;

                if(mvd > (max_dist_from_centroid))
                {
                    ps_cluster_data->max_dist_from_centroid = mvd;
                }
                break;
            }
            case 4:
            {
                /* min_y only */
                S32 mvd, mvd_q8;

                ps_cur_cluster->min_y = ps_cluster_data->min_y;

                mvd_q8 = ps_centroid->i4_pos_y_q8 - (ps_cur_cluster->min_y << 8);
                mvd = (mvd_q8 + (1 << 7)) >> 8;

                if(mvd > (max_dist_from_centroid))
                {
                    ps_cluster_data->max_dist_from_centroid = mvd;
                }
                break;
            }
            case 5:
            {
                /* min_x and min_y. Unlike the single bound cases, the       */
                /* distances here are computed from the survivor's bounds    */
                /* before they are overwritten                               */
                S32 mvd;
                S32 mvdx, mvdx_q8;
                S32 mvdy, mvdy_q8;

                mvdy_q8 = ps_centroid->i4_pos_y_q8 - (ps_cur_cluster->min_y << 8);
                mvdy = (mvdy_q8 + (1 << 7)) >> 8;

                mvdx_q8 = ps_centroid->i4_pos_x_q8 - (ps_cur_cluster->min_x << 8);
                mvdx = (mvdx_q8 + (1 << 7)) >> 8;

                mvd = (mvdx > mvdy) ? mvdx : mvdy;

                ps_cur_cluster->min_x = ps_cluster_data->min_x;
                ps_cur_cluster->min_y = ps_cluster_data->min_y;

                if(mvd > max_dist_from_centroid)
                {
                    ps_cluster_data->max_dist_from_centroid = mvd;
                }
                break;
            }
            case 6:
            {
                /* max_x and min_y */
                S32 mvd;
                S32 mvdx, mvdx_q8;
                S32 mvdy, mvdy_q8;

                mvdy_q8 = ps_centroid->i4_pos_y_q8 - (ps_cur_cluster->min_y << 8);
                mvdy = (mvdy_q8 + (1 << 7)) >> 8;

                mvdx_q8 = (ps_cur_cluster->max_x << 8) - ps_centroid->i4_pos_x_q8;
                mvdx = (mvdx_q8 + (1 << 7)) >> 8;

                mvd = (mvdx > mvdy) ? mvdx : mvdy;

                ps_cur_cluster->max_x = ps_cluster_data->max_x;
                ps_cur_cluster->min_y = ps_cluster_data->min_y;

                if(mvd > max_dist_from_centroid)
                {
                    ps_cluster_data->max_dist_from_centroid = mvd;
                }
                break;
            }
            case 8:
            {
                /* max_y only */
                S32 mvd, mvd_q8;

                ps_cur_cluster->max_y = ps_cluster_data->max_y;

                mvd_q8 = (ps_cur_cluster->max_y << 8) - ps_centroid->i4_pos_y_q8;
                mvd = (mvd_q8 + (1 << 7)) >> 8;

                if(mvd > (max_dist_from_centroid))
                {
                    ps_cluster_data->max_dist_from_centroid = mvd;
                }
                break;
            }
            case 9:
            {
                /* min_x and max_y */
                S32 mvd;
                S32 mvdx, mvdx_q8;
                S32 mvdy, mvdy_q8;

                mvdx_q8 = ps_centroid->i4_pos_x_q8 - (ps_cur_cluster->min_x << 8);
                mvdx = (mvdx_q8 + (1 << 7)) >> 8;

                mvdy_q8 = (ps_cur_cluster->max_y << 8) - ps_centroid->i4_pos_y_q8;
                mvdy = (mvdy_q8 + (1 << 7)) >> 8;

                mvd = (mvdx > mvdy) ? mvdx : mvdy;

                ps_cur_cluster->min_x = ps_cluster_data->min_x;
                ps_cur_cluster->max_y = ps_cluster_data->max_y;

                if(mvd > max_dist_from_centroid)
                {
                    ps_cluster_data->max_dist_from_centroid = mvd;
                }
                break;
            }
            case 10:
            {
                /* max_x and max_y */
                S32 mvd;
                S32 mvdx, mvdx_q8;
                S32 mvdy, mvdy_q8;

                mvdx_q8 = (ps_cur_cluster->max_x << 8) - ps_centroid->i4_pos_x_q8;
                mvdx = (mvdx_q8 + (1 << 7)) >> 8;

                mvdy_q8 = (ps_cur_cluster->max_y << 8) - ps_centroid->i4_pos_y_q8;
                mvdy = (mvdy_q8 + (1 << 7)) >> 8;

                mvd = (mvdx > mvdy) ? mvdx : mvdy;

                ps_cur_cluster->max_x = ps_cluster_data->max_x;
                ps_cur_cluster->max_y = ps_cluster_data->max_y;

                /* NOTE(review): this is the only case that compares against */
                /* the absorbed cluster's own max_dist_from_centroid instead */
                /* of the local snapshot of the survivor - confirm           */
                if(mvd > ps_cluster_data->max_dist_from_centroid)
                {
                    ps_cluster_data->max_dist_from_centroid = mvd;
                }
                break;
            }
            default:
            {
                /* Key 0: neither axis selected a bound */
                break;
            }
            }

            /* The survivor changed, so rescan the whole array from its      */
            /* start; the recursion ends when no candidate is close enough   */
            /* or only one cluster is left                                   */
            hme_try_cluster_merge(ps_root, pu1_num_clusters, idx_of_updated_cluster);

            return;
        }

        num_clusters_evaluated++;
    }
}

/**
********************************************************************************
*  @fn   void hme_seed_cluster_with_mv
*               (
*                   cluster_data_t *ps_data,
*                   S32 mvx,
*                   S32 mvy,
*                   S32 ref_idx,
*                   S32 sdi,
*                   S32 area,
*                   U08 is_part_of_bi
*               )
*
*  @brief  Turns a cluster slot into a valid cluster that holds exactly one
*          motion vector. Both pixel area counters are assigned (not
*          accumulated) so that a slot that is reused after a merge does not
*          inherit the areas of the cluster that lived there before.
*          max_dist_from_centroid is left untouched
*
*  @param[out]  ps_data : cluster slot to be filled
*
*  @param[in]  mvx, mvy : motion vector; becomes the centroid and all extents
*
*  @param[in]  ref_idx : ref id of the motion vector
*
*  @param[in]  sdi : value stored in the sdi field of as_mv[0]
*
*  @param[in]  area : pixel area of the partition the mv belongs to
*
*  @param[in]  is_part_of_bi : non-zero books the area as bi area, zero as uni
*
*  @return None
********************************************************************************
*/
static __inline void hme_seed_cluster_with_mv(
    cluster_data_t *ps_data, S32 mvx, S32 mvy, S32 ref_idx, S32 sdi, S32 area, U08 is_part_of_bi)
{
    ps_data->num_mvs = 1;

    /* Centroid in Q8; a multiply is used since left shifting a negative mv  */
    /* is undefined behaviour in C                                           */
    ps_data->s_centroid.i4_pos_x_q8 = mvx * (1 << 8);
    ps_data->s_centroid.i4_pos_y_q8 = mvy * (1 << 8);

    ps_data->ref_id = ref_idx;
    ps_data->area_in_pixels = area;

    ps_data->as_mv[0].pixel_count = area;
    ps_data->as_mv[0].mvx = mvx;
    ps_data->as_mv[0].mvy = mvy;
    ps_data->as_mv[0].is_uni = !is_part_of_bi;
    ps_data->as_mv[0].sdi = sdi;

    /* Exactly one of the two counters carries the area of the single mv */
    ps_data->bi_mv_pixel_area = is_part_of_bi ? area : 0;
    ps_data->uni_mv_pixel_area = is_part_of_bi ? 0 : area;

    ps_data->max_x = mvx;
    ps_data->min_x = mvx;
    ps_data->max_y = mvy;
    ps_data->min_y = mvy;

    ps_data->is_valid_cluster = 1;
}

/**
********************************************************************************
*  @fn   void hme_find_and_update_clusters
*               (
*                   cluster_data_t *ps_cluster_data,
*                   U08 *pu1_num_clusters,
*                   S16 i2_mv_x,
*                   S16 i2_mv_y,
*                   U08 i1_ref_idx,
*                   S32 i4_sdi,
*                   PART_ID_T e_part_id,
*                   U08 is_part_of_bi
*               )
*
*  @brief  Implementation of the clustering algorithm. The mv is added to the
*          valid cluster of the same ref id whose centroid is nearest,
*          provided the distance does not exceed that cluster's
*          max_dist_from_centroid; a merge with neighbouring clusters is then
*          attempted for NxN partitions. Otherwise a new cluster is created
*          from the mv
*
*  @param[in/out]  ps_cluster_data: pointer to the first element of the
*                                   cluster array
*
*  @param[in/out]  pu1_num_clusters : pointer to number of valid clusters
*
*  @param[in]  i2_mv_x : x co-ordinate of the motion vector
*
*  @param[in]  i2_mv_y : y co-ordinate of the motion vector
*
*  @param[in]  i1_ref_idx : ref_id of the motion vector
*
*  @param[in]  i4_sdi : value stored in the sdi field of the mv entry
*
*  @param[in]  e_part_id : partition id of the motion vector
*
*  @param[in]  is_part_of_bi : non-zero when the area is to be booked as bi
*                              area, zero for uni area
*
*  @return None
********************************************************************************
*/
void hme_find_and_update_clusters(
    cluster_data_t *ps_cluster_data,
    U08 *pu1_num_clusters,
    S16 i2_mv_x,
    S16 i2_mv_y,
    U08 i1_ref_idx,
    S32 i4_sdi,
    PART_ID_T e_part_id,
    U08 is_part_of_bi)
{
    S32 i;
    S32 num_clusters_evaluated = 0;

    /* Nearest cluster of the same ref; -1 while none has been found */
    S32 min_mvd_cluster_id = -1;
    S32 min_mvd = MAX_32BIT_VAL;
    S32 min_mvdx = 0;
    S32 min_mvdy = 0;

    /* Last invalid slot met during the scan; -1 when there was none */
    S32 free_slot_idx = -1;

    const S32 num_clusters = *pu1_num_clusters;
    const S32 mvx = i2_mv_x;
    const S32 mvy = i2_mv_y;
    const S32 ref_idx = i1_ref_idx;
    const S32 sdi = i4_sdi;
    const S32 area = gai4_partition_area[e_part_id];

    /* Q8 version of the mv; multiply instead of '<<' as the mv may be       */
    /* negative                                                              */
    const S32 mvx_q8 = mvx * (1 << 8);
    const S32 mvy_q8 = mvy * (1 << 8);

    /* Very first mv of the block: it becomes cluster 0 */
    if(0 == num_clusters)
    {
        hme_seed_cluster_with_mv(&ps_cluster_data[0], mvx, mvy, ref_idx, sdi, area, is_part_of_bi);

        *pu1_num_clusters = 1;

        return;
    }

    /* Scan until 'num_clusters' valid entries were visited */
    for(i = 0; num_clusters_evaluated < num_clusters; i++)
    {
        cluster_data_t *ps_data = &ps_cluster_data[i];

        S32 mvd, mvdx, mvdy;

        /* In anticipation of a possible merging of clusters: holes left by  */
        /* merges are remembered so that a new cluster can reuse them        */
        if(0 == ps_data->is_valid_cluster)
        {
            free_slot_idx = i;
            continue;
        }

        num_clusters_evaluated++;

        if(ref_idx != ps_data->ref_id)
        {
            continue;
        }

        /* Distance (centroid - mv), rounded from Q8 to integer units */
        mvdx = ((ps_data->s_centroid.i4_pos_x_q8 - mvx_q8) + (1 << 7)) >> 8;
        mvdy = ((ps_data->s_centroid.i4_pos_y_q8 - mvy_q8) + (1 << 7)) >> 8;

        mvd = ABS(mvdx) + ABS(mvdy);

        if(mvd < min_mvd)
        {
            min_mvd = mvd;
            min_mvdx = mvdx;
            min_mvdy = mvdy;
            min_mvd_cluster_id = i;
        }
    }

    /* A cluster is joined only if one of the same ref was found at all and  */
    /* the mv lies within its allowed distance. The explicit id check keeps  */
    /* index -1 from ever being dereferenced                                 */
    if((min_mvd_cluster_id >= 0) &&
       (min_mvd <= ps_cluster_data[min_mvd_cluster_id].max_dist_from_centroid))
    {
        hme_update_cluster_attributes(
            &ps_cluster_data[min_mvd_cluster_id],
            mvx,
            mvy,
            min_mvdx,
            min_mvdy,
            ref_idx,
            sdi,
            is_part_of_bi,
            e_part_id);

        if(PRT_NxN == ge_part_id_to_part_type[e_part_id])
        {
            hme_try_cluster_merge(ps_cluster_data, pu1_num_clusters, min_mvd_cluster_id);
        }
    }
    else
    {
        /* Without a hole the valid clusters occupy indices                  */
        /* 0..num_clusters-1, so the next free slot is 'num_clusters'.       */
        /* NOTE(review): no check against the capacity of the array is       */
        /* visible here; presumably the callers bound the number of          */
        /* clusters - confirm                                                */
        cluster_data_t *ps_new_cluster = (free_slot_idx < 0) ? &ps_cluster_data[num_clusters]
                                                             : &ps_cluster_data[free_slot_idx];

        hme_seed_cluster_with_mv(ps_new_cluster, mvx, mvy, ref_idx, sdi, area, is_part_of_bi);

        *pu1_num_clusters = num_clusters + 1;
    }
}

/**
********************************************************************************
*  @fn   void hme_update_32x32_cluster_attributes
*               (
*                   cluster_32x32_blk_t *ps_blk_32x32,
*                   cluster_data_t *ps_cluster_data
*               )
*
*  @brief  Folds one cluster of a constituent 16x16 block into the cluster
*          list of the 32x32 block.
*
*          Among the existing 32x32 clusters that use the same ref_id, the one
*          whose centroid is closest (sum of absolute x and y differences,
*          rounded to integer precision) is selected. If that distance does
*          not exceed the selected cluster's max_dist_from_centroid, the
*          incoming cluster is merged into it: areas are accumulated, the MVs
*          are appended, the min/max extents and max_dist_from_centroid are
*          grown and the centroid is re-averaged weighted by num_mvs.
*          Otherwise a new 32x32 cluster is created from the incoming one,
*          provided fewer than MAX_NUM_CLUSTERS_32x32 clusters exist; if the
*          list is full the incoming cluster is ignored.
*
*  @param[in,out]  ps_blk_32x32: structure containing 32x32 block results
*
*  @param[in]  ps_cluster_data : cluster (from a 16x16 block) to be added
*
*  @return None
********************************************************************************
*/
void hme_update_32x32_cluster_attributes(
    cluster_32x32_blk_t *ps_blk_32x32, cluster_data_t *ps_cluster_data)
{
    cluster_data_t *ps_cur_cluster_32;

    S32 num_clusters = ps_blk_32x32->num_clusters;

    if(0 != num_clusters)
    {
        S32 i;
        S32 min_mvd_cluster_id = -1;
        S32 mvdx_min = 0;
        S32 mvdy_min = 0;
        S32 mvd_min = MAX_32BIT_VAL;
        S32 num_clusters_evaluated = 0;

        S32 mvx_inp_q8 = ps_cluster_data->s_centroid.i4_pos_x_q8;
        S32 mvy_inp_q8 = ps_cluster_data->s_centroid.i4_pos_y_q8;

        /* Locate the nearest existing cluster that shares the reference id */
        for(i = 0; num_clusters_evaluated < num_clusters; i++)
        {
            S32 mvd;
            S32 mvdx, mvdx_q8;
            S32 mvdy, mvdy_q8;

            ps_cur_cluster_32 = &ps_blk_32x32->as_cluster_data[i];

            if(ps_cur_cluster_32->ref_id != ps_cluster_data->ref_id)
            {
                num_clusters_evaluated++;
                continue;
            }

            /* Validity has to be tested on the candidate cluster of the    */
            /* 32x32 list (as done for the 64x64 list), not on the incoming */
            /* cluster, which does not change across iterations             */
            if(!ps_cur_cluster_32->is_valid_cluster)
            {
                continue;
            }

            num_clusters_evaluated++;

            mvdx_q8 = ps_cur_cluster_32->s_centroid.i4_pos_x_q8 - mvx_inp_q8;
            mvdy_q8 = ps_cur_cluster_32->s_centroid.i4_pos_y_q8 - mvy_inp_q8;

            /* Q8 -> integer with rounding */
            mvdx = (mvdx_q8 + (1 << 7)) >> 8;
            mvdy = (mvdy_q8 + (1 << 7)) >> 8;

            mvd = ABS(mvdx) + ABS(mvdy);

            /* Strict comparison: the first of equally distant clusters wins */
            if(mvd < mvd_min)
            {
                mvd_min = mvd;
                mvdx_min = mvdx;
                mvdy_min = mvdy;
                min_mvd_cluster_id = i;
            }
        }

        /* Merging is attempted only when a candidate was actually found;   */
        /* this avoids indexing the cluster array with -1                   */
        if(-1 != min_mvd_cluster_id)
        {
            S32 mvd_limit;

            ps_cur_cluster_32 = &ps_blk_32x32->as_cluster_data[min_mvd_cluster_id];

            mvd_limit = ps_cur_cluster_32->max_dist_from_centroid;

            if(mvd_min <= mvd_limit)
            {
                centroid_t *ps_centroid = &ps_cur_cluster_32->s_centroid;

                LWORD64 i8_updated_posx;
                LWORD64 i8_updated_posy;
                /* 0 - unchanged, 1 - min extent moved, 2 - max extent moved */
                WORD32 minmax_updated_x = 0;
                WORD32 minmax_updated_y = 0;

                ps_cur_cluster_32->is_valid_cluster = 1;

                ps_cur_cluster_32->area_in_pixels += ps_cluster_data->area_in_pixels;
                ps_cur_cluster_32->bi_mv_pixel_area += ps_cluster_data->bi_mv_pixel_area;
                ps_cur_cluster_32->uni_mv_pixel_area += ps_cluster_data->uni_mv_pixel_area;

                /* NOTE(review): the combined MV count is not checked against */
                /* the capacity of as_mv here - confirm callers guarantee it  */
                memcpy(
                    &ps_cur_cluster_32->as_mv[ps_cur_cluster_32->num_mvs],
                    ps_cluster_data->as_mv,
                    sizeof(mv_data_t) * ps_cluster_data->num_mvs);

                /* Stretch the x extent if the incoming centroid is outside */
                if((mvdx_min > 0) && ((ps_cur_cluster_32->min_x << 8) > mvx_inp_q8))
                {
                    ps_cur_cluster_32->min_x = (mvx_inp_q8 + (1 << 7)) >> 8;
                    minmax_updated_x = 1;
                }
                else if((mvdx_min < 0) && ((ps_cur_cluster_32->max_x << 8) < mvx_inp_q8))
                {
                    ps_cur_cluster_32->max_x = (mvx_inp_q8 + (1 << 7)) >> 8;
                    minmax_updated_x = 2;
                }

                /* Stretch the y extent if the incoming centroid is outside */
                if((mvdy_min > 0) && ((ps_cur_cluster_32->min_y << 8) > mvy_inp_q8))
                {
                    ps_cur_cluster_32->min_y = (mvy_inp_q8 + (1 << 7)) >> 8;
                    minmax_updated_y = 1;
                }
                else if((mvdy_min < 0) && ((ps_cur_cluster_32->max_y << 8) < mvy_inp_q8))
                {
                    ps_cur_cluster_32->max_y = (mvy_inp_q8 + (1 << 7)) >> 8;
                    minmax_updated_y = 2;
                }

                /* Distance from the (not yet updated) centroid to every    */
                /* extent that moved; the larger one becomes the new        */
                /* max_dist_from_centroid if it exceeds the current limit   */
                if(minmax_updated_x || minmax_updated_y)
                {
                    S32 mvd;
                    S32 mvdx = 0, mvdx_q8;
                    S32 mvdy = 0, mvdy_q8;

                    if(1 == minmax_updated_x)
                    {
                        mvdx_q8 = ps_centroid->i4_pos_x_q8 - (ps_cur_cluster_32->min_x << 8);
                        mvdx = (mvdx_q8 + (1 << 7)) >> 8;
                    }
                    else if(2 == minmax_updated_x)
                    {
                        mvdx_q8 = (ps_cur_cluster_32->max_x << 8) - ps_centroid->i4_pos_x_q8;
                        mvdx = (mvdx_q8 + (1 << 7)) >> 8;
                    }

                    if(1 == minmax_updated_y)
                    {
                        mvdy_q8 = ps_centroid->i4_pos_y_q8 - (ps_cur_cluster_32->min_y << 8);
                        mvdy = (mvdy_q8 + (1 << 7)) >> 8;
                    }
                    else if(2 == minmax_updated_y)
                    {
                        mvdy_q8 = (ps_cur_cluster_32->max_y << 8) - ps_centroid->i4_pos_y_q8;
                        mvdy = (mvdy_q8 + (1 << 7)) >> 8;
                    }

                    if(0 == minmax_updated_y)
                    {
                        mvd = mvdx;
                    }
                    else if(0 == minmax_updated_x)
                    {
                        mvd = mvdy;
                    }
                    else
                    {
                        mvd = (mvdx > mvdy) ? mvdx : mvdy;
                    }

                    /* mvd_limit still equals the cluster's current         */
                    /* max_dist_from_centroid at this point                 */
                    if(mvd > mvd_limit)
                    {
                        ps_cur_cluster_32->max_dist_from_centroid = mvd;
                    }
                }

                /* Weighted average of both centroids, weights being the   */
                /* MV counts before the merge                               */
                i8_updated_posx =
                    ((LWORD64)ps_centroid->i4_pos_x_q8 * ps_cur_cluster_32->num_mvs) +
                    ((LWORD64)mvx_inp_q8 * ps_cluster_data->num_mvs);
                i8_updated_posy =
                    ((LWORD64)ps_centroid->i4_pos_y_q8 * ps_cur_cluster_32->num_mvs) +
                    ((LWORD64)mvy_inp_q8 * ps_cluster_data->num_mvs);

                ps_cur_cluster_32->num_mvs += ps_cluster_data->num_mvs;

                ps_centroid->i4_pos_x_q8 = (WORD32)(i8_updated_posx / ps_cur_cluster_32->num_mvs);
                ps_centroid->i4_pos_y_q8 = (WORD32)(i8_updated_posy / ps_cur_cluster_32->num_mvs);

                return;
            }
        }
    }

    /* No merge happened: open a new cluster if the list is empty or has room */
    if((0 == num_clusters) || (num_clusters < MAX_NUM_CLUSTERS_32x32))
    {
        ps_cur_cluster_32 = &ps_blk_32x32->as_cluster_data[num_clusters];

        ps_blk_32x32->num_clusters++;
        ps_blk_32x32->au1_num_clusters[ps_cluster_data->ref_id]++;

        ps_cur_cluster_32->is_valid_cluster = 1;

        /* area_in_pixels is overwritten whereas the uni/bi areas are       */
        /* accumulated onto whatever the slot already holds                 */
        ps_cur_cluster_32->area_in_pixels = ps_cluster_data->area_in_pixels;
        ps_cur_cluster_32->bi_mv_pixel_area += ps_cluster_data->bi_mv_pixel_area;
        ps_cur_cluster_32->uni_mv_pixel_area += ps_cluster_data->uni_mv_pixel_area;

        memcpy(
            ps_cur_cluster_32->as_mv,
            ps_cluster_data->as_mv,
            sizeof(mv_data_t) * ps_cluster_data->num_mvs);

        ps_cur_cluster_32->num_mvs = ps_cluster_data->num_mvs;

        ps_cur_cluster_32->ref_id = ps_cluster_data->ref_id;

        ps_cur_cluster_32->max_x = ps_cluster_data->max_x;
        ps_cur_cluster_32->max_y = ps_cluster_data->max_y;
        ps_cur_cluster_32->min_x = ps_cluster_data->min_x;
        ps_cur_cluster_32->min_y = ps_cluster_data->min_y;

        ps_cur_cluster_32->s_centroid = ps_cluster_data->s_centroid;
    }
}

/**
********************************************************************************
*  @fn   void hme_update_64x64_cluster_attributes
*               (
*                   cluster_64x64_blk_t *ps_blk_64x64,
*                   cluster_data_t *ps_cluster_data
*               )
*
*  @brief  Folds one cluster of a constituent 32x32 block into the cluster
*          list of the 64x64 block.
*
*          Among the valid 64x64 clusters that use the same ref_id, the one
*          whose centroid is closest (sum of absolute x and y differences,
*          rounded to integer precision) is selected. If that distance does
*          not exceed the selected cluster's max_dist_from_centroid, the
*          incoming cluster is merged into it: areas are accumulated, the MVs
*          are appended, the min/max extents and max_dist_from_centroid are
*          grown and the centroid is re-averaged weighted by num_mvs.
*          Otherwise a new 64x64 cluster is created from the incoming one,
*          provided fewer than MAX_NUM_CLUSTERS_64x64 clusters exist; if the
*          list is full the incoming cluster is ignored.
*
*  @param[in,out]  ps_blk_64x64: structure containing 64x64 block results
*
*  @param[in]  ps_cluster_data : cluster (from a 32x32 block) to be added
*
*  @return None
********************************************************************************
*/
void hme_update_64x64_cluster_attributes(
    cluster_64x64_blk_t *ps_blk_64x64, cluster_data_t *ps_cluster_data)
{
    cluster_data_t *ps_cur_cluster_64;

    S32 num_clusters = ps_blk_64x64->num_clusters;

    if(0 != num_clusters)
    {
        S32 i;
        S32 min_mvd_cluster_id = -1;
        S32 mvdx_min = 0;
        S32 mvdy_min = 0;
        S32 mvd_min = MAX_32BIT_VAL;
        S32 num_clusters_evaluated = 0;

        S32 mvx_inp_q8 = ps_cluster_data->s_centroid.i4_pos_x_q8;
        S32 mvy_inp_q8 = ps_cluster_data->s_centroid.i4_pos_y_q8;

        /* Locate the nearest valid cluster that shares the reference id */
        for(i = 0; num_clusters_evaluated < num_clusters; i++)
        {
            S32 mvd;
            S32 mvdx, mvdx_q8;
            S32 mvdy, mvdy_q8;

            ps_cur_cluster_64 = &ps_blk_64x64->as_cluster_data[i];

            if(ps_cur_cluster_64->ref_id != ps_cluster_data->ref_id)
            {
                num_clusters_evaluated++;
                continue;
            }

            /* Invalid entries are skipped without being counted */
            if(!ps_cur_cluster_64->is_valid_cluster)
            {
                continue;
            }

            num_clusters_evaluated++;

            mvdx_q8 = ps_cur_cluster_64->s_centroid.i4_pos_x_q8 - mvx_inp_q8;
            mvdy_q8 = ps_cur_cluster_64->s_centroid.i4_pos_y_q8 - mvy_inp_q8;

            /* Q8 -> integer with rounding */
            mvdx = (mvdx_q8 + (1 << 7)) >> 8;
            mvdy = (mvdy_q8 + (1 << 7)) >> 8;

            mvd = ABS(mvdx) + ABS(mvdy);

            /* Strict comparison: the first of equally distant clusters wins */
            if(mvd < mvd_min)
            {
                mvd_min = mvd;
                mvdx_min = mvdx;
                mvdy_min = mvdy;
                min_mvd_cluster_id = i;
            }
        }

        /* Merging is attempted only when a candidate was actually found;   */
        /* this avoids indexing the cluster array with -1                   */
        if(-1 != min_mvd_cluster_id)
        {
            S32 mvd_limit;

            ps_cur_cluster_64 = &ps_blk_64x64->as_cluster_data[min_mvd_cluster_id];

            mvd_limit = ps_cur_cluster_64->max_dist_from_centroid;

            if(mvd_min <= mvd_limit)
            {
                centroid_t *ps_centroid = &ps_cur_cluster_64->s_centroid;

                LWORD64 i8_updated_posx;
                LWORD64 i8_updated_posy;
                /* 0 - unchanged, 1 - min extent moved, 2 - max extent moved */
                WORD32 minmax_updated_x = 0;
                WORD32 minmax_updated_y = 0;

                ps_cur_cluster_64->is_valid_cluster = 1;

                ps_cur_cluster_64->area_in_pixels += ps_cluster_data->area_in_pixels;
                ps_cur_cluster_64->bi_mv_pixel_area += ps_cluster_data->bi_mv_pixel_area;
                ps_cur_cluster_64->uni_mv_pixel_area += ps_cluster_data->uni_mv_pixel_area;

                /* NOTE(review): the combined MV count is not checked against */
                /* the capacity of as_mv here - confirm callers guarantee it  */
                memcpy(
                    &ps_cur_cluster_64->as_mv[ps_cur_cluster_64->num_mvs],
                    ps_cluster_data->as_mv,
                    sizeof(mv_data_t) * ps_cluster_data->num_mvs);

                /* Stretch the x extent if the incoming centroid is outside */
                if((mvdx_min > 0) && ((ps_cur_cluster_64->min_x << 8) > mvx_inp_q8))
                {
                    ps_cur_cluster_64->min_x = (mvx_inp_q8 + (1 << 7)) >> 8;
                    minmax_updated_x = 1;
                }
                else if((mvdx_min < 0) && ((ps_cur_cluster_64->max_x << 8) < mvx_inp_q8))
                {
                    ps_cur_cluster_64->max_x = (mvx_inp_q8 + (1 << 7)) >> 8;
                    minmax_updated_x = 2;
                }

                /* Stretch the y extent if the incoming centroid is outside */
                if((mvdy_min > 0) && ((ps_cur_cluster_64->min_y << 8) > mvy_inp_q8))
                {
                    ps_cur_cluster_64->min_y = (mvy_inp_q8 + (1 << 7)) >> 8;
                    minmax_updated_y = 1;
                }
                else if((mvdy_min < 0) && ((ps_cur_cluster_64->max_y << 8) < mvy_inp_q8))
                {
                    ps_cur_cluster_64->max_y = (mvy_inp_q8 + (1 << 7)) >> 8;
                    minmax_updated_y = 2;
                }

                /* Distance from the (not yet updated) centroid to every    */
                /* extent that moved; the larger one becomes the new        */
                /* max_dist_from_centroid if it exceeds the current limit   */
                if(minmax_updated_x || minmax_updated_y)
                {
                    S32 mvd;
                    S32 mvdx = 0, mvdx_q8;
                    S32 mvdy = 0, mvdy_q8;

                    if(1 == minmax_updated_x)
                    {
                        mvdx_q8 = ps_centroid->i4_pos_x_q8 - (ps_cur_cluster_64->min_x << 8);
                        mvdx = (mvdx_q8 + (1 << 7)) >> 8;
                    }
                    else if(2 == minmax_updated_x)
                    {
                        mvdx_q8 = (ps_cur_cluster_64->max_x << 8) - ps_centroid->i4_pos_x_q8;
                        mvdx = (mvdx_q8 + (1 << 7)) >> 8;
                    }

                    if(1 == minmax_updated_y)
                    {
                        mvdy_q8 = ps_centroid->i4_pos_y_q8 - (ps_cur_cluster_64->min_y << 8);
                        mvdy = (mvdy_q8 + (1 << 7)) >> 8;
                    }
                    else if(2 == minmax_updated_y)
                    {
                        mvdy_q8 = (ps_cur_cluster_64->max_y << 8) - ps_centroid->i4_pos_y_q8;
                        mvdy = (mvdy_q8 + (1 << 7)) >> 8;
                    }

                    if(0 == minmax_updated_y)
                    {
                        mvd = mvdx;
                    }
                    else if(0 == minmax_updated_x)
                    {
                        mvd = mvdy;
                    }
                    else
                    {
                        mvd = (mvdx > mvdy) ? mvdx : mvdy;
                    }

                    /* mvd_limit still equals the cluster's current         */
                    /* max_dist_from_centroid at this point                 */
                    if(mvd > mvd_limit)
                    {
                        ps_cur_cluster_64->max_dist_from_centroid = mvd;
                    }
                }

                /* Weighted average of both centroids, weights being the   */
                /* MV counts before the merge                               */
                i8_updated_posx =
                    ((LWORD64)ps_centroid->i4_pos_x_q8 * ps_cur_cluster_64->num_mvs) +
                    ((LWORD64)mvx_inp_q8 * ps_cluster_data->num_mvs);
                i8_updated_posy =
                    ((LWORD64)ps_centroid->i4_pos_y_q8 * ps_cur_cluster_64->num_mvs) +
                    ((LWORD64)mvy_inp_q8 * ps_cluster_data->num_mvs);

                ps_cur_cluster_64->num_mvs += ps_cluster_data->num_mvs;

                ps_centroid->i4_pos_x_q8 = (WORD32)(i8_updated_posx / ps_cur_cluster_64->num_mvs);
                ps_centroid->i4_pos_y_q8 = (WORD32)(i8_updated_posy / ps_cur_cluster_64->num_mvs);

                return;
            }
        }
    }

    /* No merge happened: open a new cluster if the list is empty or has room */
    if((0 == num_clusters) || (num_clusters < MAX_NUM_CLUSTERS_64x64))
    {
        ps_cur_cluster_64 = &ps_blk_64x64->as_cluster_data[num_clusters];

        ps_blk_64x64->num_clusters++;
        ps_blk_64x64->au1_num_clusters[ps_cluster_data->ref_id]++;

        ps_cur_cluster_64->is_valid_cluster = 1;

        /* area_in_pixels is overwritten whereas the uni/bi areas are       */
        /* accumulated onto whatever the slot already holds                 */
        ps_cur_cluster_64->area_in_pixels = ps_cluster_data->area_in_pixels;
        ps_cur_cluster_64->bi_mv_pixel_area += ps_cluster_data->bi_mv_pixel_area;
        ps_cur_cluster_64->uni_mv_pixel_area += ps_cluster_data->uni_mv_pixel_area;

        memcpy(
            &ps_cur_cluster_64->as_mv[0],
            ps_cluster_data->as_mv,
            sizeof(mv_data_t) * ps_cluster_data->num_mvs);

        ps_cur_cluster_64->num_mvs = ps_cluster_data->num_mvs;

        ps_cur_cluster_64->ref_id = ps_cluster_data->ref_id;

        ps_cur_cluster_64->max_x = ps_cluster_data->max_x;
        ps_cur_cluster_64->max_y = ps_cluster_data->max_y;
        ps_cur_cluster_64->min_x = ps_cluster_data->min_x;
        ps_cur_cluster_64->min_y = ps_cluster_data->min_y;

        ps_cur_cluster_64->s_centroid = ps_cluster_data->s_centroid;
    }
}

/**
********************************************************************************
*  @fn   void hme_update_32x32_clusters
*               (
*                   cluster_32x32_blk_t *ps_blk_32x32,
*                   cluster_16x16_blk_t *ps_blk_16x16
*               )
*
*  @brief  Builds the 32x32 block results from its four 16x16 blocks: the
*          intra_mv_area and best_inter_cost of every 16x16 block are
*          accumulated, and every valid 16x16 cluster is passed to
*          hme_update_32x32_cluster_attributes
*
*  @param[in,out]  ps_blk_32x32: structure containing 32x32 block results
*
*  @param[in]  ps_blk_16x16 : array of the four 16x16 block results
*
*  @return None
********************************************************************************
*/
static __inline void
    hme_update_32x32_clusters(cluster_32x32_blk_t *ps_blk_32x32, cluster_16x16_blk_t *ps_blk_16x16)
{
    S32 blk_idx;

    for(blk_idx = 0; blk_idx < 4; blk_idx++)
    {
        cluster_16x16_blk_t *ps_child_blk = ps_blk_16x16 + blk_idx;
        cluster_data_t *ps_cluster = ps_child_blk->as_cluster_data;

        /* Number of valid clusters still to be handed over */
        S32 num_clusters_pending = ps_child_blk->num_clusters;

        ps_blk_32x32->intra_mv_area += ps_child_blk->intra_mv_area;

        ps_blk_32x32->best_inter_cost += ps_child_blk->best_inter_cost;

        /* Invalid entries are stepped over without being counted */
        while(num_clusters_pending > 0)
        {
            if(ps_cluster->is_valid_cluster)
            {
                hme_update_32x32_cluster_attributes(ps_blk_32x32, ps_cluster);

                num_clusters_pending--;
            }

            ps_cluster++;
        }
    }
}

/**
********************************************************************************
*  @fn   void hme_update_64x64_clusters
*               (
*                   cluster_64x64_blk_t *ps_blk_64x64,
*                   cluster_32x32_blk_t *ps_blk_32x32
*               )
*
*  @brief  Builds the 64x64 block results from its four 32x32 blocks: the
*          intra_mv_area and best_inter_cost of every 32x32 block are
*          accumulated, and every valid 32x32 cluster is passed to
*          hme_update_64x64_cluster_attributes
*
*  @param[in,out]  ps_blk_64x64: structure containing 64x64 block results
*
*  @param[in]  ps_blk_32x32 : array of the four 32x32 block results
*
*  @return None
********************************************************************************
*/
static __inline void
    hme_update_64x64_clusters(cluster_64x64_blk_t *ps_blk_64x64, cluster_32x32_blk_t *ps_blk_32x32)
{
    S32 blk_idx;

    for(blk_idx = 0; blk_idx < 4; blk_idx++)
    {
        cluster_32x32_blk_t *ps_child_blk = ps_blk_32x32 + blk_idx;
        cluster_data_t *ps_cluster = ps_child_blk->as_cluster_data;

        /* Number of valid clusters still to be handed over */
        S32 num_clusters_pending = ps_child_blk->num_clusters;

        ps_blk_64x64->intra_mv_area += ps_child_blk->intra_mv_area;
        ps_blk_64x64->best_inter_cost += ps_child_blk->best_inter_cost;

        /* Invalid entries are stepped over without being counted */
        while(num_clusters_pending > 0)
        {
            if(ps_cluster->is_valid_cluster)
            {
                hme_update_64x64_cluster_attributes(ps_blk_64x64, ps_cluster);

                num_clusters_pending--;
            }

            ps_cluster++;
        }
    }
}

/**
********************************************************************************
*  @fn   S32 hme_try_merge_clusters_blksize_gt_16
*               (
*                   cluster_data_t *ps_cluster_data,
*                   S32 num_clusters
*               )
*
*  @brief  Merging clusters from blocks of size 32x32 and greater
*
*  @param[in/out]  ps_cluster_data: structure containing cluster data
*
*  @param[in]  num_clusters : number of clusters
*
*  @return Success or failure
********************************************************************************
*/
S32 hme_try_merge_clusters_blksize_gt_16(cluster_data_t *ps_cluster_data, S32 num_clusters)
{
    centroid_t *ps_cur_centroid;
    cluster_data_t *ps_cur_cluster;

    S32 i, mvd;
    S32 mvdx, mvdy, mvdx_q8, mvdy_q8;

    centroid_t *ps_centroid = &ps_cluster_data->s_centroid;

    S32 mvd_limit = ps_cluster_data->max_dist_from_centroid;
    S32 ref_id = ps_cluster_data->ref_id;

    S32 node0_posx_q8 = ps_centroid->i4_pos_x_q8;
    S32 node0_posy_q8 = ps_centroid->i4_pos_y_q8;
    S32 num_clusters_evaluated = 1;
    S32 ret_value = 0;

    if(1 >= num_clusters)
    {
        return ret_value;
    }

    for(i = 1; num_clusters_evaluated < num_clusters; i++)
    {
        S32 cur_posx_q8;
        S32 cur_posy_q8;

        ps_cur_cluster = &ps_cluster_data[i];

        if((ref_id != ps_cur_cluster->ref_id))
        {
            num_clusters_evaluated++;
            continue;
        }

        if((!ps_cur_cluster->is_valid_cluster))
        {
            continue;
        }

        num_clusters_evaluated++;

        ps_cur_centroid = &ps_cur_cluster->s_centroid;

        cur_posx_q8 = ps_cur_centroid->i4_pos_x_q8;
        cur_posy_q8 = ps_cur_centroid->i4_pos_y_q8;

        mvdx_q8 = cur_posx_q8 - node0_posx_q8;
        mvdy_q8 = cur_posy_q8 - node0_posy_q8;

        mvdx = (mvdx_q8 + (1 << 7)) >> 8;
        mvdy = (mvdy_q8 + (1 << 7)) >> 8;

        mvd = ABS(mvdx) + ABS(mvdy);

        if(mvd <= (mvd_limit >> 1))
        {
            LWORD64 i8_updated_posx;
            LWORD64 i8_updated_posy;
            WORD32 minmax_updated_x = 0;
            WORD32 minmax_updated_y = 0;

            ps_cur_cluster->is_valid_cluster = 0;

            ps_cluster_data->area_in_pixels += ps_cur_cluster->area_in_pixels;
            ps_cluster_data->bi_mv_pixel_area += ps_cur_cluster->bi_mv_pixel_area;
            ps_cluster_data->uni_mv_pixel_area += ps_cur_cluster->uni_mv_pixel_area;

            memcpy(
                &ps_cluster_data->as_mv[ps_cluster_data->num_mvs],
                ps_cur_cluster->as_mv,
                sizeof(mv_data_t) * ps_cur_cluster->num_mvs);

            if(mvdx > 0)
            {
                ps_cluster_data->min_x = (cur_posx_q8 + (1 << 7)) >> 8;
                minmax_updated_x = 1;
            }
            else
            {
                ps_cluster_data->max_x = (cur_posx_q8 + (1 << 7)) >> 8;
                minmax_updated_x = 2;
            }

            if(mvdy > 0)
            {
                ps_cluster_data->min_y = (cur_posy_q8 + (1 << 7)) >> 8;
                minmax_updated_y = 1;
            }
            else
            {
                ps_cluster_data->max_y = (cur_posy_q8 + (1 << 7)) >> 8;
                minmax_updated_y = 2;
            }

            switch((minmax_updated_y << 2) + minmax_updated_x)
            {
            case 1:
            {
                S32 mvd, mvd_q8;

                mvd_q8 = ps_cur_centroid->i4_pos_x_q8 - (ps_cluster_data->min_x << 8);
                mvd = (mvd_q8 + (1 << 7)) >> 8;

                if(mvd > (mvd_limit))
                {
                    ps_cluster_data->max_dist_from_centroid = mvd;
                }
                break;
            }
            case 2:
            {
                S32 mvd, mvd_q8;

                mvd_q8 = (ps_cluster_data->max_x << 8) - ps_cur_centroid->i4_pos_x_q8;
                mvd = (mvd_q8 + (1 << 7)) >> 8;

                if(mvd > (mvd_limit))
                {
                    ps_cluster_data->max_dist_from_centroid = mvd;
                }
                break;
            }
            case 4:
            {
                S32 mvd, mvd_q8;

                mvd_q8 = ps_cur_centroid->i4_pos_y_q8 - (ps_cluster_data->min_y << 8);
                mvd = (mvd_q8 + (1 << 7)) >> 8;

                if(mvd > (mvd_limit))
                {
                    ps_cluster_data->max_dist_from_centroid = mvd;
                }
                break;
            }
            case 5:
            {
                S32 mvd;
                S32 mvdx, mvdx_q8;
                S32 mvdy, mvdy_q8;

                mvdy_q8 = ps_cur_centroid->i4_pos_y_q8 - (ps_cluster_data->min_y << 8);
                mvdy = (mvdy_q8 + (1 << 7)) >> 8;

                mvdx_q8 = ps_cur_centroid->i4_pos_x_q8 - (ps_cluster_data->min_x << 8);
                mvdx = (mvdx_q8 + (1 << 7)) >> 8;

                mvd = (mvdx > mvdy) ? mvdx : mvdy;

                if(mvd > mvd_limit)
                {
                    ps_cluster_data->max_dist_from_centroid = mvd;
                }
                break;
            }
            case 6:
            {
                S32 mvd;
                S32 mvdx, mvdx_q8;
                S32 mvdy, mvdy_q8;

                mvdy_q8 = ps_cur_centroid->i4_pos_y_q8 - (ps_cluster_data->min_y << 8);
                mvdy = (mvdy_q8 + (1 << 7)) >> 8;

                mvdx_q8 = (ps_cluster_data->max_x << 8) - ps_cur_centroid->i4_pos_x_q8;
                mvdx = (mvdx_q8 + (1 << 7)) >> 8;

                mvd = (mvdx > mvdy) ? mvdx : mvdy;

                if(mvd > mvd_limit)
                {
                    ps_cluster_data->max_dist_from_centroid = mvd;
                }
                break;
            }
            case 8:
            {
                S32 mvd, mvd_q8;

                mvd_q8 = (ps_cluster_data->max_y << 8) - ps_cur_centroid->i4_pos_y_q8;
                mvd = (mvd_q8 + (1 << 7)) >> 8;

                if(mvd > (mvd_limit))
                {
                    ps_cluster_data->max_dist_from_centroid = mvd;
                }
                break;
            }
            case 9:
            {
                S32 mvd;
                S32 mvdx, mvdx_q8;
                S32 mvdy, mvdy_q8;

                mvdx_q8 = ps_cur_centroid->i4_pos_x_q8 - (ps_cluster_data->min_x << 8);
                mvdx = (mvdx_q8 + (1 << 7)) >> 8;

                mvdy_q8 = (ps_cluster_data->max_y << 8) - ps_cur_centroid->i4_pos_y_q8;
                mvdy = (mvdy_q8 + (1 << 7)) >> 8;

                mvd = (mvdx > mvdy) ? mvdx : mvdy;

                if(mvd > mvd_limit)
                {
                    ps_cluster_data->max_dist_from_centroid = mvd;
                }
                break;
            }
            case 10:
            {
                S32 mvd;
                S32 mvdx, mvdx_q8;
                S32 mvdy, mvdy_q8;

                mvdx_q8 = (ps_cluster_data->max_x << 8) - ps_cur_centroid->i4_pos_x_q8;
                mvdx = (mvdx_q8 + (1 << 7)) >> 8;

                mvdy_q8 = (ps_cluster_data->max_y << 8) - ps_cur_centroid->i4_pos_y_q8;
                mvdy = (mvdy_q8 + (1 << 7)) >> 8;

                mvd = (mvdx > mvdy) ? mvdx : mvdy;

                if(mvd > ps_cluster_data->max_dist_from_centroid)
                {
                    ps_cluster_data->max_dist_from_centroid = mvd;
                }
                break;
            }
            default:
            {
                break;
            }
            }

            i8_updated_posx = ((LWORD64)ps_centroid->i4_pos_x_q8 * ps_cluster_data->num_mvs) +
                              ((LWORD64)cur_posx_q8 * ps_cur_cluster->num_mvs);
            i8_updated_posy = ((LWORD64)ps_centroid->i4_pos_y_q8 * ps_cluster_data->num_mvs) +
                              ((LWORD64)cur_posy_q8 * ps_cur_cluster->num_mvs);

            ps_cluster_data->num_mvs += ps_cur_cluster->num_mvs;

            ps_centroid->i4_pos_x_q8 = (WORD32)(i8_updated_posx / ps_cluster_data->num_mvs);
            ps_centroid->i4_pos_y_q8 = (WORD32)(i8_updated_posy / ps_cluster_data->num_mvs);

            if(MAX_NUM_CLUSTERS_IN_VALID_64x64_BLK >= num_clusters)
            {
                num_clusters--;
                num_clusters_evaluated = 1;
                i = 0;
                ret_value++;
            }
            else
            {
                ret_value++;

                return ret_value;
            }
        }
    }

    if(ret_value)
    {
        for(i = 1; i < (num_clusters + ret_value); i++)
        {
            if(ps_cluster_data[i].is_valid_cluster)
            {
                break;
            }
        }
        if(i == (num_clusters + ret_value))
        {
            return ret_value;
        }
    }
    else
    {
        i = 1;
    }

    return (hme_try_merge_clusters_blksize_gt_16(&ps_cluster_data[i], num_clusters - 1)) +
           ret_value;
}

/**
********************************************************************************
*  @fn   S32 hme_determine_validity_32x32
*               (
*                   ctb_cluster_info_t *ps_ctb_cluster_info,
*                   S32 *pi4_children_nodes_required,
*                   S32 blk_validity_wrt_pic_bndry,
*                   S32 parent_blk_validity_wrt_pic_bndry
*               )
*
*  @brief  Determines whether the current 32x32 block needs to be evaluated in
*          enc_loop while recursing through the CU tree, and whether its
*          children nodes are needed as well
*
*  @param[in]  ps_ctb_cluster_info: cluster container; the 32x32 and 64x64
*              blocks it points to are read (cluster counts and, for the
*              tie-break, the areas of the valid 32x32 clusters)
*
*  @param[out]  pi4_children_nodes_required: set to 1 when the children of
*               this block have to be visited, else 0
*
*  @param[in]  blk_validity_wrt_pic_bndry: 0 forces 'block invalid, children
*              required'
*
*  @param[in]  parent_blk_validity_wrt_pic_bndry: 0 (with a valid current
*              block) forces 'block valid, children required'
*
*  @return 1 if the block is considered valid, 0 otherwise
********************************************************************************
*/
__inline S32 hme_determine_validity_32x32(
    ctb_cluster_info_t *ps_ctb_cluster_info,
    S32 *pi4_children_nodes_required,
    S32 blk_validity_wrt_pic_bndry,
    S32 parent_blk_validity_wrt_pic_bndry)
{
    cluster_32x32_blk_t *ps_cur_blk = ps_ctb_cluster_info->ps_32x32_blk;
    cluster_64x64_blk_t *ps_parent_blk = ps_ctb_cluster_info->ps_64x64_blk;

    S32 nc_cur = ps_cur_blk->num_clusters;
    S32 nc_parent = ps_parent_blk->num_clusters;

    S32 is_blk_valid;
    S32 split_required;

    if(!blk_validity_wrt_pic_bndry)
    {
        /* Block itself fails the picture boundary check */
        is_blk_valid = 0;
        split_required = 1;
    }
    else if(!parent_blk_validity_wrt_pic_bndry)
    {
        /* Parent fails the boundary check, so this block must stand in for it */
        is_blk_valid = 1;
        split_required = 1;
    }
    else if(nc_cur > MAX_NUM_CLUSTERS_IN_VALID_32x32_BLK)
    {
        /* Too many clusters in this block */
        is_blk_valid = 0;
        split_required = 1;
    }
    else if(nc_parent > MAX_NUM_CLUSTERS_IN_VALID_64x64_BLK)
    {
        /* Too many clusters in the parent: keep this block and its children */
        is_blk_valid = 1;
        split_required = 1;
    }
    else if(
        (nc_parent < MAX_NUM_CLUSTERS_IN_VALID_64x64_BLK) ||
        (nc_cur < MAX_NUM_CLUSTERS_IN_VALID_32x32_BLK))
    {
        /* At least one of the two counts is strictly below its limit */
        is_blk_valid = 1;
        split_required = 0;
    }
    else
    {
        /* Both counts sit exactly on their limits: decide on the area of */
        /* the smallest valid cluster */
        S32 parent_area = gai4_partition_area[PART_ID_2Nx2N] << 4;
        S32 smallest_area = MAX_32BIT_VAL;
        S32 clusters_pending = nc_cur;
        S32 idx = 0;

        while(clusters_pending > 0)
        {
            cluster_data_t *ps_cluster = &ps_cur_blk->as_cluster_data[idx];

            idx++;

            if(ps_cluster->is_valid_cluster)
            {
                clusters_pending--;

                if(ps_cluster->area_in_pixels < smallest_area)
                {
                    smallest_area = ps_cluster->area_in_pixels;
                }
            }
        }

        split_required = ((smallest_area << 4) < parent_area) ? 1 : 0;
        is_blk_valid = !split_required;
    }

    *pi4_children_nodes_required = split_required;

    return is_blk_valid;
}

/**
********************************************************************************
*  @fn   S32 hme_determine_validity_16x16
*               (
*                   ctb_cluster_info_t *ps_ctb_cluster_info,
*                   S32 *pi4_children_nodes_required,
*                   S32 blk_validity_wrt_pic_bndry,
*                   S32 parent_blk_validity_wrt_pic_bndry
*               )
*
*  @brief  Determines whether the current 16x16 block needs to be evaluated in
*          enc_loop while recursing through the CU tree, and whether its
*          children nodes are needed as well
*
*  @param[in]  ps_ctb_cluster_info: cluster container; the 16x16, 32x32 and
*              64x64 blocks it points to are read (cluster counts and, for
*              the tie-break, the areas of the valid 16x16 clusters)
*
*  @param[out]  pi4_children_nodes_required: set to 1 when the children of
*               this block have to be visited, else 0
*
*  @param[in]  blk_validity_wrt_pic_bndry: 0 forces 'block invalid, children
*              required'
*
*  @param[in]  parent_blk_validity_wrt_pic_bndry: 0 (with a valid current
*              block) forces 'block valid, children required'
*
*  @return 1 if the block is considered valid, 0 otherwise
********************************************************************************
*/
__inline S32 hme_determine_validity_16x16(
    ctb_cluster_info_t *ps_ctb_cluster_info,
    S32 *pi4_children_nodes_required,
    S32 blk_validity_wrt_pic_bndry,
    S32 parent_blk_validity_wrt_pic_bndry)
{
    cluster_16x16_blk_t *ps_cur_blk = ps_ctb_cluster_info->ps_16x16_blk;
    cluster_32x32_blk_t *ps_parent_blk = ps_ctb_cluster_info->ps_32x32_blk;
    cluster_64x64_blk_t *ps_grandparent_blk = ps_ctb_cluster_info->ps_64x64_blk;

    S32 nc_cur = ps_cur_blk->num_clusters;
    S32 nc_parent = ps_parent_blk->num_clusters;
    S32 nc_grandparent = ps_grandparent_blk->num_clusters;

    S32 is_blk_valid;
    S32 split_required;

    if(!blk_validity_wrt_pic_bndry)
    {
        /* Block itself fails the picture boundary check */
        is_blk_valid = 0;
        split_required = 1;
    }
    else if(!parent_blk_validity_wrt_pic_bndry)
    {
        /* Parent fails the boundary check, so this block must stand in for it */
        is_blk_valid = 1;
        split_required = 1;
    }
    else if(
        (nc_parent > MAX_NUM_CLUSTERS_IN_VALID_32x32_BLK) &&
        (nc_grandparent > MAX_NUM_CLUSTERS_IN_VALID_64x64_BLK))
    {
        /* Both ancestors exceed their cluster limits */
        is_blk_valid = 1;
        split_required = 1;
    }
    else if(nc_cur != MAX_NUM_CLUSTERS_IN_VALID_16x16_BLK)
    {
        /* Strictly below the limit => keep the block; strictly above => */
        /* drop it and go to the children. Parent count is irrelevant here */
        is_blk_valid = (nc_cur < MAX_NUM_CLUSTERS_IN_VALID_16x16_BLK);
        split_required = !is_blk_valid;
    }
    else if(nc_parent != MAX_NUM_CLUSTERS_IN_VALID_32x32_BLK)
    {
        /* Block is exactly on its limit while the parent is not on its own */
        is_blk_valid = 1;
        split_required = 0;
    }
    else
    {
        /* Block and parent both sit exactly on their limits: decide on the */
        /* area of the smallest valid cluster */
        S32 parent_area = gai4_partition_area[PART_ID_2Nx2N] << 2;
        S32 smallest_area = MAX_32BIT_VAL;
        S32 clusters_pending = nc_cur;
        S32 idx = 0;

        while(clusters_pending > 0)
        {
            cluster_data_t *ps_cluster = &ps_cur_blk->as_cluster_data[idx];

            idx++;

            if(ps_cluster->is_valid_cluster)
            {
                clusters_pending--;

                if(ps_cluster->area_in_pixels < smallest_area)
                {
                    smallest_area = ps_cluster->area_in_pixels;
                }
            }
        }

        split_required = ((smallest_area << 4) < parent_area) ? 1 : 0;
        is_blk_valid = !split_required;
    }

    *pi4_children_nodes_required = split_required;

    return is_blk_valid;
}

/**
********************************************************************************
*  @fn   void hme_build_cu_tree
*               (
*                   ctb_cluster_info_t *ps_ctb_cluster_info,
*                   cur_ctb_cu_tree_t *ps_cu_tree,
*                   S32 tree_depth,
*                   CU_POS_T e_grandparent_blk_pos,
*                   CU_POS_T e_parent_blk_pos,
*                   CU_POS_T e_cur_blk_pos
*               )
*
*  @brief  CU tree initialisation. Thin wrapper that forwards all its
*          arguments, plus the tree root and the node counter held in
*          ps_ctb_cluster_info, to ihevce_cu_tree_init
*
*  @param[in]  ps_ctb_cluster_info: supplies 'ps_cu_tree_root' and the address
*                                   of 'nodes_created_in_cu_tree', both of
*                                   which are handed to ihevce_cu_tree_init
*
*  @param[out]  ps_cu_tree : represents CU tree used in CU recursion
*
*  @param[in]  tree_depth : specifies depth of the CU tree
*
*  @param[in]  e_grandparent_blk_pos: position of grandparent block wrt its
*                                     parent, if applicable
*
*  @param[in]  e_parent_blk_pos: position of parent block wrt its parent, if
*                                applicable
*
*  @param[in]  e_cur_blk_pos: position of current block wrt parent
*
*  @return Nothing
********************************************************************************
*/
void hme_build_cu_tree(
    ctb_cluster_info_t *ps_ctb_cluster_info,
    cur_ctb_cu_tree_t *ps_cu_tree,
    S32 tree_depth,
    CU_POS_T e_grandparent_blk_pos,
    CU_POS_T e_parent_blk_pos,
    CU_POS_T e_cur_blk_pos)
{
    /* All the work is delegated; no state is touched here directly */
    ihevce_cu_tree_init(
        ps_cu_tree,
        ps_ctb_cluster_info->ps_cu_tree_root,
        &ps_ctb_cluster_info->nodes_created_in_cu_tree,
        tree_depth,
        e_grandparent_blk_pos,
        e_parent_blk_pos,
        e_cur_blk_pos);
}

/**
********************************************************************************
*  @fn   S32 hme_sdi_based_cluster_spread_eligibility
*               (
*                   cluster_32x32_blk_t *ps_blk_32x32,
*                   S32 sdi_threshold
*               )
*
*  @brief  Determines whether the spread of high SDI MV's around each cluster
*          center is below a pre-determined threshold. For every one of the
*          first 'num_clusters' entries of the block, the distances of the
*          MV's whose sdi is at least 'sdi_threshold' are accumulated via
*          COMPUTE_MVD and compared against half of the cluster's
*          'max_dist_from_centroid' times the number of such MV's
*
*  @param[in]  ps_blk_32x32: 32x32 block whose clusters are inspected
*
*  @param[in]  sdi_threshold: MV's with sdi below this value are ignored
*
*  @return 1 if the spread is constrained for every cluster, else 0
********************************************************************************
*/
__inline S32
    hme_sdi_based_cluster_spread_eligibility(cluster_32x32_blk_t *ps_blk_32x32, S32 sdi_threshold)
{
    S32 i;

    S32 num_clusters = ps_blk_32x32->num_clusters;

    for(i = 0; i < num_clusters; i++)
    {
        S32 j;

        cluster_data_t *ps_data = &ps_blk_32x32->as_cluster_data[i];

        /* Both accumulators restart for every cluster */
        S32 cumulative_mv_distance = 0;
        S32 num_high_sdi_mvs = 0;

        for(j = 0; j < ps_data->num_mvs; j++)
        {
            mv_data_t *ps_mv = &ps_data->as_mv[j];

            /* Only MV's reaching the SDI threshold contribute */
            if(ps_mv->sdi < sdi_threshold)
            {
                continue;
            }

            num_high_sdi_mvs++;

            COMPUTE_MVD(ps_mv, ps_data, cumulative_mv_distance);
        }

        /* A single cluster exceeding its budget rejects the whole block */
        if(cumulative_mv_distance > ((ps_data->max_dist_from_centroid >> 1) * num_high_sdi_mvs))
        {
            return 0;
        }
    }

    return 1;
}

/**
********************************************************************************
*  @fn   void hme_populate_cu_tree
*               (
*                   ctb_cluster_info_t *ps_ctb_cluster_info,
*                   cur_ctb_cu_tree_t *ps_cu_tree,
*                   S32 tree_depth,
*                   ME_QUALITY_PRESETS_T e_quality_preset,
*                   CU_POS_T e_grandparent_blk_pos,
*                   CU_POS_T e_parent_blk_pos,
*                   CU_POS_T e_cur_blk_pos
*               )
*
*  @brief  Recursive function for CU tree population based on output of
*          clustering algorithm. For the node at 'tree_depth' (0: 64x64,
*          1: 32x32, 2: 16x16, 3: the level below 16x16) it decides the node
*          validity and the intra / inter evaluation flags, writes them to
*          'ps_cu_tree' and, if required, recurses into the four children
*
*  @param[in]  ps_ctb_cluster_info: structure containing pointers to clusters
*                                   corresponding to all block sizes from 64x64
*                                   to 16x16, the 32x32 / 8x8 validity masks,
*                                   the IPE output container (ps_cur_ipe_ctb)
*                                   and the CU tree root
*
*  @param[out]  ps_cu_tree : node of the CU tree to be filled; nothing is done
*                            when it is NULL
*
*  @param[in]  tree_depth : specifies depth of the current node in the CU tree
*
*  @param[in]  e_quality_preset : when it compares >= ME_HIGH_QUALITY, node
*               validity is derived from the masks / the 16x16 split flags
*               only, and the cluster based decisions are bypassed
*
*  @param[in]  e_grandparent_blk_pos: position of grandparent block wrt its
*                                     parent, if applicable
*
*  @param[in]  e_parent_blk_pos: position of parent block wrt its parent, if
*                                applicable
*
*  @param[in]  e_cur_blk_pos: position of current block wrt parent
*
*  @return Nothing
********************************************************************************
*/
void hme_populate_cu_tree(
    ctb_cluster_info_t *ps_ctb_cluster_info,
    cur_ctb_cu_tree_t *ps_cu_tree,
    S32 tree_depth,
    ME_QUALITY_PRESETS_T e_quality_preset,
    CU_POS_T e_grandparent_blk_pos,
    CU_POS_T e_parent_blk_pos,
    CU_POS_T e_cur_blk_pos)
{
    S32 area_of_cur_blk;
    S32 area_limit_for_me_decision_precedence;
    S32 children_nodes_required;
    S32 intra_mv_area;
    S32 intra_eval_enable;
    S32 inter_eval_enable;
    S32 ipe_decision_precedence;
    S32 node_validity;
    S32 num_clusters;

    ipe_l0_ctb_analyse_for_me_t *ps_cur_ipe_ctb = ps_ctb_cluster_info->ps_cur_ipe_ctb;

    /* Terminates the recursion when a child node does not exist */
    if(NULL == ps_cu_tree)
    {
        return;
    }

    /* NOTE(review): there is no 'default' label. For a depth outside 0..3, */
    /* node_validity, the eval flags and children_nodes_required would be */
    /* read uninitialised after the switch. Within this function the */
    /* recursion never goes past depth 3 (case 3 clears */
    /* children_nodes_required) - confirm that external callers always */
    /* start at a depth within 0..3 */
    switch(tree_depth)
    {
    case 0:
    {
        /* 64x64 block */
        S32 blk_32x32_mask = ps_ctb_cluster_info->blk_32x32_mask;

        cluster_64x64_blk_t *ps_blk_64x64 = ps_ctb_cluster_info->ps_64x64_blk;

        /* Area of the 64x64 block is 16 times that of the 16x16 2Nx2N partition */
        area_of_cur_blk = gai4_partition_area[PART_ID_2Nx2N] << 4;
        area_limit_for_me_decision_precedence = (area_of_cur_blk * MAX_INTRA_PERCENTAGE) / 100;
        children_nodes_required = 0;
        intra_mv_area = ps_blk_64x64->intra_mv_area;

        /* IPE decisions take precedence once the intra area reaches */
        /* MAX_INTRA_PERCENTAGE percent of the block */
        ipe_decision_precedence = (intra_mv_area >= area_limit_for_me_decision_precedence);

        intra_eval_enable = ipe_decision_precedence;
        inter_eval_enable = !!ps_blk_64x64->num_clusters;

#if 1  //!PROCESS_GT_1CTB_VIA_CU_RECUR_IN_FAST_PRESETS
        if(e_quality_preset >= ME_HIGH_QUALITY)
        {
            inter_eval_enable = 1;
            /* Valid only when all four bits of the 32x32 mask are set */
            node_validity = (blk_32x32_mask == 0xf);
#if !USE_CLUSTER_DATA_AS_BLK_MERGE_CANDTS
            /* NOTE(review): this store is overwritten after the switch, */
            /* where u1_inter_eval_enable is set from inter_eval_enable */
            /* (forced to 1 just above) - confirm which value is intended */
            ps_cu_tree->u1_inter_eval_enable = !(intra_mv_area == area_of_cur_blk);
#endif
            break;
        }
#endif

#if ENABLE_4CTB_EVALUATION
        node_validity = (blk_32x32_mask == 0xf);

        break;
#else
        {
            S32 i;

            num_clusters = ps_blk_64x64->num_clusters;

            /* With IPE precedence, follow the IPE split flag; otherwise */
            /* the cluster count decides */
            node_validity = (ipe_decision_precedence)
                                ? (!ps_cur_ipe_ctb->u1_split_flag)
                                : (num_clusters <= MAX_NUM_CLUSTERS_IN_VALID_64x64_BLK);

            /* Every reference index must also respect its own cluster limit */
            for(i = 0; i < MAX_NUM_REF; i++)
            {
                node_validity = node_validity && (ps_blk_64x64->au1_num_clusters[i] <=
                                                  MAX_NUM_CLUSTERS_IN_ONE_REF_IDX);
            }

            node_validity = node_validity && (blk_32x32_mask == 0xf);
        }
        break;
#endif
    }
    case 1:
    {
        /* 32x32 block */
        S32 is_percent_intra_area_gt_threshold;

        cluster_32x32_blk_t *ps_blk_32x32 = &ps_ctb_cluster_info->ps_32x32_blk[e_cur_blk_pos];

        S32 blk_32x32_mask = ps_ctb_cluster_info->blk_32x32_mask;

#if !ENABLE_4CTB_EVALUATION
        S32 best_inter_cost = ps_blk_32x32->best_inter_cost;
        /* Intra cost = IPE 32x32 cost + qstep * multiplier * 4, replaced by */
        /* MAX_32BIT_VAL when the sum turns out negative. */
        /* NOTE(review): the negative-sum test looks like a wrap-around */
        /* check done after the addition; signed overflow is undefined in */
        /* C, so this relies on the target's wrapping behaviour */
        S32 best_intra_cost =
            ((ps_ctb_cluster_info->ps_cur_ipe_ctb->ai4_best32x32_intra_cost[e_cur_blk_pos] +
              ps_ctb_cluster_info->i4_frame_qstep * ps_ctb_cluster_info->i4_frame_qstep_multiplier *
                  4) < 0)
                ? MAX_32BIT_VAL
                : (ps_ctb_cluster_info->ps_cur_ipe_ctb->ai4_best32x32_intra_cost[e_cur_blk_pos] +
                   ps_ctb_cluster_info->i4_frame_qstep *
                       ps_ctb_cluster_info->i4_frame_qstep_multiplier * 4);
        S32 best_cost = (best_inter_cost > best_intra_cost) ? best_intra_cost : best_inter_cost;
        /* Zero when inter is the cheaper mode, else the inter - intra gap */
        S32 cost_differential = (best_inter_cost - best_cost);
#endif

        /* Area of the 32x32 block is 4 times that of the 16x16 2Nx2N partition */
        area_of_cur_blk = gai4_partition_area[PART_ID_2Nx2N] << 2;
        area_limit_for_me_decision_precedence = (area_of_cur_blk * MAX_INTRA_PERCENTAGE) / 100;
        intra_mv_area = ps_blk_32x32->intra_mv_area;
        /* NOTE(review): assigned here but not read anywhere else in this function */
        is_percent_intra_area_gt_threshold =
            (intra_mv_area > area_limit_for_me_decision_precedence);
        ipe_decision_precedence = (intra_mv_area >= area_limit_for_me_decision_precedence);

        intra_eval_enable = ipe_decision_precedence;
        inter_eval_enable = !!ps_blk_32x32->num_clusters;
        children_nodes_required = 1;

#if 1  //!PROCESS_GT_1CTB_VIA_CU_RECUR_IN_FAST_PRESETS
        if(e_quality_preset >= ME_HIGH_QUALITY)
        {
            inter_eval_enable = 1;
            /* Valid when this block's bit is set in the 32x32 mask */
            node_validity = (((blk_32x32_mask) & (1 << e_cur_blk_pos)) || 0);
#if !USE_CLUSTER_DATA_AS_BLK_MERGE_CANDTS
            /* NOTE(review): overwritten after the switch by */
            /* inter_eval_enable, as in case 0 */
            ps_cu_tree->u1_inter_eval_enable = !(intra_mv_area == area_of_cur_blk);
#endif
            break;
        }
#endif

#if ENABLE_4CTB_EVALUATION
        node_validity = (((blk_32x32_mask) & (1 << e_cur_blk_pos)) || 0);

        break;
#else
        {
            S32 i;
            num_clusters = ps_blk_32x32->num_clusters;

            if(ipe_decision_precedence)
            {
                /* Follow the IPE merge decision for this 32x32 block */
                node_validity = (ps_cur_ipe_ctb->as_intra32_analyse[e_cur_blk_pos].b1_merge_flag);
                node_validity = node_validity && (((blk_32x32_mask) & (1 << e_cur_blk_pos)) || 0);
            }
            else
            {
                /* Valid when (inter - best) cost gap, as a percentage of */
                /* the best cost, is within ALL_INTER_COST_DIFF_THR, the */
                /* cluster count is within its limit and the mask bit is set */
                node_validity =
                    ((ALL_INTER_COST_DIFF_THR * best_cost) >= (100 * cost_differential)) &&
                    (num_clusters <= MAX_NUM_CLUSTERS_IN_VALID_32x32_BLK) &&
                    (((blk_32x32_mask) & (1 << e_cur_blk_pos)) || 0);

                /* Per reference index cluster limit; stops at first failure */
                for(i = 0; (i < MAX_NUM_REF) && (node_validity); i++)
                {
                    node_validity = node_validity && (ps_blk_32x32->au1_num_clusters[i] <=
                                                      MAX_NUM_CLUSTERS_IN_ONE_REF_IDX);
                }

                /* Final gate: spread of high SDI MV's around the centroids */
                if(node_validity)
                {
                    node_validity = node_validity &&
                                    hme_sdi_based_cluster_spread_eligibility(
                                        ps_blk_32x32, ps_ctb_cluster_info->sdi_threshold);
                }
            }
        }

        break;
#endif
    }
    case 2:
    {
        /* 16x16 block; its index in the CTB is (parent position * 4) + */
        /* current position */
        cluster_16x16_blk_t *ps_blk_16x16 =
            &ps_ctb_cluster_info->ps_16x16_blk[e_cur_blk_pos + (e_parent_blk_pos << 2)];

        S32 blk_8x8_mask =
            ps_ctb_cluster_info->pi4_blk_8x8_mask[(S32)(e_parent_blk_pos << 2) + e_cur_blk_pos];

        area_of_cur_blk = gai4_partition_area[PART_ID_2Nx2N];
        area_limit_for_me_decision_precedence = (area_of_cur_blk * MAX_INTRA_PERCENTAGE) / 100;
        children_nodes_required = 1;
        intra_mv_area = ps_blk_16x16->intra_mv_area;
        ipe_decision_precedence = (intra_mv_area >= area_limit_for_me_decision_precedence);
        num_clusters = ps_blk_16x16->num_clusters;

        intra_eval_enable = ipe_decision_precedence;
        inter_eval_enable = 1;

#if 1  //!PROCESS_GT_1CTB_VIA_CU_RECUR_IN_FAST_PRESETS
        if(e_quality_preset >= ME_HIGH_QUALITY)
        {
            /* Node is valid when the 16x16 block was not split; children */
            /* are visited only for split blocks */
            node_validity =
                !ps_ctb_cluster_info
                     ->au1_is_16x16_blk_split[(S32)(e_parent_blk_pos << 2) + e_cur_blk_pos];
            children_nodes_required = !node_validity;
            break;
        }
#endif

#if ENABLE_4CTB_EVALUATION
        node_validity = (blk_8x8_mask == 0xf);

#if ENABLE_CU_TREE_CULLING
        {
            cur_ctb_cu_tree_t *ps_32x32_root;

            /* Locate the parent 32x32 node. NOTE(review): no 'default' */
            /* label, so ps_32x32_root stays unassigned if */
            /* e_parent_blk_pos is not one of the four positions */
            switch(e_parent_blk_pos)
            {
            case POS_TL:
            {
                ps_32x32_root = ps_ctb_cluster_info->ps_cu_tree_root->ps_child_node_tl;

                break;
            }
            case POS_TR:
            {
                ps_32x32_root = ps_ctb_cluster_info->ps_cu_tree_root->ps_child_node_tr;

                break;
            }
            case POS_BL:
            {
                ps_32x32_root = ps_ctb_cluster_info->ps_cu_tree_root->ps_child_node_bl;

                break;
            }
            case POS_BR:
            {
                ps_32x32_root = ps_ctb_cluster_info->ps_cu_tree_root->ps_child_node_br;

                break;
            }
            }

            /* With a valid 32x32 parent, a split 16x16 is dropped in */
            /* favour of its children, and vice versa */
            if(ps_32x32_root->is_node_valid)
            {
                node_validity =
                    node_validity &&
                    !ps_ctb_cluster_info
                         ->au1_is_16x16_blk_split[(S32)(e_parent_blk_pos << 2) + e_cur_blk_pos];
                children_nodes_required = !node_validity;
            }
        }
#endif

        break;
#else

        if(ipe_decision_precedence)
        {
            /* Follow the IPE merge decision for this 16x16 block */
            S32 merge_flag_16 = (ps_cur_ipe_ctb->as_intra32_analyse[e_parent_blk_pos]
                                     .as_intra16_analyse[e_cur_blk_pos]
                                     .b1_merge_flag);
            S32 valid_flag = (blk_8x8_mask == 0xf);

            node_validity = merge_flag_16 && valid_flag;
        }
        else
        {
            node_validity = (blk_8x8_mask == 0xf);
        }

        break;
#endif
    }
    case 3:
    {
        /* Level below 16x16: the 8x8 mask and the IPE flags are those of */
        /* the enclosing 16x16 block (grandparent = 32x32 position, */
        /* parent = 16x16 position) */
        S32 blk_8x8_mask =
            ps_ctb_cluster_info
                ->pi4_blk_8x8_mask[(S32)(e_grandparent_blk_pos << 2) + e_parent_blk_pos];
        S32 merge_flag_16 = (ps_cur_ipe_ctb->as_intra32_analyse[e_grandparent_blk_pos]
                                 .as_intra16_analyse[e_parent_blk_pos]
                                 .b1_merge_flag);
        S32 merge_flag_32 =
            (ps_cur_ipe_ctb->as_intra32_analyse[e_grandparent_blk_pos].b1_merge_flag);

        /* Intra is evaluated unless both enclosing blocks carry the IPE merge flag */
        intra_eval_enable = !merge_flag_16 || !merge_flag_32;
        inter_eval_enable = 1;
        /* Deepest level handled here: recursion stops */
        children_nodes_required = 0;

#if 1  //!PROCESS_GT_1CTB_VIA_CU_RECUR_IN_FAST_PRESETS
        if(e_quality_preset >= ME_HIGH_QUALITY)
        {
            /* Valid when this block's bit is set in the 8x8 mask */
            node_validity = ((blk_8x8_mask & (1 << e_cur_blk_pos)) || 0);
            break;
        }
#endif

#if ENABLE_4CTB_EVALUATION
        node_validity = ((blk_8x8_mask & (1 << e_cur_blk_pos)) || 0);

        break;
#else
        {
            cur_ctb_cu_tree_t *ps_32x32_root;
            cur_ctb_cu_tree_t *ps_16x16_root;
            cluster_32x32_blk_t *ps_32x32_blk;

            /* Walk down from the tree root to the enclosing 32x32 node. */
            /* NOTE(review): neither position switch has a 'default' */
            /* label, so the pointers stay unassigned for any other value */
            switch(e_grandparent_blk_pos)
            {
            case POS_TL:
            {
                ps_32x32_root = ps_ctb_cluster_info->ps_cu_tree_root->ps_child_node_tl;

                break;
            }
            case POS_TR:
            {
                ps_32x32_root = ps_ctb_cluster_info->ps_cu_tree_root->ps_child_node_tr;

                break;
            }
            case POS_BL:
            {
                ps_32x32_root = ps_ctb_cluster_info->ps_cu_tree_root->ps_child_node_bl;

                break;
            }
            case POS_BR:
            {
                ps_32x32_root = ps_ctb_cluster_info->ps_cu_tree_root->ps_child_node_br;

                break;
            }
            }

            /* ... and then to the enclosing 16x16 node */
            switch(e_parent_blk_pos)
            {
            case POS_TL:
            {
                ps_16x16_root = ps_32x32_root->ps_child_node_tl;

                break;
            }
            case POS_TR:
            {
                ps_16x16_root = ps_32x32_root->ps_child_node_tr;

                break;
            }
            case POS_BL:
            {
                ps_16x16_root = ps_32x32_root->ps_child_node_bl;

                break;
            }
            case POS_BR:
            {
                ps_16x16_root = ps_32x32_root->ps_child_node_br;

                break;
            }
            }

            ps_32x32_blk = &ps_ctb_cluster_info->ps_32x32_blk[e_grandparent_blk_pos];

            /* Valid when the mask bit is set and at least one of: the */
            /* 32x32 ancestor is invalid, that 32x32 block has clusters */
            /* with weak SDI density, or the 16x16 parent is invalid */
            node_validity = ((blk_8x8_mask & (1 << e_cur_blk_pos)) || 0) &&
                            ((!ps_32x32_root->is_node_valid) ||
                             (ps_32x32_blk->num_clusters_with_weak_sdi_density > 0) ||
                             (!ps_16x16_root->is_node_valid));

            break;
        }
#endif
    }
    }

    /* Fill the current cu_tree node */
    ps_cu_tree->is_node_valid = node_validity;
    ps_cu_tree->u1_intra_eval_enable = intra_eval_enable;
    ps_cu_tree->u1_inter_eval_enable = inter_eval_enable;

    /* Recurse into the four children. Positions shift by one generation: */
    /* the current parent becomes the grandparent and the current block */
    /* becomes the parent */
    if(children_nodes_required)
    {
        tree_depth++;

        hme_populate_cu_tree(
            ps_ctb_cluster_info,
            ps_cu_tree->ps_child_node_tl,
            tree_depth,
            e_quality_preset,
            e_parent_blk_pos,
            e_cur_blk_pos,
            POS_TL);

        hme_populate_cu_tree(
            ps_ctb_cluster_info,
            ps_cu_tree->ps_child_node_tr,
            tree_depth,
            e_quality_preset,
            e_parent_blk_pos,
            e_cur_blk_pos,
            POS_TR);

        hme_populate_cu_tree(
            ps_ctb_cluster_info,
            ps_cu_tree->ps_child_node_bl,
            tree_depth,
            e_quality_preset,
            e_parent_blk_pos,
            e_cur_blk_pos,
            POS_BL);

        hme_populate_cu_tree(
            ps_ctb_cluster_info,
            ps_cu_tree->ps_child_node_br,
            tree_depth,
            e_quality_preset,
            e_parent_blk_pos,
            e_cur_blk_pos,
            POS_BR);
    }
}

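/**
********************************************************************************
*  @fn   void hme_analyse_mv_clustering
*               (
*                   search_results_t *ps_search_results,
*                   inter_cu_results_t *ps_16x16_cu_results,
*                   inter_cu_results_t *ps_8x8_cu_results,
*                   ctb_cluster_info_t *ps_ctb_cluster_info,
*                   S08 *pi1_future_list,
*                   S08 *pi1_past_list,
*                   S32 bidir_enabled,
*                   ME_QUALITY_PRESETS_T e_quality_preset
*               )
*
*  @brief  Implementation for the clustering algorithm
*
*  @param[in]  ps_search_results: structure containing 16x16 block results;
*              its split flag selects between 16x16 and 8x8 CU results
*
*  @param[in]  ps_16x16_cu_results: best inter results of each 16x16 block
*
*  @param[in]  ps_8x8_cu_results: best inter results of each 8x8 block, used
*              when the enclosing 16x16 block is split
*
*  @param[in,out]  ps_ctb_cluster_info: cluster containers for all block
*              sizes, block validity masks and the root of the CU tree
*
*  @param[in]  pi1_future_list: table indexed by the L1 reference index of a
*              PU, giving the reference index used for clustering
*
*  @param[in]  pi1_past_list: table indexed by the L0 reference index of a
*              PU, giving the reference index used for clustering
*
*  @param[in]  bidir_enabled: forwarded to the cluster initialisation routines
*
*  @param[in]  e_quality_preset: ME quality preset; selects how much of the
*              clustering is carried out
*
*  @return None
********************************************************************************
*/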
void hme_analyse_mv_clustering(
    search_results_t *ps_search_results,
    inter_cu_results_t *ps_16x16_cu_results,
    inter_cu_results_t *ps_8x8_cu_results,
    ctb_cluster_info_t *ps_ctb_cluster_info,
    S08 *pi1_future_list,
    S08 *pi1_past_list,
    S32 bidir_enabled,
    ME_QUALITY_PRESETS_T e_quality_preset)
{
    cluster_16x16_blk_t *ps_blk_16x16;
    cluster_32x32_blk_t *ps_blk_32x32;
    cluster_64x64_blk_t *ps_blk_64x64;

    part_type_results_t *ps_best_result;
    pu_result_t *aps_part_result[MAX_NUM_PARTS];
    pu_result_t *aps_inferior_parts[MAX_NUM_PARTS];

    PART_ID_T e_part_id;
    PART_TYPE_T e_part_type;

    S32 enable_64x64_merge;
    S32 i, j, k;
    S32 mvx, mvy;
    S32 num_parts;
    S32 ref_idx;
    S32 ai4_pred_mode[MAX_NUM_PARTS];

    S32 num_32x32_merges = 0;

    /*****************************************/
    /*****************************************/
    /********* Enter ye who is HQ ************/
    /*****************************************/
    /*****************************************/

    ps_blk_64x64 = ps_ctb_cluster_info->ps_64x64_blk;

    /* Initialise data in each of the clusters */
    for(i = 0; i < 16; i++)
    {
        ps_blk_16x16 = &ps_ctb_cluster_info->ps_16x16_blk[i];

#if !USE_CLUSTER_DATA_AS_BLK_MERGE_CANDTS
        if(e_quality_preset < ME_HIGH_QUALITY)
        {
            hme_init_clusters_16x16(ps_blk_16x16, bidir_enabled);
        }
        else
        {
            ps_blk_16x16->best_inter_cost = 0;
            ps_blk_16x16->intra_mv_area = 0;
        }
#else
        hme_init_clusters_16x16(ps_blk_16x16, bidir_enabled);
#endif
    }

    for(i = 0; i < 4; i++)
    {
        ps_blk_32x32 = &ps_ctb_cluster_info->ps_32x32_blk[i];

#if !USE_CLUSTER_DATA_AS_BLK_MERGE_CANDTS
        if(e_quality_preset < ME_HIGH_QUALITY)
        {
            hme_init_clusters_32x32(ps_blk_32x32, bidir_enabled);
        }
        else
        {
            ps_blk_32x32->best_inter_cost = 0;
            ps_blk_32x32->intra_mv_area = 0;
        }
#else
        hme_init_clusters_32x32(ps_blk_32x32, bidir_enabled);
#endif
    }

#if !USE_CLUSTER_DATA_AS_BLK_MERGE_CANDTS
    if(e_quality_preset < ME_HIGH_QUALITY)
    {
        hme_init_clusters_64x64(ps_blk_64x64, bidir_enabled);
    }
    else
    {
        ps_blk_64x64->best_inter_cost = 0;
        ps_blk_64x64->intra_mv_area = 0;
    }
#else
    hme_init_clusters_64x64(ps_blk_64x64, bidir_enabled);
#endif

    /* Initialise data for all nodes in the CU tree */
    hme_build_cu_tree(
        ps_ctb_cluster_info, ps_ctb_cluster_info->ps_cu_tree_root, 0, POS_NA, POS_NA, POS_NA);

    if(e_quality_preset >= ME_HIGH_QUALITY)
    {
        memset(ps_ctb_cluster_info->au1_is_16x16_blk_split, 1, 16 * sizeof(U08));
    }

#if ENABLE_UNIFORM_CU_SIZE_16x16 || ENABLE_UNIFORM_CU_SIZE_8x8
    return;
#endif

    for(i = 0; i < 16; i++)
    {
        S32 blk_8x8_mask;
        S32 is_16x16_blk_valid;
        S32 num_clusters_updated;
        S32 num_clusters;

        blk_8x8_mask = ps_ctb_cluster_info->pi4_blk_8x8_mask[i];

        ps_blk_16x16 = &ps_ctb_cluster_info->ps_16x16_blk[i];

        is_16x16_blk_valid = (blk_8x8_mask == 0xf);

        if(is_16x16_blk_valid)
        {
            /* Use 8x8 data when 16x16 CU is split */
            if(ps_search_results[i].u1_split_flag)
            {
                S32 blk_8x8_idx = i << 2;

                num_parts = 4;
                e_part_type = PRT_NxN;

                for(j = 0; j < num_parts; j++, blk_8x8_idx++)
                {
                    /* Only 2Nx2N partition supported for 8x8 block */
                    ASSERT(
                        ps_8x8_cu_results[blk_8x8_idx].ps_best_results[0].u1_part_type ==
                        ((PART_TYPE_T)PRT_2Nx2N));

                    aps_part_result[j] =
                        &ps_8x8_cu_results[blk_8x8_idx].ps_best_results[0].as_pu_results[0];
                    aps_inferior_parts[j] =
                        &ps_8x8_cu_results[blk_8x8_idx].ps_best_results[1].as_pu_results[0];
                    ai4_pred_mode[j] = (aps_part_result[j]->pu.b2_pred_mode);
                }
            }
            else
            {
                ps_best_result = &ps_16x16_cu_results[i].ps_best_results[0];

                e_part_type = (PART_TYPE_T)ps_best_result->u1_part_type;
                num_parts = gau1_num_parts_in_part_type[e_part_type];

                for(j = 0; j < num_parts; j++)
                {
                    aps_part_result[j] = &ps_best_result->as_pu_results[j];
                    aps_inferior_parts[j] = &ps_best_result[1].as_pu_results[j];
                    ai4_pred_mode[j] = (aps_part_result[j]->pu.b2_pred_mode);
                }

                ps_ctb_cluster_info->au1_is_16x16_blk_split[i] = 0;
            }

            for(j = 0; j < num_parts; j++)
            {
                pu_result_t *ps_part_result = aps_part_result[j];

                S32 num_mvs = ((ai4_pred_mode[j] > 1) + 1);

                e_part_id = ge_part_type_to_part_id[e_part_type][j];

                /* Skip clustering if best mode is intra */
                if((ps_part_result->pu.b1_intra_flag))
                {
                    ps_blk_16x16->intra_mv_area += gai4_partition_area[e_part_id];
                    ps_blk_16x16->best_inter_cost += aps_inferior_parts[j]->i4_tot_cost;
                    continue;
                }
                else
                {
                    ps_blk_16x16->best_inter_cost += ps_part_result->i4_tot_cost;
                }

#if !USE_CLUSTER_DATA_AS_BLK_MERGE_CANDTS
                if(e_quality_preset >= ME_HIGH_QUALITY)
                {
                    continue;
                }
#endif

                for(k = 0; k < num_mvs; k++)
                {
                    mv_t *ps_mv;

                    pu_mv_t *ps_pu_mv = &ps_part_result->pu.mv;

                    S32 is_l0_mv = ((ai4_pred_mode[j] == 2) && !k) || (ai4_pred_mode[j] == 0);

                    ps_mv = (is_l0_mv) ? (&ps_pu_mv->s_l0_mv) : (&ps_pu_mv->s_l1_mv);

                    mvx = ps_mv->i2_mvx;
                    mvy = ps_mv->i2_mvy;

                    ref_idx = (is_l0_mv) ? pi1_past_list[ps_pu_mv->i1_l0_ref_idx]
                                         : pi1_future_list[ps_pu_mv->i1_l1_ref_idx];

                    num_clusters = ps_blk_16x16->num_clusters;

                    hme_find_and_update_clusters(
                        ps_blk_16x16->as_cluster_data,
                        &(ps_blk_16x16->num_clusters),
                        mvx,
                        mvy,
                        ref_idx,
                        ps_part_result->i4_sdi,
                        e_part_id,
                        (ai4_pred_mode[j] == 2));

                    num_clusters_updated = (ps_blk_16x16->num_clusters);

                    ps_blk_16x16->au1_num_clusters[ref_idx] +=
                        (num_clusters_updated - num_clusters);
                }
            }
        }
    }

    /* Search for 32x32 clusters */
    for(i = 0; i < 4; i++)
    {
        S32 num_clusters_merged;

        S32 is_32x32_blk_valid = (ps_ctb_cluster_info->blk_32x32_mask & (1 << i)) || 0;

        if(is_32x32_blk_valid)
        {
            ps_blk_32x32 = &ps_ctb_cluster_info->ps_32x32_blk[i];
            ps_blk_16x16 = &ps_ctb_cluster_info->ps_16x16_blk[i << 2];

#if !USE_CLUSTER_DATA_AS_BLK_MERGE_CANDTS
            if(e_quality_preset >= ME_HIGH_QUALITY)
            {
                for(j = 0; j < 4; j++, ps_blk_16x16++)
                {
                    ps_blk_32x32->intra_mv_area += ps_blk_16x16->intra_mv_area;

                    ps_blk_32x32->best_inter_cost += ps_blk_16x16->best_inter_cost;
                }
                continue;
            }
#endif

            hme_update_32x32_clusters(ps_blk_32x32, ps_blk_16x16);

            if((ps_blk_32x32->num_clusters >= MAX_NUM_CLUSTERS_IN_VALID_32x32_BLK))
            {
                num_clusters_merged = hme_try_merge_clusters_blksize_gt_16(
                    ps_blk_32x32->as_cluster_data, (ps_blk_32x32->num_clusters));

                if(num_clusters_merged)
                {
                    ps_blk_32x32->num_clusters -= num_clusters_merged;

                    UPDATE_CLUSTER_METADATA_POST_MERGE(ps_blk_32x32);
                }
            }
        }
    }

#if !USE_CLUSTER_DATA_AS_BLK_MERGE_CANDTS
    /* Eliminate outlier 32x32 clusters */
    if(e_quality_preset < ME_HIGH_QUALITY)
#endif
    {
        hme_boot_out_outlier(ps_ctb_cluster_info, 32);

        /* Find best_uni_ref and best_alt_ref */
        hme_find_top_ref_ids(ps_ctb_cluster_info, bidir_enabled, 32);
    }

    /* Populate the CU tree for depths 1 and higher */
    {
        cur_ctb_cu_tree_t *ps_tree_root = ps_ctb_cluster_info->ps_cu_tree_root;
        cur_ctb_cu_tree_t *ps_tl = ps_tree_root->ps_child_node_tl;
        cur_ctb_cu_tree_t *ps_tr = ps_tree_root->ps_child_node_tr;
        cur_ctb_cu_tree_t *ps_bl = ps_tree_root->ps_child_node_bl;
        cur_ctb_cu_tree_t *ps_br = ps_tree_root->ps_child_node_br;

        hme_populate_cu_tree(
            ps_ctb_cluster_info, ps_tl, 1, e_quality_preset, POS_NA, POS_NA, POS_TL);

        num_32x32_merges += (ps_tl->is_node_valid == 1);

        hme_populate_cu_tree(
            ps_ctb_cluster_info, ps_tr, 1, e_quality_preset, POS_NA, POS_NA, POS_TR);

        num_32x32_merges += (ps_tr->is_node_valid == 1);

        hme_populate_cu_tree(
            ps_ctb_cluster_info, ps_bl, 1, e_quality_preset, POS_NA, POS_NA, POS_BL);

        num_32x32_merges += (ps_bl->is_node_valid == 1);

        hme_populate_cu_tree(
            ps_ctb_cluster_info, ps_br, 1, e_quality_preset, POS_NA, POS_NA, POS_BR);

        num_32x32_merges += (ps_br->is_node_valid == 1);
    }

#if !ENABLE_4CTB_EVALUATION
    if(e_quality_preset < ME_HIGH_QUALITY)
    {
        enable_64x64_merge = (num_32x32_merges >= 3);
    }
#else
    if(e_quality_preset < ME_HIGH_QUALITY)
    {
        enable_64x64_merge = 1;
    }
#endif

#if 1  //!PROCESS_GT_1CTB_VIA_CU_RECUR_IN_FAST_PRESETS
    if(e_quality_preset >= ME_HIGH_QUALITY)
    {
        enable_64x64_merge = 1;
    }
#else
    if(e_quality_preset >= ME_HIGH_QUALITY)
    {
        enable_64x64_merge = (num_32x32_merges >= 3);
    }
#endif

    if(enable_64x64_merge)
    {
        S32 num_clusters_merged;

        ps_blk_32x32 = &ps_ctb_cluster_info->ps_32x32_blk[0];

#if !USE_CLUSTER_DATA_AS_BLK_MERGE_CANDTS
        if(e_quality_preset >= ME_HIGH_QUALITY)
        {
            for(j = 0; j < 4; j++, ps_blk_32x32++)
            {
                ps_blk_64x64->intra_mv_area += ps_blk_32x32->intra_mv_area;

                ps_blk_64x64->best_inter_cost += ps_blk_32x32->best_inter_cost;
            }
        }
        else
#endif
        {
            hme_update_64x64_clusters(ps_blk_64x64, ps_blk_32x32);

            if((ps_blk_64x64->num_clusters >= MAX_NUM_CLUSTERS_IN_VALID_64x64_BLK))
            {
                num_clusters_merged = hme_try_merge_clusters_blksize_gt_16(
                    ps_blk_64x64->as_cluster_data, (ps_blk_64x64->num_clusters));

                if(num_clusters_merged)
                {
                    ps_blk_64x64->num_clusters -= num_clusters_merged;

                    UPDATE_CLUSTER_METADATA_POST_MERGE(ps_blk_64x64);
                }
            }
        }

#if !ENABLE_4CTB_EVALUATION
        if(e_quality_preset < ME_HIGH_QUALITY)
        {
            S32 best_inter_cost = ps_blk_64x64->best_inter_cost;
            S32 best_intra_cost =
                ((ps_ctb_cluster_info->ps_cur_ipe_ctb->i4_best64x64_intra_cost +
                  ps_ctb_cluster_info->i4_frame_qstep *
                      ps_ctb_cluster_info->i4_frame_qstep_multiplier * 16) < 0)
                    ? MAX_32BIT_VAL
                    : (ps_ctb_cluster_info->ps_cur_ipe_ctb->i4_best64x64_intra_cost +
                       ps_ctb_cluster_info->i4_frame_qstep *
                           ps_ctb_cluster_info->i4_frame_qstep_multiplier * 16);
            S32 best_cost = (best_inter_cost > best_intra_cost) ? best_intra_cost : best_inter_cost;
            S32 cost_differential = (best_inter_cost - best_cost);

            enable_64x64_merge =
                ((ALL_INTER_COST_DIFF_THR * best_cost) >= (100 * cost_differential));
        }
#endif
    }

    if(enable_64x64_merge)
    {
#if !USE_CLUSTER_DATA_AS_BLK_MERGE_CANDTS
        if(e_quality_preset < ME_HIGH_QUALITY)
#endif
        {
            hme_boot_out_outlier(ps_ctb_cluster_info, 64);

            hme_find_top_ref_ids(ps_ctb_cluster_info, bidir_enabled, 64);
        }

        hme_populate_cu_tree(
            ps_ctb_cluster_info,
            ps_ctb_cluster_info->ps_cu_tree_root,
            0,
            e_quality_preset,
            POS_NA,
            POS_NA,
            POS_NA);
    }
}
#endif

/**
********************************************************************************
*  @fn   hme_merge_prms_init
*
*  @brief  Fills one merge parameter structure, either for merging four 16x16
*          results into a 32x32 result or for merging the four 32x32 results
*          into the 64x64 result.
*
*  @param[out] ps_prms : merge params structure to be populated
*
*  @param[in]  ps_curr_layer : layer ctxt stored in the params
*
*  @param[in]  ps_refine_prms : source of i4_use_rec_in_fpel and of the number
*              of input results (fpel results for 32x32, 32x32 merge results
*              for 64x64)
*
*  @param[in]  ps_me_ctxt : frame ctxt holding the 16x16 / 32x32 / 64x64
*              search results, 8x8 cu results, ref lists and cluster info
*
*  @param[in]  ps_range_prms_rec : per reference mv range, used when
*              i4_use_rec_in_fpel is non zero
*
*  @param[in]  ps_range_prms_inp : per reference mv range, used otherwise
*
*  @param[in]  pps_mv_grid : mv grid pointer array stored in the params
*
*  @param[in]  ps_inter_ctb_prms : inter ctb prms stored in the params
*
*  @param[in]  i4_num_pred_dir : stored as the number of refs to merge in
*
*  @param[in]  i4_32x32_id : index of the 32x32 unit; only read when
*              e_blk_size is BLK_32x32
*
*  @param[in]  e_blk_size : BLK_32x32 selects the 16x16 to 32x32 setup, any
*              other value selects the 32x32 to 64x64 setup
*
*  @param[in]  e_me_quality_presets : quality preset stored in the params
*
*  @return None
********************************************************************************
*/
static __inline void hme_merge_prms_init(
    hme_merge_prms_t *ps_prms,
    layer_ctxt_t *ps_curr_layer,
    refine_prms_t *ps_refine_prms,
    me_frm_ctxt_t *ps_me_ctxt,
    range_prms_t *ps_range_prms_rec,
    range_prms_t *ps_range_prms_inp,
    mv_grid_t **pps_mv_grid,
    inter_ctb_prms_t *ps_inter_ctb_prms,
    S32 i4_num_pred_dir,
    S32 i4_32x32_id,
    BLK_SIZE_T e_blk_size,
    ME_QUALITY_PRESETS_T e_me_quality_presets)
{
    S32 use_rec_flag = ps_refine_prms->i4_use_rec_in_fpel;
    /* The mv ranges come from the recon set when fpel uses recon, */
    /* and from the input set otherwise                            */
    range_prms_t *ps_range_src = (use_rec_flag) ? ps_range_prms_rec : ps_range_prms_inp;
    WORD32 ref_idx;

    /* Segmentation info from previous layers is currently not enabled */
    ps_prms->i4_seg_info_avail = 0;
    ps_prms->i4_part_mask = 0;

    /* Count of reference pics in which merge is attempted */
    ps_prms->i4_num_ref = i4_num_pred_dir;

    /* Layer level and CTB level contexts */
    ps_prms->ps_layer_ctxt = ps_curr_layer;
    ps_prms->ps_inter_ctb_prms = ps_inter_ctb_prms;

    if(BLK_32x32 != e_blk_size)
    {
        /* 32x32 -> 64x64 : children are the four 32x32 results */
        ps_prms->ps_results_tl = &ps_me_ctxt->as_search_results_32x32[0];
        ps_prms->ps_results_tr = &ps_me_ctxt->as_search_results_32x32[1];
        ps_prms->ps_results_bl = &ps_me_ctxt->as_search_results_32x32[2];
        ps_prms->ps_results_br = &ps_me_ctxt->as_search_results_32x32[3];

        /* Destination of the merged result */
        ps_prms->ps_results_merge = &ps_me_ctxt->s_search_results_64x64;

        ps_prms->i4_num_inp_results = ps_refine_prms->i4_num_32x32_merge_results;
        ps_prms->ps_8x8_cu_results = &ps_me_ctxt->as_cu8x8_results[0];
        ps_prms->ps_results_grandchild = ps_me_ctxt->as_search_results_16x16;
    }
    else
    {
        /* 16x16 -> 32x32 : children are four consecutive 16x16 results, */
        /* in tl, tr, bl, br order, starting at 4 * i4_32x32_id           */
        S32 first_16x16_idx = (i4_32x32_id << 2);

        ps_prms->ps_results_tl = &ps_me_ctxt->as_search_results_16x16[first_16x16_idx];
        ps_prms->ps_results_tr = &ps_me_ctxt->as_search_results_16x16[first_16x16_idx + 1];
        ps_prms->ps_results_bl = &ps_me_ctxt->as_search_results_16x16[first_16x16_idx + 2];
        ps_prms->ps_results_br = &ps_me_ctxt->as_search_results_16x16[first_16x16_idx + 3];

        /* Destination of the merged result */
        ps_prms->ps_results_merge = &ps_me_ctxt->as_search_results_32x32[i4_32x32_id];

        /* This could be less than the number of 16x16 results generated. */
        /* For now it is kept the same                                    */
        ps_prms->i4_num_inp_results = ps_refine_prms->i4_num_fpel_results;
        ps_prms->ps_8x8_cu_results = &ps_me_ctxt->as_cu8x8_results[i4_32x32_id << 4];
        ps_prms->ps_results_grandchild = NULL;
    }

    /* One mv range pointer per reference */
    for(ref_idx = 0; ref_idx < MAX_NUM_REF; ref_idx++)
    {
        ps_prms->aps_mv_range[ref_idx] = &ps_range_src[ref_idx];
    }
    ps_prms->i4_use_rec = use_rec_flag;

    ps_prms->pf_mv_cost_compute = compute_mv_cost_implicit_high_speed;

    ps_prms->pps_mv_grid = pps_mv_grid;

    /* Remaining fields are taken from the frame ctxt and the preset */
    ps_prms->log_ctb_size = ps_me_ctxt->log_ctb_size;
    ps_prms->e_quality_preset = e_me_quality_presets;
    ps_prms->pi1_future_list = ps_me_ctxt->ai1_future_list;
    ps_prms->pi1_past_list = ps_me_ctxt->ai1_past_list;
    ps_prms->ps_cluster_info = ps_me_ctxt->ps_ctb_cluster_info;
}

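/**
********************************************************************************
*  @fn   void hme_refine(me_ctxt_t *ps_thrd_ctxt,
*                       refine_prms_t *ps_refine_prms,
*                       PF_EXT_UPDATE_FXN_T pf_ext_update_fxn,
*                       layer_ctxt_t *ps_coarse_layer,
*                       multi_thrd_ctxt_t *ps_multi_thrd_ctxt,
*                       S32 lyr_job_type,
*                       S32 thrd_id,
*                       S32 me_frm_id,
*                       pre_enc_L0_ipe_encloop_ctxt_t *ps_l0_ipe_input)
*
*  @brief  Top level entry point for refinement ME
*
*  @param[in,out]  ps_thrd_ctxt: ME Handle
*
*  @param[in]  ps_refine_prms : refinement layer prms
*
*  @param[in]  pf_ext_update_fxn : external update function pointer
*
*  @param[in]  ps_coarse_layer : layer ctxt of the coarser layer, whose mv bank
*              is read
*
*  @param[in]  ps_multi_thrd_ctxt : multi thread ctxt
*
*  @param[in]  lyr_job_type : layer job type
*
*  @param[in]  thrd_id : thread id
*
*  @param[in]  me_frm_id : ME frame id; taken modulo MAX_NUM_ME_PARALLEL to
*              select the frame ctxt within ps_thrd_ctxt
*
*  @param[in]  ps_l0_ipe_input : L0 IPE input ctxt
*
*  @return None
********************************************************************************
*/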
void hme_refine(
    me_ctxt_t *ps_thrd_ctxt,
    refine_prms_t *ps_refine_prms,
    PF_EXT_UPDATE_FXN_T pf_ext_update_fxn,
    layer_ctxt_t *ps_coarse_layer,
    multi_thrd_ctxt_t *ps_multi_thrd_ctxt,
    S32 lyr_job_type,
    S32 thrd_id,
    S32 me_frm_id,
    pre_enc_L0_ipe_encloop_ctxt_t *ps_l0_ipe_input)
{
    inter_ctb_prms_t s_common_frm_prms;

    BLK_SIZE_T e_search_blk_size, e_result_blk_size;
    WORD32 i4_me_frm_id = me_frm_id % MAX_NUM_ME_PARALLEL;
    me_frm_ctxt_t *ps_ctxt = ps_thrd_ctxt->aps_me_frm_prms[i4_me_frm_id];
    ME_QUALITY_PRESETS_T e_me_quality_presets =
        ps_thrd_ctxt->s_init_prms.s_me_coding_tools.e_me_quality_presets;

    WORD32 num_rows_proc = 0;
    WORD32 num_act_ref_pics;
    WORD16 i2_prev_enc_frm_max_mv_y;
    WORD32 i4_idx_dvsr_p = ps_multi_thrd_ctxt->i4_idx_dvsr_p;

    /*************************************************************************/
    /* Complexity of search: Low to High                                     */
    /*************************************************************************/
    SEARCH_COMPLEXITY_T e_search_complexity;

    /*************************************************************************/
    /* to store the PU results which are passed to the decide_part_types     */
    /* as input prms. Multiplied by 4 as the max number of Ref in a List is 4*/
    /*************************************************************************/

    pu_result_t as_pu_results[2][TOT_NUM_PARTS][MAX_NUM_RESULTS_PER_PART_LIST];
    inter_pu_results_t as_inter_pu_results[4];
    inter_pu_results_t *ps_pu_results = as_inter_pu_results;

    /*************************************************************************/
    /* Config parameter structures for various ME submodules                 */
    /*************************************************************************/
    hme_merge_prms_t s_merge_prms_32x32_tl, s_merge_prms_32x32_tr;
    hme_merge_prms_t s_merge_prms_32x32_bl, s_merge_prms_32x32_br;
    hme_merge_prms_t s_merge_prms_64x64;
    hme_search_prms_t s_search_prms_blk;
    mvbank_update_prms_t s_mv_update_prms;
    hme_ctb_prms_t s_ctb_prms;
    hme_subpel_prms_t s_subpel_prms;
    fullpel_refine_ctxt_t *ps_fullpel_refine_ctxt = ps_ctxt->ps_fullpel_refine_ctxt;
    ctb_cluster_info_t *ps_ctb_cluster_info;
    fpel_srch_cand_init_data_t s_srch_cand_init_data;

    /* 4 bits (LSBs) of this variable control merge of 4 32x32 CUs in CTB */
    S32 en_merge_32x32;
    /* 5 lsb's specify whether or not merge algorithm is required */
    /* to be executed or not. Relevant only in PQ. Ought to be */
    /* used in conjunction with en_merge_32x32 and */
    /* ps_ctb_bound_attrs->u1_merge_to_64x64_flag. This is */
    /* required when all children are deemed to be intras */
    S32 en_merge_execution;

    /*************************************************************************/
    /* All types of search candidates for predictor based search.            */
    /*************************************************************************/
    S32 num_init_candts = 0;
    S32 i4_num_act_ref_l0 = ps_ctxt->s_frm_prms.u1_num_active_ref_l0;
    S32 i4_num_act_ref_l1 = ps_ctxt->s_frm_prms.u1_num_active_ref_l1;
    search_candt_t *ps_search_candts, as_search_candts[MAX_INIT_CANDTS];
    search_node_t as_top_neighbours[4], as_left_neighbours[3];

    pf_get_wt_inp fp_get_wt_inp;

    search_node_t as_unique_search_nodes[MAX_INIT_CANDTS * 9];
    U32 au4_unique_node_map[MAP_X_MAX * 2];

    /* Controls the boundary attributes of CTB, whether it has 64x64 or not */
    ctb_boundary_attrs_t *ps_ctb_bound_attrs;

    /*************************************************************************/
    /* points to the search results for the blk level search (8x8/16x16)     */
    /*************************************************************************/
    search_results_t *ps_search_results;

    /*************************************************************************/
    /* Coordinates                                                           */
    /*************************************************************************/
    S32 blk_x, blk_y, i4_ctb_x, i4_ctb_y, tile_col_idx, blk_id_in_ctb;
    S32 pos_x, pos_y;
    S32 blk_id_in_full_ctb;

    /*************************************************************************/
    /* Related to dimensions of block being searched and pic dimensions      */
    /*************************************************************************/
    S32 blk_4x4_to_16x16;
    S32 blk_wd, blk_ht, blk_size_shift;
    S32 i4_pic_wd, i4_pic_ht, num_blks_in_this_ctb;
    S32 num_results_prev_layer;

    /*************************************************************************/
    /* Size of a basic unit for this layer. For non encode layers, we search */
    /* in block sizes of 8x8. For encode layers, though we search 16x16s the */
    /* basic unit size is the ctb size.                                      */
    /*************************************************************************/
    S32 unit_size;

    /*************************************************************************/
    /* Local variable storing results of any 4 CU merge to bigger CU         */
    /*************************************************************************/
    CU_MERGE_RESULT_T e_merge_result;

    /*************************************************************************/
    /* This mv grid stores results during and after fpel search, during      */
    /* merge, subpel and bidirect refinements stages. 2 instances of this are*/
    /* meant for the 2 directions of search (l0 and l1).                     */
    /*************************************************************************/
    mv_grid_t *aps_mv_grid[2];

    /*************************************************************************/
    /* Pointers to context in current and coarser layers                     */
    /*************************************************************************/
    layer_ctxt_t *ps_curr_layer, *ps_prev_layer;

    /*************************************************************************/
    /* to store mv range per blk, and picture limit, allowed search range    */
    /* range prms in hpel and qpel units as well                             */
    /*************************************************************************/
    range_prms_t as_range_prms_inp[MAX_NUM_REF], as_range_prms_rec[MAX_NUM_REF];
    range_prms_t s_pic_limit_inp, s_pic_limit_rec, as_mv_limit[MAX_NUM_REF];
    range_prms_t as_range_prms_hpel[MAX_NUM_REF], as_range_prms_qpel[MAX_NUM_REF];

    /*************************************************************************/
    /* These variables are used to track number of references at different   */
    /* stages of ME.                                                         */
    /*************************************************************************/
    S32 i4_num_pred_dir;
    S32 i4_num_ref_each_dir, i, i4_num_ref_prev_layer;
    S32 lambda_recon = ps_refine_prms->lambda_recon;

    /* Counts successful merge to 32x32 every CTB (0-4) */
    S32 merge_count_32x32;

    S32 ai4_id_coloc[14], ai4_id_Z[2];
    U08 au1_search_candidate_list_index[2];
    S32 ai4_num_coloc_cands[2];
    U08 u1_pred_dir, u1_pred_dir_ctr;

    /*************************************************************************/
    /* Input pointer and stride                                              */
    /*************************************************************************/
    U08 *pu1_inp;
    S32 i4_inp_stride;
    S32 end_of_frame;
    S32 num_sync_units_in_row, num_sync_units_in_tile;

    /*************************************************************************/
    /* Indicates whether the all 4 8x8 blks are valid in the 16x16 blk in the*/
    /* encode layer. If not 15, then 1 or more 8x8 blks not valid. Means that*/
    /* we need to stop merges and force 8x8 CUs for that 16x16 blk           */
    /*************************************************************************/
    S32 blk_8x8_mask;
    S32 ai4_blk_8x8_mask[16];
    U08 au1_is_64x64Blk_noisy[1];
    U08 au1_is_32x32Blk_noisy[4];
    U08 au1_is_16x16Blk_noisy[16];

    ihevce_cmn_opt_func_t *ps_cmn_utils_optimised_function_list =
        ps_thrd_ctxt->ps_cmn_utils_optimised_function_list;
    ihevce_me_optimised_function_list_t *ps_me_optimised_function_list =
        ((ihevce_me_optimised_function_list_t *)ps_thrd_ctxt->pv_me_optimised_function_list);

    ASSERT(ps_refine_prms->i4_layer_id < ps_ctxt->num_layers - 1);

    /*************************************************************************/
    /* Pointers to current and coarse layer are needed for projection */
    /* Pointer to prev layer are needed for other candts like coloc   */
    /*************************************************************************/
    ps_curr_layer = ps_ctxt->ps_curr_descr->aps_layers[ps_refine_prms->i4_layer_id];

    ps_prev_layer = hme_get_past_layer_ctxt(
        ps_thrd_ctxt, ps_ctxt, ps_refine_prms->i4_layer_id, ps_multi_thrd_ctxt->i4_num_me_frm_pllel);

    num_results_prev_layer = ps_coarse_layer->ps_layer_mvbank->i4_num_mvs_per_ref;

    /* Function pointer is selected based on the C vc X86 macro */

    fp_get_wt_inp = ps_me_optimised_function_list->pf_get_wt_inp_ctb;

    i4_inp_stride = ps_curr_layer->i4_inp_stride;
    i4_pic_wd = ps_curr_layer->i4_wd;
    i4_pic_ht = ps_curr_layer->i4_ht;
    e_search_complexity = ps_refine_prms->e_search_complexity;
    end_of_frame = 0;

    /* This points to all the initial candts */
    ps_search_candts = &as_search_candts[0];

    /* mv grid being huge structure is part of context */
    aps_mv_grid[0] = &ps_ctxt->as_mv_grid[0];
    aps_mv_grid[1] = &ps_ctxt->as_mv_grid[1];

    /*************************************************************************/
    /* If the current layer is encoded (since it may be multicast or final   */
    /* layer (finest)), then we use 16x16 blk size with some selected parts  */
    /* If the current layer is not encoded, then we use 8x8 blk size, with   */
    /* enable or disable of 4x4 partitions depending on the input prms       */
    /*************************************************************************/
    e_search_blk_size = BLK_16x16;
    blk_wd = blk_ht = 16;
    blk_size_shift = 4;
    e_result_blk_size = BLK_8x8;
    s_mv_update_prms.i4_shift = 1;

    if(ps_coarse_layer->ps_layer_mvbank->e_blk_size == BLK_4x4)
    {
        blk_4x4_to_16x16 = 1;
    }
    else
    {
        blk_4x4_to_16x16 = 0;
    }

    unit_size = 1 << ps_ctxt->log_ctb_size;
    s_search_prms_blk.i4_inp_stride = unit_size;

    /* This is required to properly update the layer mv bank */
    s_mv_update_prms.e_search_blk_size = e_search_blk_size;
    s_search_prms_blk.e_blk_size = e_search_blk_size;

    /*************************************************************************/
    /* If current layer is explicit, then the number of ref frames are to    */
    /* be same as previous layer. Else it will be 2                          */
    /*************************************************************************/
    i4_num_ref_prev_layer = ps_coarse_layer->ps_layer_mvbank->i4_num_ref;
    i4_num_pred_dir =
        (ps_ctxt->s_frm_prms.bidir_enabled && (i4_num_act_ref_l0 > 0) && (i4_num_act_ref_l1 > 0)) +
        1;

#if USE_MODIFIED == 1
    s_search_prms_blk.pf_mv_cost_compute = compute_mv_cost_implicit_high_speed_modified;
#else
    s_search_prms_blk.pf_mv_cost_compute = compute_mv_cost_implicit_high_speed;
#endif

    i4_num_pred_dir = MIN(i4_num_pred_dir, i4_num_ref_prev_layer);
    if(i4_num_ref_prev_layer <= 2)
    {
        i4_num_ref_each_dir = 1;
    }
    else
    {
        i4_num_ref_each_dir = i4_num_ref_prev_layer >> 1;
    }

    s_mv_update_prms.i4_num_ref = i4_num_pred_dir;
    s_mv_update_prms.i4_num_results_to_store =
        MIN((ps_ctxt->s_frm_prms.bidir_enabled) ? ps_curr_layer->ps_layer_mvbank->i4_num_mvs_per_ref
                                                : (i4_num_act_ref_l0 > 1) + 1,
            ps_refine_prms->i4_num_results_per_part);

    /*************************************************************************/
    /* Initialization of merge params for 16x16 to 32x32 merge.              */
    /* There are 4 32x32 units in a CTB, so 4 param structures initialized   */
    /*************************************************************************/
    {
        hme_merge_prms_t *aps_merge_prms[4];
        aps_merge_prms[0] = &s_merge_prms_32x32_tl;
        aps_merge_prms[1] = &s_merge_prms_32x32_tr;
        aps_merge_prms[2] = &s_merge_prms_32x32_bl;
        aps_merge_prms[3] = &s_merge_prms_32x32_br;
        for(i = 0; i < 4; i++)
        {
            hme_merge_prms_init(
                aps_merge_prms[i],
                ps_curr_layer,
                ps_refine_prms,
                ps_ctxt,
                as_range_prms_rec,
                as_range_prms_inp,
                &aps_mv_grid[0],
                &s_common_frm_prms,
                i4_num_pred_dir,
                i,
                BLK_32x32,
                e_me_quality_presets);
        }
    }

    /*************************************************************************/
    /* Initialization of merge params for 32x32 to 64x64 merge.              */
    /* There are 4 32x32 units in a CTB, so only 1 64x64 CU can be in CTB    */
    /*************************************************************************/
    {
        hme_merge_prms_init(
            &s_merge_prms_64x64,
            ps_curr_layer,
            ps_refine_prms,
            ps_ctxt,
            as_range_prms_rec,
            as_range_prms_inp,
            &aps_mv_grid[0],
            &s_common_frm_prms,
            i4_num_pred_dir,
            0,
            BLK_64x64,
            e_me_quality_presets);
    }

    /* Pointers to cu_results are initialised here */
    {
        WORD32 i;

        ps_ctxt->s_search_results_64x64.ps_cu_results = &ps_ctxt->s_cu64x64_results;

        for(i = 0; i < 4; i++)
        {
            ps_ctxt->as_search_results_32x32[i].ps_cu_results = &ps_ctxt->as_cu32x32_results[i];
        }

        for(i = 0; i < 16; i++)
        {
            ps_ctxt->as_search_results_16x16[i].ps_cu_results = &ps_ctxt->as_cu16x16_results[i];
        }
    }

    /*************************************************************************/
    /* SUBPEL Params initialized here                                        */
    /*************************************************************************/
    {
        s_subpel_prms.ps_search_results_16x16 = &ps_ctxt->as_search_results_16x16[0];
        s_subpel_prms.ps_search_results_32x32 = &ps_ctxt->as_search_results_32x32[0];
        s_subpel_prms.ps_search_results_64x64 = &ps_ctxt->s_search_results_64x64;

        s_subpel_prms.i4_num_16x16_candts = ps_refine_prms->i4_num_fpel_results;
        s_subpel_prms.i4_num_32x32_candts = ps_refine_prms->i4_num_32x32_merge_results;
        s_subpel_prms.i4_num_64x64_candts = ps_refine_prms->i4_num_64x64_merge_results;

        s_subpel_prms.i4_num_steps_hpel_refine = ps_refine_prms->i4_num_steps_hpel_refine;
        s_subpel_prms.i4_num_steps_qpel_refine = ps_refine_prms->i4_num_steps_qpel_refine;

        s_subpel_prms.i4_use_satd = ps_refine_prms->i4_use_satd_subpel;

        s_subpel_prms.i4_inp_stride = unit_size;

        s_subpel_prms.u1_max_subpel_candts_2Nx2N = ps_refine_prms->u1_max_subpel_candts_2Nx2N;
        s_subpel_prms.u1_max_subpel_candts_NxN = ps_refine_prms->u1_max_subpel_candts_NxN;
        s_subpel_prms.u1_subpel_candt_threshold = ps_refine_prms->u1_subpel_candt_threshold;

        s_subpel_prms.pf_qpel_interp = ps_me_optimised_function_list->pf_qpel_interp_avg_generic;

        {
            WORD32 ref_ctr;
            for(ref_ctr = 0; ref_ctr < MAX_NUM_REF; ref_ctr++)
            {
                s_subpel_prms.aps_mv_range_hpel[ref_ctr] = &as_range_prms_hpel[ref_ctr];
                s_subpel_prms.aps_mv_range_qpel[ref_ctr] = &as_range_prms_qpel[ref_ctr];
            }
        }
        s_subpel_prms.pi2_inp_bck = ps_ctxt->pi2_inp_bck;

#if USE_MODIFIED == 0
        s_subpel_prms.pf_mv_cost_compute = compute_mv_cost_implicit_high_speed;
#else
        s_subpel_prms.pf_mv_cost_compute = compute_mv_cost_implicit_high_speed_modified;
#endif
        s_subpel_prms.e_me_quality_presets = e_me_quality_presets;

        /* BI Refinement done only if this field is 1 */
        s_subpel_prms.bidir_enabled = ps_refine_prms->bidir_enabled;

        s_subpel_prms.u1_num_ref = ps_ctxt->num_ref_future + ps_ctxt->num_ref_past;

        s_subpel_prms.i4_num_act_ref_l0 = ps_ctxt->s_frm_prms.u1_num_active_ref_l0;
        s_subpel_prms.i4_num_act_ref_l1 = ps_ctxt->s_frm_prms.u1_num_active_ref_l1;
        s_subpel_prms.u1_max_num_subpel_refine_centers =
            ps_refine_prms->u1_max_num_subpel_refine_centers;
    }

    /* inter_ctb_prms_t struct initialisation */
    {
        inter_ctb_prms_t *ps_inter_ctb_prms = &s_common_frm_prms;
        hme_subpel_prms_t *ps_subpel_prms = &s_subpel_prms;

        ps_inter_ctb_prms->pps_rec_list_l0 = ps_ctxt->ps_hme_ref_map->pps_rec_list_l0;
        ps_inter_ctb_prms->pps_rec_list_l1 = ps_ctxt->ps_hme_ref_map->pps_rec_list_l1;
        ps_inter_ctb_prms->wpred_log_wdc = ps_ctxt->s_wt_pred.wpred_log_wdc;
        ps_inter_ctb_prms->u1_max_tr_depth = ps_thrd_ctxt->s_init_prms.u1_max_tr_depth;
        ps_inter_ctb_prms->i1_quality_preset = e_me_quality_presets;
        ps_inter_ctb_prms->i4_bidir_enabled = ps_subpel_prms->bidir_enabled;
        ps_inter_ctb_prms->i4_inp_stride = ps_subpel_prms->i4_inp_stride;
        ps_inter_ctb_prms->u1_num_ref = ps_subpel_prms->u1_num_ref;
        ps_inter_ctb_prms->u1_use_satd = ps_subpel_prms->i4_use_satd;
        ps_inter_ctb_prms->i4_rec_stride = ps_curr_layer->i4_rec_stride;
        ps_inter_ctb_prms->u1_num_active_ref_l0 = ps_ctxt->s_frm_prms.u1_num_active_ref_l0;
        ps_inter_ctb_prms->u1_num_active_ref_l1 = ps_ctxt->s_frm_prms.u1_num_active_ref_l1;
        ps_inter_ctb_prms->i4_lamda = lambda_recon;
        ps_inter_ctb_prms->u1_lamda_qshift = ps_refine_prms->lambda_q_shift;
        ps_inter_ctb_prms->i4_qstep_ls8 = ps_ctxt->ps_hme_frm_prms->qstep_ls8;
        ps_inter_ctb_prms->pi4_inv_wt = ps_ctxt->s_wt_pred.a_inv_wpred_wt;
        ps_inter_ctb_prms->pi1_past_list = ps_ctxt->ai1_past_list;
        ps_inter_ctb_prms->pi1_future_list = ps_ctxt->ai1_future_list;
        ps_inter_ctb_prms->pu4_src_variance = s_search_prms_blk.au4_src_variance;
        ps_inter_ctb_prms->u1_max_2nx2n_tu_recur_cands =
            ps_refine_prms->u1_max_2nx2n_tu_recur_cands;
    }

    for(i = 0; i < MAX_INIT_CANDTS; i++)
    {
        ps_search_candts[i].ps_search_node = &ps_ctxt->s_init_search_node[i];
        ps_search_candts[i].ps_search_node->ps_mv = &ps_ctxt->as_search_cand_mv[i];

        INIT_SEARCH_NODE(ps_search_candts[i].ps_search_node, 0);
    }
    num_act_ref_pics =
        ps_ctxt->s_frm_prms.u1_num_active_ref_l0 + ps_ctxt->s_frm_prms.u1_num_active_ref_l1;

    if(num_act_ref_pics)
    {
        hme_search_cand_data_init(
            ai4_id_Z,
            ai4_id_coloc,
            ai4_num_coloc_cands,
            au1_search_candidate_list_index,
            i4_num_act_ref_l0,
            i4_num_act_ref_l1,
            ps_ctxt->s_frm_prms.bidir_enabled,
            blk_4x4_to_16x16);
    }

    if(!ps_ctxt->s_frm_prms.bidir_enabled && (i4_num_act_ref_l0 > 1))
    {
        ps_search_candts[ai4_id_Z[0]].ps_search_node->i1_ref_idx = ps_ctxt->ai1_past_list[0];
        ps_search_candts[ai4_id_Z[1]].ps_search_node->i1_ref_idx = ps_ctxt->ai1_past_list[1];
    }
    else if(!ps_ctxt->s_frm_prms.bidir_enabled && (i4_num_act_ref_l0 == 1))
    {
        ps_search_candts[ai4_id_Z[0]].ps_search_node->i1_ref_idx = ps_ctxt->ai1_past_list[0];
    }

    for(i = 0; i < 3; i++)
    {
        search_node_t *ps_search_node;
        ps_search_node = &as_left_neighbours[i];
        INIT_SEARCH_NODE(ps_search_node, 0);
        ps_search_node = &as_top_neighbours[i];
        INIT_SEARCH_NODE(ps_search_node, 0);
    }

    INIT_SEARCH_NODE(&as_top_neighbours[3], 0);
    as_left_neighbours[2].u1_is_avail = 0;

    /*************************************************************************/
    /* Initialize all the search results structure here. We update all the   */
    /* search results to default values, and configure things like blk sizes */
    /*************************************************************************/
    if(num_act_ref_pics)
    {
        S32 i4_x, i4_y;
        /* 16x16 results */
        for(i = 0; i < 16; i++)
        {
            search_results_t *ps_search_results;
            S32 pred_lx;
            ps_search_results = &ps_ctxt->as_search_results_16x16[i];
            i4_x = (S32)gau1_encode_to_raster_x[i];
            i4_y = (S32)gau1_encode_to_raster_y[i];
            i4_x <<= 4;
            i4_y <<= 4;

            hme_init_search_results(
                ps_search_results,
                i4_num_pred_dir,
                ps_refine_prms->i4_num_fpel_results,
                ps_refine_prms->i4_num_results_per_part,
                e_search_blk_size,
                i4_x,
                i4_y,
                &ps_ctxt->au1_is_past[0]);

            for(pred_lx = 0; pred_lx < 2; pred_lx++)
            {
                pred_ctxt_t *ps_pred_ctxt;

                ps_pred_ctxt = &ps_search_results->as_pred_ctxt[pred_lx];

                hme_init_pred_ctxt_encode(
                    ps_pred_ctxt,
                    ps_search_results,
                    ps_search_candts[ai4_id_coloc[0]].ps_search_node,
                    ps_search_candts[ai4_id_Z[0]].ps_search_node,
                    aps_mv_grid[pred_lx],
                    pred_lx,
                    lambda_recon,
                    ps_refine_prms->lambda_q_shift,
                    &ps_ctxt->apu1_ref_bits_tlu_lc[0],
                    &ps_ctxt->ai2_ref_scf[0]);
            }
        }

        for(i = 0; i < 4; i++)
        {
            search_results_t *ps_search_results;
            S32 pred_lx;
            ps_search_results = &ps_ctxt->as_search_results_32x32[i];

            i4_x = (S32)gau1_encode_to_raster_x[i];
            i4_y = (S32)gau1_encode_to_raster_y[i];
            i4_x <<= 5;
            i4_y <<= 5;

            hme_init_search_results(
                ps_search_results,
                i4_num_pred_dir,
                ps_refine_prms->i4_num_32x32_merge_results,
                ps_refine_prms->i4_num_results_per_part,
                BLK_32x32,
                i4_x,
                i4_y,
                &ps_ctxt->au1_is_past[0]);

            for(pred_lx = 0; pred_lx < 2; pred_lx++)
            {
                pred_ctxt_t *ps_pred_ctxt;

                ps_pred_ctxt = &ps_search_results->as_pred_ctxt[pred_lx];

                hme_init_pred_ctxt_encode(
                    ps_pred_ctxt,
                    ps_search_results,
                    ps_search_candts[ai4_id_coloc[0]].ps_search_node,
                    ps_search_candts[ai4_id_Z[0]].ps_search_node,
                    aps_mv_grid[pred_lx],
                    pred_lx,
                    lambda_recon,
                    ps_refine_prms->lambda_q_shift,
                    &ps_ctxt->apu1_ref_bits_tlu_lc[0],
                    &ps_ctxt->ai2_ref_scf[0]);
            }
        }

        {
            search_results_t *ps_search_results;
            S32 pred_lx;
            ps_search_results = &ps_ctxt->s_search_results_64x64;

            hme_init_search_results(
                ps_search_results,
                i4_num_pred_dir,
                ps_refine_prms->i4_num_64x64_merge_results,
                ps_refine_prms->i4_num_results_per_part,
                BLK_64x64,
                0,
                0,
                &ps_ctxt->au1_is_past[0]);

            for(pred_lx = 0; pred_lx < 2; pred_lx++)
            {
                pred_ctxt_t *ps_pred_ctxt;

                ps_pred_ctxt = &ps_search_results->as_pred_ctxt[pred_lx];

                hme_init_pred_ctxt_encode(
                    ps_pred_ctxt,
                    ps_search_results,
                    ps_search_candts[ai4_id_coloc[0]].ps_search_node,
                    ps_search_candts[ai4_id_Z[0]].ps_search_node,
                    aps_mv_grid[pred_lx],
                    pred_lx,
                    lambda_recon,
                    ps_refine_prms->lambda_q_shift,
                    &ps_ctxt->apu1_ref_bits_tlu_lc[0],
                    &ps_ctxt->ai2_ref_scf[0]);
            }
        }
    }

    /* Initialise the structure used in clustering  */
    if(ME_PRISTINE_QUALITY == e_me_quality_presets)
    {
        ps_ctb_cluster_info = ps_ctxt->ps_ctb_cluster_info;

        ps_ctb_cluster_info->ps_16x16_blk = ps_ctxt->ps_blk_16x16;
        ps_ctb_cluster_info->ps_32x32_blk = ps_ctxt->ps_blk_32x32;
        ps_ctb_cluster_info->ps_64x64_blk = ps_ctxt->ps_blk_64x64;
        ps_ctb_cluster_info->pi4_blk_8x8_mask = ai4_blk_8x8_mask;
        ps_ctb_cluster_info->sdi_threshold = ps_refine_prms->sdi_threshold;
        ps_ctb_cluster_info->i4_frame_qstep = ps_ctxt->frm_qstep;
        ps_ctb_cluster_info->i4_frame_qstep_multiplier = 16;
    }

    /*********************************************************************/
    /* Initialize the dyn. search range params. for each reference index */
    /* in current layer ctxt                                             */
    /*********************************************************************/

    /* Only for P pic. For P, both are 0, I&B has them mut. exclusive */
    if(ps_ctxt->s_frm_prms.is_i_pic == ps_ctxt->s_frm_prms.bidir_enabled)
    {
        WORD32 ref_ctr;
        /* set no. of act ref in L0 for further use at frame level */
        ps_ctxt->as_l0_dyn_range_prms[i4_idx_dvsr_p].i4_num_act_ref_in_l0 =
            ps_ctxt->s_frm_prms.u1_num_active_ref_l0;

        for(ref_ctr = 0; ref_ctr < ps_ctxt->s_frm_prms.u1_num_active_ref_l0; ref_ctr++)
        {
            INIT_DYN_SEARCH_PRMS(
                &ps_ctxt->as_l0_dyn_range_prms[i4_idx_dvsr_p].as_dyn_range_prms[ref_ctr],
                ps_ctxt->ai4_ref_idx_to_poc_lc[ref_ctr]);
        }
    }
    /*************************************************************************/
    /* Now that the candidates have been ordered, to choose the right number */
    /* of initial candidates.                                                */
    /*************************************************************************/
    if(blk_4x4_to_16x16)
    {
        if(i4_num_ref_prev_layer > 2)
        {
            if(e_search_complexity == SEARCH_CX_LOW)
                num_init_candts = 7 * (!ps_ctxt->s_frm_prms.bidir_enabled + 1);
            else if(e_search_complexity == SEARCH_CX_MED)
                num_init_candts = 14 * (!ps_ctxt->s_frm_prms.bidir_enabled + 1);
            else if(e_search_complexity == SEARCH_CX_HIGH)
                num_init_candts = 21 * (!ps_ctxt->s_frm_prms.bidir_enabled + 1);
            else
                ASSERT(0);
        }
        else if(i4_num_ref_prev_layer == 2)
        {
            if(e_search_complexity == SEARCH_CX_LOW)
                num_init_candts = 5 * (!ps_ctxt->s_frm_prms.bidir_enabled + 1);
            else if(e_search_complexity == SEARCH_CX_MED)
                num_init_candts = 12 * (!ps_ctxt->s_frm_prms.bidir_enabled + 1);
            else if(e_search_complexity == SEARCH_CX_HIGH)
                num_init_candts = 19 * (!ps_ctxt->s_frm_prms.bidir_enabled + 1);
            else
                ASSERT(0);
        }
        else
        {
            if(e_search_complexity == SEARCH_CX_LOW)
                num_init_candts = 5;
            else if(e_search_complexity == SEARCH_CX_MED)
                num_init_candts = 12;
            else if(e_search_complexity == SEARCH_CX_HIGH)
                num_init_candts = 19;
            else
                ASSERT(0);
        }
    }
    else
    {
        if(i4_num_ref_prev_layer > 2)
        {
            if(e_search_complexity == SEARCH_CX_LOW)
                num_init_candts = 7 * (!ps_ctxt->s_frm_prms.bidir_enabled + 1);
            else if(e_search_complexity == SEARCH_CX_MED)
                num_init_candts = 13 * (!ps_ctxt->s_frm_prms.bidir_enabled + 1);
            else if(e_search_complexity == SEARCH_CX_HIGH)
                num_init_candts = 18 * (!ps_ctxt->s_frm_prms.bidir_enabled + 1);
            else
                ASSERT(0);
        }
        else if(i4_num_ref_prev_layer == 2)
        {
            if(e_search_complexity == SEARCH_CX_LOW)
                num_init_candts = 5 * (!ps_ctxt->s_frm_prms.bidir_enabled + 1);
            else if(e_search_complexity == SEARCH_CX_MED)
                num_init_candts = 11 * (!ps_ctxt->s_frm_prms.bidir_enabled + 1);
            else if(e_search_complexity == SEARCH_CX_HIGH)
                num_init_candts = 16 * (!ps_ctxt->s_frm_prms.bidir_enabled + 1);
            else
                ASSERT(0);
        }
        else
        {
            if(e_search_complexity == SEARCH_CX_LOW)
                num_init_candts = 5;
            else if(e_search_complexity == SEARCH_CX_MED)
                num_init_candts = 11;
            else if(e_search_complexity == SEARCH_CX_HIGH)
                num_init_candts = 16;
            else
                ASSERT(0);
        }
    }

    /*************************************************************************/
    /* The following search parameters are fixed throughout the search across*/
    /* all blks. So these are configured outside processing loop             */
    /*************************************************************************/
    s_search_prms_blk.i4_num_init_candts = num_init_candts;
    s_search_prms_blk.i4_start_step = 1;
    s_search_prms_blk.i4_use_satd = 0;
    s_search_prms_blk.i4_num_steps_post_refine = ps_refine_prms->i4_num_steps_post_refine_fpel;
    /* we use recon only for encoded layers, otherwise it is not available */
    s_search_prms_blk.i4_use_rec = ps_refine_prms->i4_encode & ps_refine_prms->i4_use_rec_in_fpel;

    s_search_prms_blk.ps_search_candts = ps_search_candts;
    if(s_search_prms_blk.i4_use_rec)
    {
        WORD32 ref_ctr;
        for(ref_ctr = 0; ref_ctr < MAX_NUM_REF; ref_ctr++)
            s_search_prms_blk.aps_mv_range[ref_ctr] = &as_range_prms_rec[ref_ctr];
    }
    else
    {
        WORD32 ref_ctr;
        for(ref_ctr = 0; ref_ctr < MAX_NUM_REF; ref_ctr++)
            s_search_prms_blk.aps_mv_range[ref_ctr] = &as_range_prms_inp[ref_ctr];
    }

    /*************************************************************************/
    /* Initialize coordinates. Meaning as follows                            */
    /* blk_x : x coordinate of the 16x16 blk, in terms of number of blks     */
    /* blk_y : same as above, y coord.                                       */
    /* num_blks_in_this_ctb : number of blks in this given ctb that starts   */
    /* at i4_ctb_x, i4_ctb_y. This may not be 16 at picture boundaries.      */
    /* i4_ctb_x, i4_ctb_y: pixel coordinate of the ctb relative to top left  */
    /* corner of the picture. Always multiple of 64.                         */
    /* blk_id_in_ctb : encode order id of the blk in the ctb.                */
    /*************************************************************************/
    blk_y = 0;
    blk_id_in_ctb = 0;
    i4_ctb_y = 0;

    /*************************************************************************/
    /* Picture limit on all 4 sides. This will be used to set mv limits for  */
    /* every block given its coordinate. Note this assumes that the min amt  */
    /* of padding to right of pic is equal to the blk size. If we go all the */
    /* way upto 64x64, then the min padding on right side of picture should  */
    /* be 64, and also on bottom side of picture.                            */
    /*************************************************************************/
    SET_PIC_LIMIT(
        s_pic_limit_inp,
        ps_curr_layer->i4_pad_x_rec,
        ps_curr_layer->i4_pad_y_rec,
        ps_curr_layer->i4_wd,
        ps_curr_layer->i4_ht,
        s_search_prms_blk.i4_num_steps_post_refine);

    SET_PIC_LIMIT(
        s_pic_limit_rec,
        ps_curr_layer->i4_pad_x_rec,
        ps_curr_layer->i4_pad_y_rec,
        ps_curr_layer->i4_wd,
        ps_curr_layer->i4_ht,
        s_search_prms_blk.i4_num_steps_post_refine);

    /*************************************************************************/
    /* set the MV limit per ref. pic.                                        */
    /*    - P pic. : Based on the config params.                             */
    /*    - B/b pic: Based on the Max/Min MV from prev. P and config. param. */
    /*************************************************************************/
    hme_set_mv_limit_using_dvsr_data(
        ps_ctxt, ps_curr_layer, as_mv_limit, &i2_prev_enc_frm_max_mv_y, num_act_ref_pics);
    s_srch_cand_init_data.pu1_num_fpel_search_cands = ps_refine_prms->au1_num_fpel_search_cands;
    s_srch_cand_init_data.i4_num_act_ref_l0 = ps_ctxt->s_frm_prms.u1_num_active_ref_l0;
    s_srch_cand_init_data.i4_num_act_ref_l1 = ps_ctxt->s_frm_prms.u1_num_active_ref_l1;
    s_srch_cand_init_data.ps_coarse_layer = ps_coarse_layer;
    s_srch_cand_init_data.ps_curr_layer = ps_curr_layer;
    s_srch_cand_init_data.i4_max_num_init_cands = num_init_candts;
    s_srch_cand_init_data.ps_search_cands = ps_search_candts;
    s_srch_cand_init_data.u1_num_results_in_mvbank = s_mv_update_prms.i4_num_results_to_store;
    s_srch_cand_init_data.pi4_ref_id_lc_to_l0_map = ps_ctxt->a_ref_idx_lc_to_l0;
    s_srch_cand_init_data.pi4_ref_id_lc_to_l1_map = ps_ctxt->a_ref_idx_lc_to_l1;
    s_srch_cand_init_data.e_search_blk_size = e_search_blk_size;

    while(0 == end_of_frame)
    {
        job_queue_t *ps_job;
        frm_ctb_ctxt_t *ps_frm_ctb_prms;
        ipe_l0_ctb_analyse_for_me_t *ps_cur_ipe_ctb;

        WORD32 i4_max_mv_x_in_ctb;
        WORD32 i4_max_mv_y_in_ctb;
        void *pv_dep_mngr_encloop_dep_me;
        WORD32 offset_val, check_dep_pos, set_dep_pos;
        WORD32 left_ctb_in_diff_tile, i4_first_ctb_x = 0;

        pv_dep_mngr_encloop_dep_me = ps_ctxt->pv_dep_mngr_encloop_dep_me;

        ps_frm_ctb_prms = (frm_ctb_ctxt_t *)ps_thrd_ctxt->pv_ext_frm_prms;

        /* Get the current row from the job queue */
        ps_job = (job_queue_t *)ihevce_enc_grp_get_next_job(
            ps_multi_thrd_ctxt, lyr_job_type, 1, me_frm_id);

        /* If all rows are done, set the end of process flag to 1, */
        /* and the current row to -1 */
        if(NULL == ps_job)
        {
            blk_y = -1;
            i4_ctb_y = -1;
            tile_col_idx = -1;
            end_of_frame = 1;

            continue;
        }

        /* set the output dependency after picking up the row */
        ihevce_enc_grp_job_set_out_dep(ps_multi_thrd_ctxt, ps_job, me_frm_id);

        /* Obtain the current row's details from the job */
        {
            ihevce_tile_params_t *ps_col_tile_params;

            i4_ctb_y = ps_job->s_job_info.s_me_job_info.i4_vert_unit_row_no;
            /* Obtain the current column tile index from the job */
            tile_col_idx = ps_job->s_job_info.s_me_job_info.i4_tile_col_idx;

            /* in encode layer block are 16x16 and CTB is 64 x 64 */
            /* note if ctb is 32x32 then this calc needs to be changed */
            num_sync_units_in_row = (i4_pic_wd + ((1 << ps_ctxt->log_ctb_size) - 1)) >>
                                    ps_ctxt->log_ctb_size;

            /* The tile parameter for the col. idx. Use only the properties
            which are the same for all the bottom tiles like width, start_x, etc.
            Don't use height, start_y, etc.                                  */
            ps_col_tile_params =
                ((ihevce_tile_params_t *)ps_thrd_ctxt->pv_tile_params_base + tile_col_idx);
            /* in encode layer block are 16x16 and CTB is 64 x 64 */
            /* note if ctb is 32x32 then this calc needs to be changed */
            num_sync_units_in_tile =
                (ps_col_tile_params->i4_curr_tile_width + ((1 << ps_ctxt->log_ctb_size) - 1)) >>
                ps_ctxt->log_ctb_size;

            i4_first_ctb_x = ps_col_tile_params->i4_first_ctb_x;
            i4_ctb_x = i4_first_ctb_x;

            if(!num_act_ref_pics)
            {
                for(i4_ctb_x = i4_first_ctb_x;
                    i4_ctb_x < (ps_col_tile_params->i4_first_ctb_x + num_sync_units_in_tile);
                    i4_ctb_x++)
                {
                    S32 blk_i = 0, blk_j = 0;
                    /* set the dependency for the corresponding row in enc loop */
                    ihevce_dmgr_set_row_row_sync(
                        pv_dep_mngr_encloop_dep_me,
                        (i4_ctb_x + 1),
                        i4_ctb_y,
                        tile_col_idx /* Col Tile No. */);
                }

                continue;
            }

            /* increment the number of rows proc */
            num_rows_proc++;

            /* Set Variables for Dep. Checking and Setting */
            set_dep_pos = i4_ctb_y + 1;
            if(i4_ctb_y > 0)
            {
                offset_val = 2;
                check_dep_pos = i4_ctb_y - 1;
            }
            else
            {
                /* First row should run without waiting */
                offset_val = -1;
                check_dep_pos = 0;
            }

            /* row ctb out pointer  */
            ps_ctxt->ps_ctb_analyse_curr_row =
                ps_ctxt->ps_ctb_analyse_base + i4_ctb_y * ps_frm_ctb_prms->i4_num_ctbs_horz;

            /* Row level CU Tree buffer */
            ps_ctxt->ps_cu_tree_curr_row =
                ps_ctxt->ps_cu_tree_base +
                i4_ctb_y * ps_frm_ctb_prms->i4_num_ctbs_horz * MAX_NUM_NODES_CU_TREE;

            ps_ctxt->ps_me_ctb_data_curr_row =
                ps_ctxt->ps_me_ctb_data_base + i4_ctb_y * ps_frm_ctb_prms->i4_num_ctbs_horz;
        }

        /* This flag says the CTB under processing is at the start of tile in horz dir.*/
        left_ctb_in_diff_tile = 1;

        /* To make sure no 64-bit overflow happens when inv_wt is multiplied with un-normalized src_var,                                 */
        /* the shift value will be passed onto the functions wherever inv_wt is used so that inv_wt is appropriately shifted and multiplied */
        {
            S32 i4_ref_id, i4_bits_req;

            for(i4_ref_id = 0; i4_ref_id < (ps_ctxt->s_frm_prms.u1_num_active_ref_l0 +
                                            ps_ctxt->s_frm_prms.u1_num_active_ref_l1);
                i4_ref_id++)
            {
                GETRANGE(i4_bits_req, ps_ctxt->s_wt_pred.a_inv_wpred_wt[i4_ref_id]);

                if(i4_bits_req > 12)
                {
                    ps_ctxt->s_wt_pred.ai4_shift_val[i4_ref_id] = (i4_bits_req - 12);
                }
                else
                {
                    ps_ctxt->s_wt_pred.ai4_shift_val[i4_ref_id] = 0;
                }
            }

            s_common_frm_prms.pi4_inv_wt_shift_val = ps_ctxt->s_wt_pred.ai4_shift_val;
        }

        /* if non-encode layer then i4_ctb_x will be same as blk_x */
        /* loop over all the units in a row                        */
        for(i4_ctb_x = i4_first_ctb_x; i4_ctb_x < (i4_first_ctb_x + num_sync_units_in_tile);
            i4_ctb_x++)
        {
            ihevce_ctb_noise_params *ps_ctb_noise_params =
                &ps_ctxt->ps_ctb_analyse_curr_row[i4_ctb_x].s_ctb_noise_params;

            s_common_frm_prms.i4_ctb_x_off = i4_ctb_x << 6;
            s_common_frm_prms.i4_ctb_y_off = i4_ctb_y << 6;

            ps_ctxt->s_mc_ctxt.i4_ctb_frm_pos_y = i4_ctb_y << 6;
            ps_ctxt->s_mc_ctxt.i4_ctb_frm_pos_x = i4_ctb_x << 6;
            /* Initialize ptr to current IPE CTB */
            ps_cur_ipe_ctb = ps_ctxt->ps_ipe_l0_ctb_frm_base + i4_ctb_x +
                             i4_ctb_y * ps_frm_ctb_prms->i4_num_ctbs_horz;
            {
                ps_ctb_bound_attrs =
                    get_ctb_attrs(i4_ctb_x << 6, i4_ctb_y << 6, i4_pic_wd, i4_pic_ht, ps_ctxt);

                en_merge_32x32 = ps_ctb_bound_attrs->u1_merge_to_32x32_flag;
                num_blks_in_this_ctb = ps_ctb_bound_attrs->u1_num_blks_in_ctb;
            }

            /* Block to initialise pointers to part_type_results_t */
            /* in each size-specific inter_cu_results_t  */
            {
                WORD32 i;

                for(i = 0; i < 64; i++)
                {
                    ps_ctxt->as_cu8x8_results[i].ps_best_results =
                        ps_ctxt->ps_me_ctb_data_curr_row[i4_ctb_x]
                            .as_8x8_block_data[i]
                            .as_best_results;
                    ps_ctxt->as_cu8x8_results[i].u1_num_best_results = 0;
                }

                for(i = 0; i < 16; i++)
                {
                    ps_ctxt->as_cu16x16_results[i].ps_best_results =
                        ps_ctxt->ps_me_ctb_data_curr_row[i4_ctb_x].as_block_data[i].as_best_results;
                    ps_ctxt->as_cu16x16_results[i].u1_num_best_results = 0;
                }

                for(i = 0; i < 4; i++)
                {
                    ps_ctxt->as_cu32x32_results[i].ps_best_results =
                        ps_ctxt->ps_me_ctb_data_curr_row[i4_ctb_x]
                            .as_32x32_block_data[i]
                            .as_best_results;
                    ps_ctxt->as_cu32x32_results[i].u1_num_best_results = 0;
                }

                ps_ctxt->s_cu64x64_results.ps_best_results =
                    ps_ctxt->ps_me_ctb_data_curr_row[i4_ctb_x].s_64x64_block_data.as_best_results;
                ps_ctxt->s_cu64x64_results.u1_num_best_results = 0;
            }

            if(ME_PRISTINE_QUALITY == e_me_quality_presets)
            {
                ps_ctb_cluster_info->blk_32x32_mask = en_merge_32x32;
                ps_ctb_cluster_info->ps_cur_ipe_ctb = ps_cur_ipe_ctb;
                ps_ctb_cluster_info->ps_cu_tree_root =
                    ps_ctxt->ps_cu_tree_curr_row + (i4_ctb_x * MAX_NUM_NODES_CU_TREE);
                ps_ctb_cluster_info->nodes_created_in_cu_tree = 1;
            }

            if(ME_PRISTINE_QUALITY != e_me_quality_presets)
            {
                S32 i4_nodes_created_in_cu_tree = 1;

                ihevce_cu_tree_init(
                    (ps_ctxt->ps_cu_tree_curr_row + (i4_ctb_x * MAX_NUM_NODES_CU_TREE)),
                    (ps_ctxt->ps_cu_tree_curr_row + (i4_ctb_x * MAX_NUM_NODES_CU_TREE)),
                    &i4_nodes_created_in_cu_tree,
                    0,
                    POS_NA,
                    POS_NA,
                    POS_NA);
            }

            memset(ai4_blk_8x8_mask, 0, 16 * sizeof(S32));

            if(ps_refine_prms->u1_use_lambda_derived_from_min_8x8_act_in_ctb)
            {
                S32 j;

                ipe_l0_ctb_analyse_for_me_t *ps_cur_ipe_ctb;

                ps_cur_ipe_ctb =
                    ps_ctxt->ps_ipe_l0_ctb_frm_base + i4_ctb_x + i4_ctb_y * num_sync_units_in_row;
                lambda_recon =
                    hme_recompute_lambda_from_min_8x8_act_in_ctb(ps_ctxt, ps_cur_ipe_ctb);

                lambda_recon = ((float)lambda_recon * (100.0f - ME_LAMBDA_DISCOUNT) / 100.0f);

                for(i = 0; i < 4; i++)
                {
                    ps_search_results = &ps_ctxt->as_search_results_32x32[i];

                    for(j = 0; j < 2; j++)
                    {
                        ps_search_results->as_pred_ctxt[j].lambda = lambda_recon;
                    }
                }
                ps_search_results = &ps_ctxt->s_search_results_64x64;

                for(j = 0; j < 2; j++)
                {
                    ps_search_results->as_pred_ctxt[j].lambda = lambda_recon;
                }

                s_common_frm_prms.i4_lamda = lambda_recon;
            }
            else
            {
                lambda_recon = ps_refine_prms->lambda_recon;
            }

            /*********************************************************************/
            /* replicate the inp buffer at blk or ctb level for each ref id,     */
            /* Instead of searching with wk * ref(k), we search with Ik = I / wk */
            /* thereby avoiding a bloat up of memory. If we did all references   */
            /* weighted pred, we will end up with a duplicate copy of each ref   */
            /* at each layer, since we need to preserve the original reference.  */
            /* ToDo: Need to observe performance with this mechanism and compare */
            /* with case where ref is weighted.                                  */
            /*********************************************************************/
            fp_get_wt_inp(
                ps_curr_layer,
                &ps_ctxt->s_wt_pred,
                unit_size,
                s_common_frm_prms.i4_ctb_x_off,
                s_common_frm_prms.i4_ctb_y_off,
                unit_size,
                ps_ctxt->num_ref_future + ps_ctxt->num_ref_past,
                ps_ctxt->i4_wt_pred_enable_flag);

            if(ps_thrd_ctxt->s_init_prms.u1_is_stasino_enabled)
            {
#if TEMPORAL_NOISE_DETECT
                {
                    WORD32 had_block_size = 16;
                    WORD32 ctb_width = ((i4_pic_wd - s_common_frm_prms.i4_ctb_x_off) >= 64)
                                           ? 64
                                           : i4_pic_wd - s_common_frm_prms.i4_ctb_x_off;
                    WORD32 ctb_height = ((i4_pic_ht - s_common_frm_prms.i4_ctb_y_off) >= 64)
                                            ? 64
                                            : i4_pic_ht - s_common_frm_prms.i4_ctb_y_off;
                    WORD32 num_pred_dir = i4_num_pred_dir;
                    WORD32 i4_x_off = s_common_frm_prms.i4_ctb_x_off;
                    WORD32 i4_y_off = s_common_frm_prms.i4_ctb_y_off;

                    WORD32 i;
                    WORD32 noise_detected;
                    WORD32 ctb_size;
                    WORD32 num_comp_had_blocks;
                    WORD32 noisy_block_cnt;
                    WORD32 index_8x8_block;
                    WORD32 num_8x8_in_ctb_row;

                    WORD32 ht_offset;
                    WORD32 wd_offset;
                    WORD32 block_ht;
                    WORD32 block_wd;

                    WORD32 num_horz_blocks;
                    WORD32 num_vert_blocks;

                    WORD32 mean;
                    UWORD32 variance_8x8;

                    WORD32 hh_energy_percent;

                    /* variables to hold the constant values. The variable values held are decided by the HAD block size */
                    WORD32 min_noisy_block_cnt;
                    WORD32 min_coeffs_above_avg;
                    WORD32 min_coeff_avg_energy;

                    /* to store the mean and variance of each 8*8 block and find the variance of any higher block sizes later on. block */
                    WORD32 i4_cu_x_off, i4_cu_y_off;
                    WORD32 is_noisy;

                    /* initialise the variables holding the constants */
                    if(had_block_size == 8)
                    {
                        min_noisy_block_cnt = MIN_NOISY_BLOCKS_CNT_8x8;  //6;//
                        min_coeffs_above_avg = MIN_NUM_COEFFS_ABOVE_AVG_8x8;
                        min_coeff_avg_energy = MIN_COEFF_AVG_ENERGY_8x8;
                    }
                    else
                    {
                        min_noisy_block_cnt = MIN_NOISY_BLOCKS_CNT_16x16;  //7;//
                        min_coeffs_above_avg = MIN_NUM_COEFFS_ABOVE_AVG_16x16;
                        min_coeff_avg_energy = MIN_COEFF_AVG_ENERGY_16x16;
                    }

                    /* initialize the variables */
                    noise_detected = 0;
                    noisy_block_cnt = 0;
                    hh_energy_percent = 0;
                    variance_8x8 = 0;
                    block_ht = ctb_height;
                    block_wd = ctb_width;

                    mean = 0;

                    ctb_size = block_ht * block_wd;  //ctb_width * ctb_height;
                    num_comp_had_blocks = ctb_size / (had_block_size * had_block_size);

                    num_horz_blocks = block_wd / had_block_size;  //ctb_width / had_block_size;
                    num_vert_blocks = block_ht / had_block_size;  //ctb_height / had_block_size;

                    ht_offset = -had_block_size;
                    wd_offset = -had_block_size;

                    num_8x8_in_ctb_row = block_wd / 8;  // number of 8x8 in this ctb
                    for(i = 0; i < num_comp_had_blocks; i++)
                    {
                        if(i % num_horz_blocks == 0)
                        {
                            wd_offset = -had_block_size;
                            ht_offset += had_block_size;
                        }
                        wd_offset += had_block_size;

                        /* CU level offsets */
                        i4_cu_x_off = i4_x_off + (i % 4) * 16;  //+ (i % 4) * 16
                        i4_cu_y_off = i4_y_off + (i / 4) * 16;

                        /* if 50 % or more of the CU is noisy then the return value is 1 */
                        is_noisy = ihevce_determine_cu_noise_based_on_8x8Blk_data(
                            ps_ctb_noise_params->au1_is_8x8Blk_noisy,
                            (i % 4) * 16,
                            (i / 4) * 16,
                            16);

                        /* the temporal noise detect call is made on the CU only if the CU is noisy */
                        if(is_noisy)
                        {
                            index_8x8_block = (i / num_horz_blocks) * 2 * num_8x8_in_ctb_row +
                                              (i % num_horz_blocks) * 2;
                            noisy_block_cnt += ihevce_16x16block_temporal_noise_detect(
                                16,
                                ((i4_pic_wd - s_common_frm_prms.i4_ctb_x_off) >= 64)
                                    ? 64
                                    : i4_pic_wd - s_common_frm_prms.i4_ctb_x_off,
                                ((i4_pic_ht - s_common_frm_prms.i4_ctb_y_off) >= 64)
                                    ? 64
                                    : i4_pic_ht - s_common_frm_prms.i4_ctb_y_off,
                                ps_ctb_noise_params,
                                &s_srch_cand_init_data,
                                &s_search_prms_blk,
                                ps_ctxt,
                                num_pred_dir,
                                i4_num_act_ref_l0,
                                i4_num_act_ref_l1,
                                i4_cu_x_off,
                                i4_cu_y_off,
                                &ps_ctxt->s_wt_pred,
                                unit_size,
                                index_8x8_block,
                                num_horz_blocks,
                                /*num_8x8_in_ctb_row*/ 8,  // this should be a variable extra
                                i);
                        } /* if 16x16 is noisy */
                    } /* loop over for all 16x16*/

                    if(noisy_block_cnt >= min_noisy_block_cnt)
                    {
                        noise_detected = 1;
                    }

                    /* write back the noise presence detected for the current CTB to the structure */
                    ps_ctb_noise_params->i4_noise_present = noise_detected;
                }
#endif

#if EVERYWHERE_NOISY && USE_NOISE_TERM_IN_L0_ME
                if(ps_thrd_ctxt->s_init_prms.u1_is_stasino_enabled &&
                   ps_ctb_noise_params->i4_noise_present)
                {
                    memset(
                        ps_ctb_noise_params->au1_is_8x8Blk_noisy,
                        1,
                        sizeof(ps_ctb_noise_params->au1_is_8x8Blk_noisy));
                }
#endif

                for(i = 0; i < 16; i++)
                {
                    au1_is_16x16Blk_noisy[i] = ihevce_determine_cu_noise_based_on_8x8Blk_data(
                        ps_ctb_noise_params->au1_is_8x8Blk_noisy, (i % 4) * 16, (i / 4) * 16, 16);
                }

                for(i = 0; i < 4; i++)
                {
                    au1_is_32x32Blk_noisy[i] = ihevce_determine_cu_noise_based_on_8x8Blk_data(
                        ps_ctb_noise_params->au1_is_8x8Blk_noisy, (i % 2) * 32, (i / 2) * 32, 32);
                }

                for(i = 0; i < 1; i++)
                {
                    au1_is_64x64Blk_noisy[i] = ihevce_determine_cu_noise_based_on_8x8Blk_data(
                        ps_ctb_noise_params->au1_is_8x8Blk_noisy, 0, 0, 64);
                }

                if(ps_ctxt->s_frm_prms.bidir_enabled &&
                   (ps_ctxt->s_frm_prms.i4_temporal_layer_id <=
                    MAX_LAYER_ID_OF_B_PICS_WITHOUT_NOISE_DETECTION))
                {
                    ps_ctb_noise_params->i4_noise_present = 0;
                    memset(
                        ps_ctb_noise_params->au1_is_8x8Blk_noisy,
                        0,
                        sizeof(ps_ctb_noise_params->au1_is_8x8Blk_noisy));
                }

#if ME_LAMBDA_DISCOUNT_WHEN_NOISY
                for(i = 0; i < 4; i++)
                {
                    S32 j;
                    S32 lambda;

                    if(au1_is_32x32Blk_noisy[i])
                    {
                        lambda = lambda_recon;
                        lambda =
                            ((float)lambda * (100.0f - ME_LAMBDA_DISCOUNT_WHEN_NOISY) / 100.0f);

                        ps_search_results = &ps_ctxt->as_search_results_32x32[i];

                        for(j = 0; j < 2; j++)
                        {
                            ps_search_results->as_pred_ctxt[j].lambda = lambda;
                        }
                    }
                }

                {
                    S32 j;
                    S32 lambda;

                    if(au1_is_64x64Blk_noisy[0])
                    {
                        lambda = lambda_recon;
                        lambda =
                            ((float)lambda * (100.0f - ME_LAMBDA_DISCOUNT_WHEN_NOISY) / 100.0f);

                        ps_search_results = &ps_ctxt->s_search_results_64x64;

                        for(j = 0; j < 2; j++)
                        {
                            ps_search_results->as_pred_ctxt[j].lambda = lambda;
                        }
                    }
                }
#endif
                if(au1_is_64x64Blk_noisy[0])
                {
                    U08 *pu1_inp = ps_curr_layer->pu1_inp + (s_common_frm_prms.i4_ctb_x_off +
                                                             (s_common_frm_prms.i4_ctb_y_off *
                                                              ps_curr_layer->i4_inp_stride));

                    hme_compute_sigmaX_and_sigmaXSquared(
                        pu1_inp,
                        ps_curr_layer->i4_inp_stride,
                        ps_ctxt->au4_4x4_src_sigmaX,
                        ps_ctxt->au4_4x4_src_sigmaXSquared,
                        4,
                        4,
                        64,
                        64,
                        1,
                        16);
                }
                else
                {
                    for(i = 0; i < 4; i++)
                    {
                        if(au1_is_32x32Blk_noisy[i])
                        {
                            U08 *pu1_inp =
                                ps_curr_layer->pu1_inp +
                                (s_common_frm_prms.i4_ctb_x_off +
                                 (s_common_frm_prms.i4_ctb_y_off * ps_curr_layer->i4_inp_stride));

                            U08 u1_cu_size = 32;
                            WORD32 i4_inp_buf_offset =
                                (((i / 2) * (u1_cu_size * ps_curr_layer->i4_inp_stride)) +
                                 ((i % 2) * u1_cu_size));

                            U16 u2_sigma_arr_start_index_of_3rd_32x32_blk_in_ctb = 128;
                            U16 u2_sigma_arr_start_index_of_2nd_32x32_blk_in_ctb = 8;
                            S32 i4_sigma_arr_offset =
                                (((i / 2) * u2_sigma_arr_start_index_of_3rd_32x32_blk_in_ctb) +
                                 ((i % 2) * u2_sigma_arr_start_index_of_2nd_32x32_blk_in_ctb));

                            hme_compute_sigmaX_and_sigmaXSquared(
                                pu1_inp + i4_inp_buf_offset,
                                ps_curr_layer->i4_inp_stride,
                                ps_ctxt->au4_4x4_src_sigmaX + i4_sigma_arr_offset,
                                ps_ctxt->au4_4x4_src_sigmaXSquared + i4_sigma_arr_offset,
                                4,
                                4,
                                32,
                                32,
                                1,
                                16);
                        }
                        else
                        {
                            S32 j;

                            U08 u1_16x16_blk_start_index_in_3rd_32x32_blk_of_ctb = 8;
                            U08 u1_16x16_blk_start_index_in_2nd_32x32_blk_of_ctb = 2;
                            S32 i4_16x16_blk_start_index_in_i_th_32x32_blk =
                                (((i / 2) * u1_16x16_blk_start_index_in_3rd_32x32_blk_of_ctb) +
                                 ((i % 2) * u1_16x16_blk_start_index_in_2nd_32x32_blk_of_ctb));

                            for(j = 0; j < 4; j++)
                            {
                                U08 u1_3rd_16x16_blk_index_in_32x32_blk = 4;
                                U08 u1_2nd_16x16_blk_index_in_32x32_blk = 1;
                                S32 i4_16x16_blk_index_in_ctb =
                                    i4_16x16_blk_start_index_in_i_th_32x32_blk +
                                    ((j % 2) * u1_2nd_16x16_blk_index_in_32x32_blk) +
                                    ((j / 2) * u1_3rd_16x16_blk_index_in_32x32_blk);

                                //S32 k = (((i / 2) * 8) + ((i % 2) * 2)) + ((j % 2) * 1) + ((j / 2) * 4);

                                if(au1_is_16x16Blk_noisy[i4_16x16_blk_index_in_ctb])
                                {
                                    U08 *pu1_inp =
                                        ps_curr_layer->pu1_inp + (s_common_frm_prms.i4_ctb_x_off +
                                                                  (s_common_frm_prms.i4_ctb_y_off *
                                                                   ps_curr_layer->i4_inp_stride));

                                    U08 u1_cu_size = 16;
                                    WORD32 i4_inp_buf_offset =
                                        (((i4_16x16_blk_index_in_ctb % 4) * u1_cu_size) +
                                         ((i4_16x16_blk_index_in_ctb / 4) *
                                          (u1_cu_size * ps_curr_layer->i4_inp_stride)));

                                    U16 u2_sigma_arr_start_index_of_3rd_16x16_blk_in_32x32_blk = 64;
                                    U16 u2_sigma_arr_start_index_of_2nd_16x16_blk_in_32x32_blk = 4;
                                    S32 i4_sigma_arr_offset =
                                        (((i4_16x16_blk_index_in_ctb % 4) *
                                          u2_sigma_arr_start_index_of_2nd_16x16_blk_in_32x32_blk) +
                                         ((i4_16x16_blk_index_in_ctb / 4) *
                                          u2_sigma_arr_start_index_of_3rd_16x16_blk_in_32x32_blk));

                                    hme_compute_sigmaX_and_sigmaXSquared(
                                        pu1_inp + i4_inp_buf_offset,
                                        ps_curr_layer->i4_inp_stride,
                                        (ps_ctxt->au4_4x4_src_sigmaX + i4_sigma_arr_offset),
                                        (ps_ctxt->au4_4x4_src_sigmaXSquared + i4_sigma_arr_offset),
                                        4,
                                        4,
                                        16,
                                        16,
                                        1,
                                        16);
                                }
                            }
                        }
                    }
                }
            }
            else
            {
                memset(au1_is_16x16Blk_noisy, 0, sizeof(au1_is_16x16Blk_noisy));

                memset(au1_is_32x32Blk_noisy, 0, sizeof(au1_is_32x32Blk_noisy));

                memset(au1_is_64x64Blk_noisy, 0, sizeof(au1_is_64x64Blk_noisy));
            }

            for(blk_id_in_ctb = 0; blk_id_in_ctb < num_blks_in_this_ctb; blk_id_in_ctb++)
            {
                S32 ref_ctr;
                U08 au1_pred_dir_searched[2];
                U08 u1_is_cu_noisy;
                ULWORD64 au8_final_src_sigmaX[17], au8_final_src_sigmaXSquared[17];

                {
                    blk_x = (i4_ctb_x << 2) +
                            (ps_ctb_bound_attrs->as_blk_attrs[blk_id_in_ctb].u1_blk_x);
                    blk_y = (i4_ctb_y << 2) +
                            (ps_ctb_bound_attrs->as_blk_attrs[blk_id_in_ctb].u1_blk_y);

                    blk_id_in_full_ctb =
                        ps_ctb_bound_attrs->as_blk_attrs[blk_id_in_ctb].u1_blk_id_in_full_ctb;
                    blk_8x8_mask = ps_ctb_bound_attrs->as_blk_attrs[blk_id_in_ctb].u1_blk_8x8_mask;
                    ai4_blk_8x8_mask[blk_id_in_full_ctb] = blk_8x8_mask;
                    s_search_prms_blk.i4_cu_x_off = (blk_x << blk_size_shift) - (i4_ctb_x << 6);
                    s_search_prms_blk.i4_cu_y_off = (blk_y << blk_size_shift) - (i4_ctb_y << 6);
                }

                /* get the current input blk point */
                pos_x = blk_x << blk_size_shift;
                pos_y = blk_y << blk_size_shift;
                pu1_inp = ps_curr_layer->pu1_inp + pos_x + (pos_y * i4_inp_stride);

                /*********************************************************************/
                /* For every blk in the picture, the search range needs to be derived*/
                /* Any blk can have any mv, but practical search constraints are     */
                /* imposed by the picture boundary and amt of padding.               */
                /*********************************************************************/
                /* MV limit is different based on ref. PIC */
                for(ref_ctr = 0; ref_ctr < num_act_ref_pics; ref_ctr++)
                {
                    if(!s_search_prms_blk.i4_use_rec)
                    {
                        hme_derive_search_range(
                            &as_range_prms_inp[ref_ctr],
                            &s_pic_limit_inp,
                            &as_mv_limit[ref_ctr],
                            pos_x,
                            pos_y,
                            blk_wd,
                            blk_ht);
                    }
                    else
                    {
                        hme_derive_search_range(
                            &as_range_prms_rec[ref_ctr],
                            &s_pic_limit_rec,
                            &as_mv_limit[ref_ctr],
                            pos_x,
                            pos_y,
                            blk_wd,
                            blk_ht);
                    }
                }
                s_search_prms_blk.i4_x_off = blk_x << blk_size_shift;
                s_search_prms_blk.i4_y_off = blk_y << blk_size_shift;
                /* Select search results from a suitable search result in the context */
                {
                    ps_search_results = &ps_ctxt->as_search_results_16x16[blk_id_in_full_ctb];

                    if(ps_refine_prms->u1_use_lambda_derived_from_min_8x8_act_in_ctb)
                    {
                        S32 i;

                        for(i = 0; i < 2; i++)
                        {
                            ps_search_results->as_pred_ctxt[i].lambda = lambda_recon;
                        }
                    }
                }

                u1_is_cu_noisy = au1_is_16x16Blk_noisy
                    [(s_search_prms_blk.i4_cu_x_off >> 4) + (s_search_prms_blk.i4_cu_y_off >> 2)];

                s_subpel_prms.u1_is_cu_noisy = u1_is_cu_noisy;

#if ME_LAMBDA_DISCOUNT_WHEN_NOISY
                if(u1_is_cu_noisy)
                {
                    S32 j;
                    S32 lambda;

                    lambda = lambda_recon;
                    lambda = ((float)lambda * (100.0f - ME_LAMBDA_DISCOUNT_WHEN_NOISY) / 100.0f);

                    for(j = 0; j < 2; j++)
                    {
                        ps_search_results->as_pred_ctxt[j].lambda = lambda;
                    }
                }
                else
                {
                    S32 j;
                    S32 lambda;

                    lambda = lambda_recon;

                    for(j = 0; j < 2; j++)
                    {
                        ps_search_results->as_pred_ctxt[j].lambda = lambda;
                    }
                }
#endif

                s_search_prms_blk.ps_search_results = ps_search_results;

                s_search_prms_blk.i4_part_mask = hme_part_mask_populator(
                    pu1_inp,
                    i4_inp_stride,
                    ps_refine_prms->limit_active_partitions,
                    ps_ctxt->ps_hme_frm_prms->bidir_enabled,
                    ps_ctxt->u1_is_curFrame_a_refFrame,
                    blk_8x8_mask,
                    e_me_quality_presets);

                if(ME_PRISTINE_QUALITY == e_me_quality_presets)
                {
                    ps_ctb_cluster_info->ai4_part_mask[blk_id_in_full_ctb] =
                        s_search_prms_blk.i4_part_mask;
                }

                /* RESET ALL SEARCH RESULTS FOR THE NEW BLK */
                {
                    /* Temporarily setting u1_num_active_ref to 2 when bidir is */
                    /* enabled (1 otherwise) for the sole purpose of the */
                    /* function called below */
                    ps_search_results->u1_num_active_ref = (ps_refine_prms->bidir_enabled) ? 2 : 1;

                    hme_reset_search_results(
                        ps_search_results, s_search_prms_blk.i4_part_mask, MV_RES_FPEL);

                    ps_search_results->u1_num_active_ref = i4_num_pred_dir;
                }

                if(0 == blk_id_in_ctb)
                {
                    UWORD8 u1_ctr;
                    for(u1_ctr = 0; u1_ctr < (ps_ctxt->s_frm_prms.u1_num_active_ref_l0 +
                                              ps_ctxt->s_frm_prms.u1_num_active_ref_l1);
                        u1_ctr++)
                    {
                        WORD32 i4_max_dep_ctb_y;
                        WORD32 i4_max_dep_ctb_x;

                        /* Set max mv in ctb units */
                        i4_max_mv_x_in_ctb =
                            (ps_curr_layer->i2_max_mv_x + ((1 << ps_ctxt->log_ctb_size) - 1)) >>
                            ps_ctxt->log_ctb_size;

                        i4_max_mv_y_in_ctb =
                            (as_mv_limit[u1_ctr].i2_max_y + ((1 << ps_ctxt->log_ctb_size) - 1)) >>
                            ps_ctxt->log_ctb_size;
                        /********************************************************************/
                        /* Set max ctb_x and ctb_y dependency on reference picture          */
                        /* Note +1 is due to delayed deblock, SAO, subpel plan dependency   */
                        /********************************************************************/
                        i4_max_dep_ctb_x = CLIP3(
                            (i4_ctb_x + i4_max_mv_x_in_ctb + 1),
                            0,
                            ps_frm_ctb_prms->i4_num_ctbs_horz - 1);
                        i4_max_dep_ctb_y = CLIP3(
                            (i4_ctb_y + i4_max_mv_y_in_ctb + 1),
                            0,
                            ps_frm_ctb_prms->i4_num_ctbs_vert - 1);

                        ihevce_dmgr_map_chk_sync(
                            ps_curr_layer->ppv_dep_mngr_recon[u1_ctr],
                            ps_ctxt->thrd_id,
                            i4_ctb_x,
                            i4_ctb_y,
                            i4_max_mv_x_in_ctb,
                            i4_max_mv_y_in_ctb);
                    }
                }

                /* Loop across different Ref IDx */
                for(u1_pred_dir_ctr = 0; u1_pred_dir_ctr < i4_num_pred_dir; u1_pred_dir_ctr++)
                {
                    S32 resultid;
                    S08 u1_default_ref_id;
                    S32 i4_num_srch_cands = 0;
                    S32 i4_num_refinement_iterations;
                    S32 i4_refine_iter_ctr;

                    if((i4_num_pred_dir == 2) || (!ps_ctxt->s_frm_prms.bidir_enabled) ||
                       (ps_ctxt->s_frm_prms.u1_num_active_ref_l1 == 0))
                    {
                        u1_pred_dir = u1_pred_dir_ctr;
                    }
                    else if(ps_ctxt->s_frm_prms.u1_num_active_ref_l0 == 0)
                    {
                        u1_pred_dir = 1;
                    }

                    u1_default_ref_id = (u1_pred_dir == 0) ? ps_ctxt->ai1_past_list[0]
                                                           : ps_ctxt->ai1_future_list[0];
                    au1_pred_dir_searched[u1_pred_dir_ctr] = u1_pred_dir;

                    i4_num_srch_cands = 0;
                    resultid = 0;

                    /* START OF NEW CTB MEANS FILL UP NEIGHBOURS IN 18x18 GRID */
                    if(0 == blk_id_in_ctb)
                    {
                        /*****************************************************************/
                        /* Initialize the mv grid with results of neighbours for the next*/
                        /* ctb.                                                          */
                        /*****************************************************************/
                        hme_fill_ctb_neighbour_mvs(
                            ps_curr_layer,
                            blk_x,
                            blk_y,
                            aps_mv_grid[u1_pred_dir],
                            u1_pred_dir_ctr,
                            u1_default_ref_id,
                            ps_ctxt->s_frm_prms.u1_num_active_ref_l0);
                    }

                    s_search_prms_blk.i1_ref_idx = u1_pred_dir;

                    {
                        if((blk_id_in_full_ctb % 4) == 0)
                        {
                            ps_ctxt->as_search_results_32x32[blk_id_in_full_ctb >> 2]
                                .as_pred_ctxt[u1_pred_dir]
                                .proj_used = (blk_id_in_full_ctb == 8) ? 0 : 1;
                        }

                        if(blk_id_in_full_ctb == 0)
                        {
                            ps_ctxt->s_search_results_64x64.as_pred_ctxt[u1_pred_dir].proj_used = 1;
                        }

                        ps_search_results->as_pred_ctxt[u1_pred_dir].proj_used =
                            !gau1_encode_to_raster_y[blk_id_in_full_ctb];
                    }

                    {
                        S32 x = gau1_encode_to_raster_x[blk_id_in_full_ctb];
                        S32 y = gau1_encode_to_raster_y[blk_id_in_full_ctb];
                        U08 u1_is_blk_at_ctb_boundary = !y;

                        s_srch_cand_init_data.u1_is_left_available =
                            !(left_ctb_in_diff_tile && !s_search_prms_blk.i4_cu_x_off);

                        if(u1_is_blk_at_ctb_boundary)
                        {
                            s_srch_cand_init_data.u1_is_topRight_available = 0;
                            s_srch_cand_init_data.u1_is_topLeft_available = 0;
                            s_srch_cand_init_data.u1_is_top_available = 0;
                        }
                        else
                        {
                            s_srch_cand_init_data.u1_is_topRight_available =
                                gau1_cu_tr_valid[y][x] && ((pos_x + blk_wd) < i4_pic_wd);
                            s_srch_cand_init_data.u1_is_top_available = 1;
                            s_srch_cand_init_data.u1_is_topLeft_available =
                                s_srch_cand_init_data.u1_is_left_available;
                        }
                    }

                    s_srch_cand_init_data.i1_default_ref_id = u1_default_ref_id;
                    s_srch_cand_init_data.i1_alt_default_ref_id = ps_ctxt->ai1_past_list[1];
                    s_srch_cand_init_data.i4_pos_x = pos_x;
                    s_srch_cand_init_data.i4_pos_y = pos_y;
                    s_srch_cand_init_data.u1_pred_dir = u1_pred_dir;
                    s_srch_cand_init_data.u1_pred_dir_ctr = u1_pred_dir_ctr;
                    s_srch_cand_init_data.u1_search_candidate_list_index =
                        au1_search_candidate_list_index[u1_pred_dir];

                    i4_num_srch_cands = hme_populate_search_candidates(&s_srch_cand_init_data);

                    /* Note this block also clips the MV range for all candidates */
                    {
                        S08 i1_check_for_mult_refs;

                        i1_check_for_mult_refs = u1_pred_dir ? (ps_ctxt->num_ref_future > 1)
                                                             : (ps_ctxt->num_ref_past > 1);

                        ps_me_optimised_function_list->pf_mv_clipper(
                            &s_search_prms_blk,
                            i4_num_srch_cands,
                            i1_check_for_mult_refs,
                            ps_refine_prms->i4_num_steps_fpel_refine,
                            ps_refine_prms->i4_num_steps_hpel_refine,
                            ps_refine_prms->i4_num_steps_qpel_refine);
                    }

#if ENABLE_EXPLICIT_SEARCH_IN_P_IN_L0
                    i4_num_refinement_iterations =
                        ((!ps_ctxt->s_frm_prms.bidir_enabled) && (i4_num_act_ref_l0 > 1))
                            ? ((e_me_quality_presets == ME_HIGH_QUALITY) ? 2 : i4_num_act_ref_l0)
                            : 1;
#else
                    i4_num_refinement_iterations =
                        ((!ps_ctxt->s_frm_prms.bidir_enabled) && (i4_num_act_ref_l0 > 1)) ? 2 : 1;
#endif

#if ENABLE_EXPLICIT_SEARCH_IN_PQ
                    if(e_me_quality_presets == ME_PRISTINE_QUALITY)
                    {
                        i4_num_refinement_iterations = (u1_pred_dir == 0) ? i4_num_act_ref_l0
                                                                          : i4_num_act_ref_l1;
                    }
#endif

                    for(i4_refine_iter_ctr = 0; i4_refine_iter_ctr < i4_num_refinement_iterations;
                        i4_refine_iter_ctr++)
                    {
                        S32 center_x;
                        S32 center_y;
                        S32 center_ref_idx;

                        S08 *pi1_pred_dir_to_ref_idx =
                            (u1_pred_dir == 0) ? ps_ctxt->ai1_past_list : ps_ctxt->ai1_future_list;

                        {
                            WORD32 i4_i;

                            for(i4_i = 0; i4_i < TOT_NUM_PARTS; i4_i++)
                            {
                                ps_fullpel_refine_ctxt->i2_tot_cost[0][i4_i] = MAX_SIGNED_16BIT_VAL;
                                ps_fullpel_refine_ctxt->i2_mv_cost[0][i4_i] = MAX_SIGNED_16BIT_VAL;
                                ps_fullpel_refine_ctxt->i2_stim_injected_cost[0][i4_i] =
                                    MAX_SIGNED_16BIT_VAL;
                                ps_fullpel_refine_ctxt->i2_mv_x[0][i4_i] = 0;
                                ps_fullpel_refine_ctxt->i2_mv_y[0][i4_i] = 0;
                                ps_fullpel_refine_ctxt->i2_ref_idx[0][i4_i] = u1_default_ref_id;

                                if(ps_refine_prms->i4_num_results_per_part == 2)
                                {
                                    ps_fullpel_refine_ctxt->i2_tot_cost[1][i4_i] =
                                        MAX_SIGNED_16BIT_VAL;
                                    ps_fullpel_refine_ctxt->i2_mv_cost[1][i4_i] =
                                        MAX_SIGNED_16BIT_VAL;
                                    ps_fullpel_refine_ctxt->i2_stim_injected_cost[1][i4_i] =
                                        MAX_SIGNED_16BIT_VAL;
                                    ps_fullpel_refine_ctxt->i2_mv_x[1][i4_i] = 0;
                                    ps_fullpel_refine_ctxt->i2_mv_y[1][i4_i] = 0;
                                    ps_fullpel_refine_ctxt->i2_ref_idx[1][i4_i] = u1_default_ref_id;
                                }
                            }

                            s_search_prms_blk.ps_fullpel_refine_ctxt = ps_fullpel_refine_ctxt;
                            s_subpel_prms.ps_subpel_refine_ctxt = ps_fullpel_refine_ctxt;
                        }

                        {
                            search_node_t *ps_coloc_node;

                            S32 i = 0;

                            if(i4_num_refinement_iterations > 1)
                            {
                                for(i = 0; i < ai4_num_coloc_cands[u1_pred_dir]; i++)
                                {
                                    ps_coloc_node =
                                        s_search_prms_blk.ps_search_candts[ai4_id_coloc[i]]
                                            .ps_search_node;

                                    if(pi1_pred_dir_to_ref_idx[i4_refine_iter_ctr] ==
                                       ps_coloc_node->i1_ref_idx)
                                    {
                                        break;
                                    }
                                }

                                if(i == ai4_num_coloc_cands[u1_pred_dir])
                                {
                                    i = 0;
                                }
                            }
                            else
                            {
                                ps_coloc_node = s_search_prms_blk.ps_search_candts[ai4_id_coloc[0]]
                                                    .ps_search_node;
                            }

                            hme_set_mvp_node(
                                ps_search_results,
                                ps_coloc_node,
                                u1_pred_dir,
                                (i4_num_refinement_iterations > 1)
                                    ? pi1_pred_dir_to_ref_idx[i4_refine_iter_ctr]
                                    : u1_default_ref_id);

                            center_x = ps_coloc_node->ps_mv->i2_mvx;
                            center_y = ps_coloc_node->ps_mv->i2_mvy;
                            center_ref_idx = ps_coloc_node->i1_ref_idx;
                        }

                        /* Full-Pel search */
                        {
                            S32 num_unique_nodes;

                            memset(au4_unique_node_map, 0, sizeof(au4_unique_node_map));

                            num_unique_nodes = hme_remove_duplicate_fpel_search_candidates(
                                as_unique_search_nodes,
                                s_search_prms_blk.ps_search_candts,
                                au4_unique_node_map,
                                pi1_pred_dir_to_ref_idx,
                                i4_num_srch_cands,
                                s_search_prms_blk.i4_num_init_candts,
                                i4_refine_iter_ctr,
                                i4_num_refinement_iterations,
                                i4_num_act_ref_l0,
                                center_ref_idx,
                                center_x,
                                center_y,
                                ps_ctxt->s_frm_prms.bidir_enabled,
                                e_me_quality_presets);

                            /*************************************************************************/
                            /* This array stores the ids of the partitions whose                     */
                            /* SADs are updated. Since the partitions whose SADs are updated may not */
                            /* be in contiguous order, we supply another level of indirection.       */
                            /*************************************************************************/
                            ps_fullpel_refine_ctxt->i4_num_valid_parts = hme_create_valid_part_ids(
                                s_search_prms_blk.i4_part_mask,
                                &ps_fullpel_refine_ctxt->ai4_part_id[0]);

                            if(!i4_refine_iter_ctr && !u1_pred_dir_ctr && u1_is_cu_noisy)
                            {
                                S32 i;
                                /* i4_sigma_array_offset: offset of this CU's first 4x4 block within */
                                /* the CTB's 256 per-4x4 sigmaX and sigmaX-squared values            */
                                S32 i4_sigma_array_offset = (s_search_prms_blk.i4_cu_x_off / 4) +
                                                            (s_search_prms_blk.i4_cu_y_off * 4);

                                for(i = 0; i < ps_fullpel_refine_ctxt->i4_num_valid_parts; i++)
                                {
                                    S32 i4_part_id = ps_fullpel_refine_ctxt->ai4_part_id[i];

                                    hme_compute_final_sigma_of_pu_from_base_blocks(
                                        ps_ctxt->au4_4x4_src_sigmaX + i4_sigma_array_offset,
                                        ps_ctxt->au4_4x4_src_sigmaXSquared + i4_sigma_array_offset,
                                        au8_final_src_sigmaX,
                                        au8_final_src_sigmaXSquared,
                                        16,
                                        4,
                                        i4_part_id,
                                        16);
                                }

                                s_common_frm_prms.pu8_part_src_sigmaX = au8_final_src_sigmaX;
                                s_common_frm_prms.pu8_part_src_sigmaXSquared =
                                    au8_final_src_sigmaXSquared;

                                s_search_prms_blk.pu8_part_src_sigmaX = au8_final_src_sigmaX;
                                s_search_prms_blk.pu8_part_src_sigmaXSquared =
                                    au8_final_src_sigmaXSquared;
                            }

                            if(0 == num_unique_nodes)
                            {
                                continue;
                            }

                            if(num_unique_nodes >= 2)
                            {
                                s_search_prms_blk.ps_search_nodes = &as_unique_search_nodes[0];
                                s_search_prms_blk.i4_num_search_nodes = num_unique_nodes;
                                if(ps_ctxt->i4_pic_type != IV_P_FRAME)
                                {
                                    if(ps_ctxt->i4_temporal_layer == 1)
                                    {
                                        hme_fullpel_cand_sifter(
                                            &s_search_prms_blk,
                                            ps_curr_layer,
                                            &ps_ctxt->s_wt_pred,
                                            ALPHA_FOR_NOISE_TERM_IN_ME,
                                            u1_is_cu_noisy,
                                            ps_me_optimised_function_list);
                                    }
                                    else
                                    {
                                        hme_fullpel_cand_sifter(
                                            &s_search_prms_blk,
                                            ps_curr_layer,
                                            &ps_ctxt->s_wt_pred,
                                            ALPHA_FOR_NOISE_TERM_IN_ME,
                                            u1_is_cu_noisy,
                                            ps_me_optimised_function_list);
                                    }
                                }
                                else
                                {
                                    hme_fullpel_cand_sifter(
                                        &s_search_prms_blk,
                                        ps_curr_layer,
                                        &ps_ctxt->s_wt_pred,
                                        ALPHA_FOR_NOISE_TERM_IN_ME_P,
                                        u1_is_cu_noisy,
                                        ps_me_optimised_function_list);
                                }
                            }

                            s_search_prms_blk.ps_search_nodes = &as_unique_search_nodes[0];

                            hme_fullpel_refine(
                                ps_refine_prms,
                                &s_search_prms_blk,
                                ps_curr_layer,
                                &ps_ctxt->s_wt_pred,
                                au4_unique_node_map,
                                num_unique_nodes,
                                blk_8x8_mask,
                                center_x,
                                center_y,
                                center_ref_idx,
                                e_me_quality_presets,
                                ps_me_optimised_function_list);
                        }

                        /* Sub-Pel search */
                        {
                            hme_reset_wkg_mem(&ps_ctxt->s_buf_mgr);

                            s_subpel_prms.pu1_wkg_mem = (U08 *)hme_get_wkg_mem(
                                &ps_ctxt->s_buf_mgr,
                                INTERP_INTERMED_BUF_SIZE + INTERP_OUT_BUF_SIZE);
                            /* MV limit is different based on ref. PIC */
                            for(ref_ctr = 0; ref_ctr < num_act_ref_pics; ref_ctr++)
                            {
                                SCALE_RANGE_PRMS(
                                    as_range_prms_hpel[ref_ctr], as_range_prms_rec[ref_ctr], 1);
                                SCALE_RANGE_PRMS(
                                    as_range_prms_qpel[ref_ctr], as_range_prms_rec[ref_ctr], 2);
                            }
                            s_subpel_prms.i4_ctb_x_off = i4_ctb_x << 6;
                            s_subpel_prms.i4_ctb_y_off = i4_ctb_y << 6;

                            hme_subpel_refine_cu_hs(
                                &s_subpel_prms,
                                ps_curr_layer,
                                ps_search_results,
                                u1_pred_dir,
                                &ps_ctxt->s_wt_pred,
                                blk_8x8_mask,
                                ps_ctxt->ps_func_selector,
                                ps_cmn_utils_optimised_function_list,
                                ps_me_optimised_function_list);
                        }
                    }
                }
                /* Populate the new PU struct with the results post subpel refinement*/
                {
                    inter_cu_results_t *ps_cu_results;
                    WORD32 best_inter_cost, intra_cost, posx, posy;

                    UWORD8 intra_8x8_enabled = 0;

                    /*  cost of 16x16 cu parent  */
                    WORD32 parent_cost = MAX_32BIT_VAL;

                    /*  cost of 8x8 cu children  */
                    /*********************************************************************/
                    /* Assuming parent is not split, then we signal 1 bit for this parent*/
                    /* CU. If split, then 1 bit for parent CU + 4 bits for each child CU */
                    /* So, 4*lambda is extra for children cost.                          */
                    /*********************************************************************/
                    WORD32 child_cost = 0;

                    ps_cu_results = ps_search_results->ps_cu_results;

                    /* Initialize the pu_results pointers to the first struct in the stack array */
                    ps_pu_results = as_inter_pu_results;

                    hme_reset_wkg_mem(&ps_ctxt->s_buf_mgr);

                    hme_populate_pus(
                        ps_thrd_ctxt,
                        ps_ctxt,
                        &s_subpel_prms,
                        ps_search_results,
                        ps_cu_results,
                        ps_pu_results,
                        &(as_pu_results[0][0][0]),
                        &s_common_frm_prms,
                        &ps_ctxt->s_wt_pred,
                        ps_curr_layer,
                        au1_pred_dir_searched,
                        i4_num_pred_dir);

                    ps_cu_results->i4_inp_offset =
                        (ps_cu_results->u1_x_off) + (ps_cu_results->u1_y_off * 64);

                    hme_decide_part_types(
                        ps_cu_results,
                        ps_pu_results,
                        &s_common_frm_prms,
                        ps_ctxt,
                        ps_cmn_utils_optimised_function_list,
                        ps_me_optimised_function_list

                    );

                    /* Update the min and max MVs of the dynamic search range for each ref. pic. */
                    /* Done only for P pics: is_i_pic and bidir_enabled are both 0 for P, */
                    /* whereas for I and B pics they are mutually exclusive. */
                    if(ps_ctxt->s_frm_prms.is_i_pic == ps_ctxt->s_frm_prms.bidir_enabled)
                    {
                        WORD32 res_ctr;

                        for(res_ctr = 0; res_ctr < ps_cu_results->u1_num_best_results; res_ctr++)
                        {
                            WORD32 num_part = 2, part_ctr;
                            part_type_results_t *ps_best_results =
                                &ps_cu_results->ps_best_results[res_ctr];

                            if(PRT_2Nx2N == ps_best_results->u1_part_type)
                                num_part = 1;

                            for(part_ctr = 0; part_ctr < num_part; part_ctr++)
                            {
                                pu_result_t *ps_pu_results =
                                    &ps_best_results->as_pu_results[part_ctr];

                                ASSERT(PRED_L0 == ps_pu_results->pu.b2_pred_mode);

                                hme_update_dynamic_search_params(
                                    &ps_ctxt->as_l0_dyn_range_prms[i4_idx_dvsr_p]
                                         .as_dyn_range_prms[ps_pu_results->pu.mv.i1_l0_ref_idx],
                                    ps_pu_results->pu.mv.s_l0_mv.i2_mvy);

                                /* Sanity Check */
                                ASSERT(
                                    ps_pu_results->pu.mv.i1_l0_ref_idx <
                                    ps_ctxt->s_frm_prms.u1_num_active_ref_l0);

                                /* No L1 for P Pic. */
                                ASSERT(PRED_L1 != ps_pu_results->pu.b2_pred_mode);
                                /* No BI for P Pic. */
                                ASSERT(PRED_BI != ps_pu_results->pu.b2_pred_mode);
                            }
                        }
                    }

                    /*****************************************************************/
                    /* INSERT INTRA RESULTS AT 16x16 LEVEL.                          */
                    /*****************************************************************/

#if DISABLE_INTRA_IN_BPICS
                    if(1 != ((ME_XTREME_SPEED_25 == e_me_quality_presets) &&
                             (ps_ctxt->s_frm_prms.i4_temporal_layer_id > TEMPORAL_LAYER_DISABLE)))
#endif
                    {
                        if(!(DISABLE_INTRA_WHEN_NOISY && s_common_frm_prms.u1_is_cu_noisy))
                        {
                            hme_insert_intra_nodes_post_bipred(
                                ps_cu_results, ps_cur_ipe_ctb, ps_ctxt->frm_qstep);
                        }
                    }

#if DISABLE_INTRA_IN_BPICS
                    if((ME_XTREME_SPEED_25 == e_me_quality_presets) &&
                       (ps_ctxt->s_frm_prms.i4_temporal_layer_id > TEMPORAL_LAYER_DISABLE))
                    {
                        intra_8x8_enabled = 0;
                    }
                    else
#endif
                    {
                        /*TRAQO intra flag updation*/
                        if(1 == ps_cu_results->ps_best_results->as_pu_results[0].pu.b1_intra_flag)
                        {
                            best_inter_cost =
                                ps_cu_results->ps_best_results->as_pu_results[1].i4_tot_cost;
                            intra_cost =
                                ps_cu_results->ps_best_results->as_pu_results[0].i4_tot_cost;
                            /*@16x16 level*/
                            posx = (ps_cu_results->ps_best_results->as_pu_results[1].pu.b4_pos_x
                                    << 2) >>
                                   4;
                            posy = (ps_cu_results->ps_best_results->as_pu_results[1].pu.b4_pos_y
                                    << 2) >>
                                   4;
                        }
                        else
                        {
                            best_inter_cost =
                                ps_cu_results->ps_best_results->as_pu_results[0].i4_tot_cost;
                            posx = (ps_cu_results->ps_best_results->as_pu_results[0].pu.b4_pos_x
                                    << 2) >>
                                   3;
                            posy = (ps_cu_results->ps_best_results->as_pu_results[0].pu.b4_pos_y
                                    << 2) >>
                                   3;
                        }

                        /* Disable intra16/32/64 flags based on split flags recommended by IPE */
                        if(ps_cur_ipe_ctb->u1_split_flag)
                        {
                            /* Id of the 32x32 block, 16x16 block in a CTB */
                            WORD32 i4_32x32_id =
                                (ps_cu_results->u1_y_off >> 5) * 2 + (ps_cu_results->u1_x_off >> 5);
                            WORD32 i4_16x16_id = ((ps_cu_results->u1_y_off >> 4) & 0x1) * 2 +
                                                 ((ps_cu_results->u1_x_off >> 4) & 0x1);

                            if(ps_cur_ipe_ctb->as_intra32_analyse[i4_32x32_id].b1_split_flag)
                            {
                                if(ps_cur_ipe_ctb->as_intra32_analyse[i4_32x32_id]
                                       .as_intra16_analyse[i4_16x16_id]
                                       .b1_split_flag)
                                {
                                    intra_8x8_enabled =
                                        ps_cur_ipe_ctb->as_intra32_analyse[i4_32x32_id]
                                            .as_intra16_analyse[i4_16x16_id]
                                            .as_intra8_analyse[0]
                                            .b1_valid_cu;
                                    intra_8x8_enabled &=
                                        ps_cur_ipe_ctb->as_intra32_analyse[i4_32x32_id]
                                            .as_intra16_analyse[i4_16x16_id]
                                            .as_intra8_analyse[1]
                                            .b1_valid_cu;
                                    intra_8x8_enabled &=
                                        ps_cur_ipe_ctb->as_intra32_analyse[i4_32x32_id]
                                            .as_intra16_analyse[i4_16x16_id]
                                            .as_intra8_analyse[2]
                                            .b1_valid_cu;
                                    intra_8x8_enabled &=
                                        ps_cur_ipe_ctb->as_intra32_analyse[i4_32x32_id]
                                            .as_intra16_analyse[i4_16x16_id]
                                            .as_intra8_analyse[3]
                                            .b1_valid_cu;
                                }
                            }
                        }
                    }

                    if(blk_8x8_mask == 0xf)
                    {
                        parent_cost =
                            ps_search_results->ps_cu_results->ps_best_results[0].i4_tot_cost;
                        ps_search_results->u1_split_flag = 0;
                    }
                    else
                    {
                        ps_search_results->u1_split_flag = 1;
                    }

                    ps_cu_results = &ps_ctxt->as_cu8x8_results[blk_id_in_full_ctb << 2];

                    if(s_common_frm_prms.u1_is_cu_noisy)
                    {
                        intra_8x8_enabled = 0;
                    }

                    /* Evaluate 8x8 if NxN part id is enabled */
                    if((ps_search_results->i4_part_mask & ENABLE_NxN) || intra_8x8_enabled)
                    {
                        /* Populates the PU's for the 4 8x8's in one call */
                        hme_populate_pus_8x8_cu(
                            ps_thrd_ctxt,
                            ps_ctxt,
                            &s_subpel_prms,
                            ps_search_results,
                            ps_cu_results,
                            ps_pu_results,
                            &(as_pu_results[0][0][0]),
                            &s_common_frm_prms,
                            au1_pred_dir_searched,
                            i4_num_pred_dir,
                            blk_8x8_mask);

                        /* Re-initialize the pu_results pointers to the first struct in the stack array */
                        ps_pu_results = as_inter_pu_results;

                        for(i = 0; i < 4; i++)
                        {
                            if((blk_8x8_mask & (1 << i)))
                            {
                                if(ps_cu_results->i4_part_mask)
                                {
                                    hme_decide_part_types(
                                        ps_cu_results,
                                        ps_pu_results,
                                        &s_common_frm_prms,
                                        ps_ctxt,
                                        ps_cmn_utils_optimised_function_list,
                                        ps_me_optimised_function_list

                                    );
                                }
                                /*****************************************************************/
                                /* INSERT INTRA RESULTS AT 8x8 LEVEL.                          */
                                /*****************************************************************/
#if DISABLE_INTRA_IN_BPICS
                                if(1 != ((ME_XTREME_SPEED_25 == e_me_quality_presets) &&
                                         (ps_ctxt->s_frm_prms.i4_temporal_layer_id >
                                          TEMPORAL_LAYER_DISABLE)))
#endif
                                {
                                    if(!(DISABLE_INTRA_WHEN_NOISY &&
                                         s_common_frm_prms.u1_is_cu_noisy))
                                    {
                                        hme_insert_intra_nodes_post_bipred(
                                            ps_cu_results, ps_cur_ipe_ctb, ps_ctxt->frm_qstep);
                                    }
                                }

                                child_cost += ps_cu_results->ps_best_results[0].i4_tot_cost;
                            }

                            ps_cu_results++;
                            ps_pu_results++;
                        }

                        /* Compare 16x16 vs 8x8 cost */
                        if(child_cost < parent_cost)
                        {
                            ps_search_results->best_cu_cost = child_cost;
                            ps_search_results->u1_split_flag = 1;
                        }
                    }
                }

                hme_update_mv_bank_encode(
                    ps_search_results,
                    ps_curr_layer->ps_layer_mvbank,
                    blk_x,
                    blk_y,
                    &s_mv_update_prms,
                    au1_pred_dir_searched,
                    i4_num_act_ref_l0);

                /*********************************************************************/
                /* Map the best results to an MV Grid. This is a 18x18 grid that is  */
                /* useful for doing things like predictor for cost calculation or    */
                /* also for merge calculations if need be.                           */
                /*********************************************************************/
                hme_map_mvs_to_grid(
                    &aps_mv_grid[0], ps_search_results, au1_pred_dir_searched, i4_num_pred_dir);
            }

            /* Set the CU tree nodes appropriately */
            if(e_me_quality_presets != ME_PRISTINE_QUALITY)
            {
                WORD32 i, j;

                for(i = 0; i < 16; i++)
                {
                    cur_ctb_cu_tree_t *ps_tree_node =
                        ps_ctxt->ps_cu_tree_curr_row + (i4_ctb_x * MAX_NUM_NODES_CU_TREE);
                    search_results_t *ps_results = &ps_ctxt->as_search_results_16x16[i];

                    switch(i >> 2)
                    {
                    case 0:
                    {
                        ps_tree_node = ps_tree_node->ps_child_node_tl;

                        break;
                    }
                    case 1:
                    {
                        ps_tree_node = ps_tree_node->ps_child_node_tr;

                        break;
                    }
                    case 2:
                    {
                        ps_tree_node = ps_tree_node->ps_child_node_bl;

                        break;
                    }
                    case 3:
                    {
                        ps_tree_node = ps_tree_node->ps_child_node_br;

                        break;
                    }
                    }

                    switch(i % 4)
                    {
                    case 0:
                    {
                        ps_tree_node = ps_tree_node->ps_child_node_tl;

                        break;
                    }
                    case 1:
                    {
                        ps_tree_node = ps_tree_node->ps_child_node_tr;

                        break;
                    }
                    case 2:
                    {
                        ps_tree_node = ps_tree_node->ps_child_node_bl;

                        break;
                    }
                    case 3:
                    {
                        ps_tree_node = ps_tree_node->ps_child_node_br;

                        break;
                    }
                    }

                    if(ai4_blk_8x8_mask[i] == 15)
                    {
                        if(!ps_results->u1_split_flag)
                        {
                            ps_tree_node->is_node_valid = 1;
                            NULLIFY_THE_CHILDREN_NODES(ps_tree_node);
                        }
                        else
                        {
                            ps_tree_node->is_node_valid = 0;
                            ENABLE_THE_CHILDREN_NODES(ps_tree_node);
                        }
                    }
                    else
                    {
                        cur_ctb_cu_tree_t *ps_tree_child;

                        ps_tree_node->is_node_valid = 0;

                        for(j = 0; j < 4; j++)
                        {
                            switch(j)
                            {
                            case 0:
                            {
                                ps_tree_child = ps_tree_node->ps_child_node_tl;

                                break;
                            }
                            case 1:
                            {
                                ps_tree_child = ps_tree_node->ps_child_node_tr;

                                break;
                            }
                            case 2:
                            {
                                ps_tree_child = ps_tree_node->ps_child_node_bl;

                                break;
                            }
                            case 3:
                            {
                                ps_tree_child = ps_tree_node->ps_child_node_br;

                                break;
                            }
                            }

                            ps_tree_child->is_node_valid = !!(ai4_blk_8x8_mask[i] & (1 << j));
                        }
                    }
                }
            }

            if(ME_PRISTINE_QUALITY == e_me_quality_presets)
            {
                cur_ctb_cu_tree_t *ps_tree = ps_ctb_cluster_info->ps_cu_tree_root;

                hme_analyse_mv_clustering(
                    ps_ctxt->as_search_results_16x16,
                    ps_ctxt->as_cu16x16_results,
                    ps_ctxt->as_cu8x8_results,
                    ps_ctxt->ps_ctb_cluster_info,
                    ps_ctxt->ai1_future_list,
                    ps_ctxt->ai1_past_list,
                    ps_ctxt->s_frm_prms.bidir_enabled,
                    e_me_quality_presets);

#if DISABLE_BLK_MERGE_WHEN_NOISY
                ps_tree->ps_child_node_tl->is_node_valid = !au1_is_32x32Blk_noisy[0];
                ps_tree->ps_child_node_tr->is_node_valid = !au1_is_32x32Blk_noisy[1];
                ps_tree->ps_child_node_bl->is_node_valid = !au1_is_32x32Blk_noisy[2];
                ps_tree->ps_child_node_br->is_node_valid = !au1_is_32x32Blk_noisy[3];
                ps_tree->ps_child_node_tl->u1_inter_eval_enable = !au1_is_32x32Blk_noisy[0];
                ps_tree->ps_child_node_tr->u1_inter_eval_enable = !au1_is_32x32Blk_noisy[1];
                ps_tree->ps_child_node_bl->u1_inter_eval_enable = !au1_is_32x32Blk_noisy[2];
                ps_tree->ps_child_node_br->u1_inter_eval_enable = !au1_is_32x32Blk_noisy[3];
                ps_tree->is_node_valid = !au1_is_64x64Blk_noisy[0];
                ps_tree->u1_inter_eval_enable = !au1_is_64x64Blk_noisy[0];
#endif

                en_merge_32x32 = (ps_tree->ps_child_node_tl->is_node_valid << 0) |
                                 (ps_tree->ps_child_node_tr->is_node_valid << 1) |
                                 (ps_tree->ps_child_node_bl->is_node_valid << 2) |
                                 (ps_tree->ps_child_node_br->is_node_valid << 3);

                en_merge_execution = (ps_tree->ps_child_node_tl->u1_inter_eval_enable << 0) |
                                     (ps_tree->ps_child_node_tr->u1_inter_eval_enable << 1) |
                                     (ps_tree->ps_child_node_bl->u1_inter_eval_enable << 2) |
                                     (ps_tree->ps_child_node_br->u1_inter_eval_enable << 3) |
                                     (ps_tree->u1_inter_eval_enable << 4);
            }
            else
            {
                en_merge_execution = 0x1f;

#if DISABLE_BLK_MERGE_WHEN_NOISY
                en_merge_32x32 = ((!au1_is_32x32Blk_noisy[0] << 0) & (en_merge_32x32 & 1)) |
                                 ((!au1_is_32x32Blk_noisy[1] << 1) & (en_merge_32x32 & 2)) |
                                 ((!au1_is_32x32Blk_noisy[2] << 2) & (en_merge_32x32 & 4)) |
                                 ((!au1_is_32x32Blk_noisy[3] << 3) & (en_merge_32x32 & 8));
#endif
            }

            /* Re-initialize the pu_results pointers to the first struct in the stack array */
            ps_pu_results = as_inter_pu_results;

            {
                WORD32 ref_ctr;

                s_ctb_prms.i4_ctb_x = i4_ctb_x << 6;
                s_ctb_prms.i4_ctb_y = i4_ctb_y << 6;

                /* MV limit is different based on ref. PIC */
                for(ref_ctr = 0; ref_ctr < num_act_ref_pics; ref_ctr++)
                {
                    SCALE_RANGE_PRMS(as_range_prms_hpel[ref_ctr], as_range_prms_rec[ref_ctr], 1);
                    SCALE_RANGE_PRMS(as_range_prms_qpel[ref_ctr], as_range_prms_rec[ref_ctr], 2);
                }

                e_merge_result = CU_SPLIT;
                merge_count_32x32 = 0;

                if((en_merge_32x32 & 1) && (en_merge_execution & 1))
                {
                    range_prms_t *ps_pic_limit;
                    if(s_merge_prms_32x32_tl.i4_use_rec == 1)
                    {
                        ps_pic_limit = &s_pic_limit_rec;
                    }
                    else
                    {
                        ps_pic_limit = &s_pic_limit_inp;
                    }
                    /* MV limit is different based on ref. PIC */
                    for(ref_ctr = 0; ref_ctr < num_act_ref_pics; ref_ctr++)
                    {
                        hme_derive_search_range(
                            s_merge_prms_32x32_tl.aps_mv_range[ref_ctr],
                            ps_pic_limit,
                            &as_mv_limit[ref_ctr],
                            i4_ctb_x << 6,
                            i4_ctb_y << 6,
                            32,
                            32);

                        SCALE_RANGE_PRMS_POINTERS(
                            s_merge_prms_32x32_tl.aps_mv_range[ref_ctr],
                            s_merge_prms_32x32_tl.aps_mv_range[ref_ctr],
                            2);
                    }
                    s_merge_prms_32x32_tl.i4_ctb_x_off = i4_ctb_x << 6;
                    s_merge_prms_32x32_tl.i4_ctb_y_off = i4_ctb_y << 6;
                    s_subpel_prms.u1_is_cu_noisy = au1_is_32x32Blk_noisy[0];

                    e_merge_result = hme_try_merge_high_speed(
                        ps_thrd_ctxt,
                        ps_ctxt,
                        ps_cur_ipe_ctb,
                        &s_subpel_prms,
                        &s_merge_prms_32x32_tl,
                        ps_pu_results,
                        &as_pu_results[0][0][0]);

                    if(e_merge_result == CU_MERGED)
                    {
                        inter_cu_results_t *ps_cu_results =
                            s_merge_prms_32x32_tl.ps_results_merge->ps_cu_results;

                        if(!((ps_cu_results->u1_num_best_results == 1) &&
                             (ps_cu_results->ps_best_results->as_pu_results->pu.b1_intra_flag)))
                        {
                            hme_map_mvs_to_grid(
                                &aps_mv_grid[0],
                                s_merge_prms_32x32_tl.ps_results_merge,
                                s_merge_prms_32x32_tl.au1_pred_dir_searched,
                                s_merge_prms_32x32_tl.i4_num_pred_dir_actual);
                        }

                        if(ME_PRISTINE_QUALITY != e_me_quality_presets)
                        {
                            ps_ctxt->ps_cu_tree_curr_row[(i4_ctb_x * MAX_NUM_NODES_CU_TREE)]
                                .ps_child_node_tl->is_node_valid = 1;
                            NULLIFY_THE_CHILDREN_NODES(
                                ps_ctxt->ps_cu_tree_curr_row[(i4_ctb_x * MAX_NUM_NODES_CU_TREE)]
                                    .ps_child_node_tl);
                        }

                        merge_count_32x32++;
                        e_merge_result = CU_SPLIT;
                    }
                    else if(ME_PRISTINE_QUALITY == e_me_quality_presets)
                    {
#if ENABLE_CU_TREE_CULLING
                        cur_ctb_cu_tree_t *ps_tree =
                            ps_ctb_cluster_info->ps_cu_tree_root->ps_child_node_tl;

                        ps_ctb_cluster_info->ps_cu_tree_root->is_node_valid = 0;
                        en_merge_execution = (en_merge_execution & (~(1 << 4)));
                        ENABLE_THE_CHILDREN_NODES(ps_tree);
                        ENABLE_THE_CHILDREN_NODES(ps_tree->ps_child_node_tl);
                        ENABLE_THE_CHILDREN_NODES(ps_tree->ps_child_node_tr);
                        ENABLE_THE_CHILDREN_NODES(ps_tree->ps_child_node_bl);
                        ENABLE_THE_CHILDREN_NODES(ps_tree->ps_child_node_br);
#endif
                    }
                }
                else if((en_merge_32x32 & 1) && (!(en_merge_execution & 1)))
                {
#if ENABLE_CU_TREE_CULLING
                    cur_ctb_cu_tree_t *ps_tree =
                        ps_ctb_cluster_info->ps_cu_tree_root->ps_child_node_tl;

                    ENABLE_THE_CHILDREN_NODES(ps_tree);
                    ENABLE_THE_CHILDREN_NODES(ps_tree->ps_child_node_tl);
                    ENABLE_THE_CHILDREN_NODES(ps_tree->ps_child_node_tr);
                    ENABLE_THE_CHILDREN_NODES(ps_tree->ps_child_node_bl);
                    ENABLE_THE_CHILDREN_NODES(ps_tree->ps_child_node_br);
#endif

                    if(au1_is_32x32Blk_noisy[0] && DISABLE_INTRA_WHEN_NOISY)
                    {
                        ps_tree->is_node_valid = 0;
                        ps_ctb_cluster_info->ps_cu_tree_root->is_node_valid = 0;
                        en_merge_execution = (en_merge_execution & (~(1 << 4)));
                    }
                }

                if((en_merge_32x32 & 2) && (en_merge_execution & 2))
                {
                    range_prms_t *ps_pic_limit;
                    if(s_merge_prms_32x32_tr.i4_use_rec == 1)
                    {
                        ps_pic_limit = &s_pic_limit_rec;
                    }
                    else
                    {
                        ps_pic_limit = &s_pic_limit_inp;
                    }
                    /* MV limit is different based on ref. PIC */
                    for(ref_ctr = 0; ref_ctr < num_act_ref_pics; ref_ctr++)
                    {
                        hme_derive_search_range(
                            s_merge_prms_32x32_tr.aps_mv_range[ref_ctr],
                            ps_pic_limit,
                            &as_mv_limit[ref_ctr],
                            (i4_ctb_x << 6) + 32,
                            i4_ctb_y << 6,
                            32,
                            32);
                        SCALE_RANGE_PRMS_POINTERS(
                            s_merge_prms_32x32_tr.aps_mv_range[ref_ctr],
                            s_merge_prms_32x32_tr.aps_mv_range[ref_ctr],
                            2);
                    }
                    s_merge_prms_32x32_tr.i4_ctb_x_off = i4_ctb_x << 6;
                    s_merge_prms_32x32_tr.i4_ctb_y_off = i4_ctb_y << 6;
                    s_subpel_prms.u1_is_cu_noisy = au1_is_32x32Blk_noisy[1];

                    e_merge_result = hme_try_merge_high_speed(
                        ps_thrd_ctxt,
                        ps_ctxt,
                        ps_cur_ipe_ctb,
                        &s_subpel_prms,
                        &s_merge_prms_32x32_tr,
                        ps_pu_results,
                        &as_pu_results[0][0][0]);

                    if(e_merge_result == CU_MERGED)
                    {
                        inter_cu_results_t *ps_cu_results =
                            s_merge_prms_32x32_tr.ps_results_merge->ps_cu_results;

                        if(!((ps_cu_results->u1_num_best_results == 1) &&
                             (ps_cu_results->ps_best_results->as_pu_results->pu.b1_intra_flag)))
                        {
                            hme_map_mvs_to_grid(
                                &aps_mv_grid[0],
                                s_merge_prms_32x32_tr.ps_results_merge,
                                s_merge_prms_32x32_tr.au1_pred_dir_searched,
                                s_merge_prms_32x32_tr.i4_num_pred_dir_actual);
                        }

                        if(ME_PRISTINE_QUALITY != e_me_quality_presets)
                        {
                            ps_ctxt->ps_cu_tree_curr_row[(i4_ctb_x * MAX_NUM_NODES_CU_TREE)]
                                .ps_child_node_tr->is_node_valid = 1;
                            NULLIFY_THE_CHILDREN_NODES(
                                ps_ctxt->ps_cu_tree_curr_row[(i4_ctb_x * MAX_NUM_NODES_CU_TREE)]
                                    .ps_child_node_tr);
                        }

                        merge_count_32x32++;
                        e_merge_result = CU_SPLIT;
                    }
                    else if(ME_PRISTINE_QUALITY == e_me_quality_presets)
                    {
#if ENABLE_CU_TREE_CULLING
                        cur_ctb_cu_tree_t *ps_tree =
                            ps_ctb_cluster_info->ps_cu_tree_root->ps_child_node_tr;

                        ps_ctb_cluster_info->ps_cu_tree_root->is_node_valid = 0;
                        en_merge_execution = (en_merge_execution & (~(1 << 4)));
                        ENABLE_THE_CHILDREN_NODES(ps_tree);
                        ENABLE_THE_CHILDREN_NODES(ps_tree->ps_child_node_tl);
                        ENABLE_THE_CHILDREN_NODES(ps_tree->ps_child_node_tr);
                        ENABLE_THE_CHILDREN_NODES(ps_tree->ps_child_node_bl);
                        ENABLE_THE_CHILDREN_NODES(ps_tree->ps_child_node_br);
#endif
                    }
                }
                else if((en_merge_32x32 & 2) && (!(en_merge_execution & 2)))
                {
#if ENABLE_CU_TREE_CULLING
                    cur_ctb_cu_tree_t *ps_tree =
                        ps_ctb_cluster_info->ps_cu_tree_root->ps_child_node_tr;

                    ENABLE_THE_CHILDREN_NODES(ps_tree);
                    ENABLE_THE_CHILDREN_NODES(ps_tree->ps_child_node_tl);
                    ENABLE_THE_CHILDREN_NODES(ps_tree->ps_child_node_tr);
                    ENABLE_THE_CHILDREN_NODES(ps_tree->ps_child_node_bl);
                    ENABLE_THE_CHILDREN_NODES(ps_tree->ps_child_node_br);
#endif

                    if(au1_is_32x32Blk_noisy[1] && DISABLE_INTRA_WHEN_NOISY)
                    {
                        ps_tree->is_node_valid = 0;
                        ps_ctb_cluster_info->ps_cu_tree_root->is_node_valid = 0;
                        en_merge_execution = (en_merge_execution & (~(1 << 4)));
                    }
                }

                if((en_merge_32x32 & 4) && (en_merge_execution & 4))
                {
                    range_prms_t *ps_pic_limit;
                    if(s_merge_prms_32x32_bl.i4_use_rec == 1)
                    {
                        ps_pic_limit = &s_pic_limit_rec;
                    }
                    else
                    {
                        ps_pic_limit = &s_pic_limit_inp;
                    }
                    /* MV limit is different based on ref. PIC */
                    for(ref_ctr = 0; ref_ctr < num_act_ref_pics; ref_ctr++)
                    {
                        hme_derive_search_range(
                            s_merge_prms_32x32_bl.aps_mv_range[ref_ctr],
                            ps_pic_limit,
                            &as_mv_limit[ref_ctr],
                            i4_ctb_x << 6,
                            (i4_ctb_y << 6) + 32,
                            32,
                            32);
                        SCALE_RANGE_PRMS_POINTERS(
                            s_merge_prms_32x32_bl.aps_mv_range[ref_ctr],
                            s_merge_prms_32x32_bl.aps_mv_range[ref_ctr],
                            2);
                    }
                    s_merge_prms_32x32_bl.i4_ctb_x_off = i4_ctb_x << 6;
                    s_merge_prms_32x32_bl.i4_ctb_y_off = i4_ctb_y << 6;
                    s_subpel_prms.u1_is_cu_noisy = au1_is_32x32Blk_noisy[2];

                    e_merge_result = hme_try_merge_high_speed(
                        ps_thrd_ctxt,
                        ps_ctxt,
                        ps_cur_ipe_ctb,
                        &s_subpel_prms,
                        &s_merge_prms_32x32_bl,
                        ps_pu_results,
                        &as_pu_results[0][0][0]);

                    if(e_merge_result == CU_MERGED)
                    {
                        inter_cu_results_t *ps_cu_results =
                            s_merge_prms_32x32_bl.ps_results_merge->ps_cu_results;

                        if(!((ps_cu_results->u1_num_best_results == 1) &&
                             (ps_cu_results->ps_best_results->as_pu_results->pu.b1_intra_flag)))
                        {
                            hme_map_mvs_to_grid(
                                &aps_mv_grid[0],
                                s_merge_prms_32x32_bl.ps_results_merge,
                                s_merge_prms_32x32_bl.au1_pred_dir_searched,
                                s_merge_prms_32x32_bl.i4_num_pred_dir_actual);
                        }

                        if(ME_PRISTINE_QUALITY != e_me_quality_presets)
                        {
                            ps_ctxt->ps_cu_tree_curr_row[(i4_ctb_x * MAX_NUM_NODES_CU_TREE)]
                                .ps_child_node_bl->is_node_valid = 1;
                            NULLIFY_THE_CHILDREN_NODES(
                                ps_ctxt->ps_cu_tree_curr_row[(i4_ctb_x * MAX_NUM_NODES_CU_TREE)]
                                    .ps_child_node_bl);
                        }

                        merge_count_32x32++;
                        e_merge_result = CU_SPLIT;
                    }
                    else if(ME_PRISTINE_QUALITY == e_me_quality_presets)
                    {
#if ENABLE_CU_TREE_CULLING
                        cur_ctb_cu_tree_t *ps_tree =
                            ps_ctb_cluster_info->ps_cu_tree_root->ps_child_node_bl;

                        ps_ctb_cluster_info->ps_cu_tree_root->is_node_valid = 0;
                        en_merge_execution = (en_merge_execution & (~(1 << 4)));
                        ENABLE_THE_CHILDREN_NODES(ps_tree);
                        ENABLE_THE_CHILDREN_NODES(ps_tree->ps_child_node_tl);
                        ENABLE_THE_CHILDREN_NODES(ps_tree->ps_child_node_tr);
                        ENABLE_THE_CHILDREN_NODES(ps_tree->ps_child_node_bl);
                        ENABLE_THE_CHILDREN_NODES(ps_tree->ps_child_node_br);
#endif
                    }
                }
                else if((en_merge_32x32 & 4) && (!(en_merge_execution & 4)))
                {
#if ENABLE_CU_TREE_CULLING
                    cur_ctb_cu_tree_t *ps_tree =
                        ps_ctb_cluster_info->ps_cu_tree_root->ps_child_node_bl;

                    ENABLE_THE_CHILDREN_NODES(ps_tree);
                    ENABLE_THE_CHILDREN_NODES(ps_tree->ps_child_node_tl);
                    ENABLE_THE_CHILDREN_NODES(ps_tree->ps_child_node_tr);
                    ENABLE_THE_CHILDREN_NODES(ps_tree->ps_child_node_bl);
                    ENABLE_THE_CHILDREN_NODES(ps_tree->ps_child_node_br);
#endif

                    if(au1_is_32x32Blk_noisy[2] && DISABLE_INTRA_WHEN_NOISY)
                    {
                        ps_tree->is_node_valid = 0;
                        ps_ctb_cluster_info->ps_cu_tree_root->is_node_valid = 0;
                        en_merge_execution = (en_merge_execution & (~(1 << 4)));
                    }
                }

                if((en_merge_32x32 & 8) && (en_merge_execution & 8))
                {
                    range_prms_t *ps_pic_limit;
                    if(s_merge_prms_32x32_br.i4_use_rec == 1)
                    {
                        ps_pic_limit = &s_pic_limit_rec;
                    }
                    else
                    {
                        ps_pic_limit = &s_pic_limit_inp;
                    }
                    /* MV limit is different based on ref. PIC */
                    for(ref_ctr = 0; ref_ctr < num_act_ref_pics; ref_ctr++)
                    {
                        hme_derive_search_range(
                            s_merge_prms_32x32_br.aps_mv_range[ref_ctr],
                            ps_pic_limit,
                            &as_mv_limit[ref_ctr],
                            (i4_ctb_x << 6) + 32,
                            (i4_ctb_y << 6) + 32,
                            32,
                            32);

                        SCALE_RANGE_PRMS_POINTERS(
                            s_merge_prms_32x32_br.aps_mv_range[ref_ctr],
                            s_merge_prms_32x32_br.aps_mv_range[ref_ctr],
                            2);
                    }
                    s_merge_prms_32x32_br.i4_ctb_x_off = i4_ctb_x << 6;
                    s_merge_prms_32x32_br.i4_ctb_y_off = i4_ctb_y << 6;
                    s_subpel_prms.u1_is_cu_noisy = au1_is_32x32Blk_noisy[3];

                    e_merge_result = hme_try_merge_high_speed(
                        ps_thrd_ctxt,
                        ps_ctxt,
                        ps_cur_ipe_ctb,
                        &s_subpel_prms,
                        &s_merge_prms_32x32_br,
                        ps_pu_results,
                        &as_pu_results[0][0][0]);

                    if(e_merge_result == CU_MERGED)
                    {
                        /*inter_cu_results_t *ps_cu_results = s_merge_prms_32x32_br.ps_results_merge->ps_cu_results;

                        if(!((ps_cu_results->u1_num_best_results == 1) &&
                        (ps_cu_results->ps_best_results->as_pu_results->pu.b1_intra_flag)))
                        {
                        hme_map_mvs_to_grid
                        (
                        &aps_mv_grid[0],
                        s_merge_prms_32x32_br.ps_results_merge,
                        s_merge_prms_32x32_br.au1_pred_dir_searched,
                        s_merge_prms_32x32_br.i4_num_pred_dir_actual
                        );
                        }*/

                        if(ME_PRISTINE_QUALITY != e_me_quality_presets)
                        {
                            ps_ctxt->ps_cu_tree_curr_row[(i4_ctb_x * MAX_NUM_NODES_CU_TREE)]
                                .ps_child_node_br->is_node_valid = 1;
                            NULLIFY_THE_CHILDREN_NODES(
                                ps_ctxt->ps_cu_tree_curr_row[(i4_ctb_x * MAX_NUM_NODES_CU_TREE)]
                                    .ps_child_node_br);
                        }

                        merge_count_32x32++;
                        e_merge_result = CU_SPLIT;
                    }
                    else if(ME_PRISTINE_QUALITY == e_me_quality_presets)
                    {
#if ENABLE_CU_TREE_CULLING
                        cur_ctb_cu_tree_t *ps_tree =
                            ps_ctb_cluster_info->ps_cu_tree_root->ps_child_node_br;

                        ps_ctb_cluster_info->ps_cu_tree_root->is_node_valid = 0;
                        en_merge_execution = (en_merge_execution & (~(1 << 4)));
                        ENABLE_THE_CHILDREN_NODES(ps_tree);
                        ENABLE_THE_CHILDREN_NODES(ps_tree->ps_child_node_tl);
                        ENABLE_THE_CHILDREN_NODES(ps_tree->ps_child_node_tr);
                        ENABLE_THE_CHILDREN_NODES(ps_tree->ps_child_node_bl);
                        ENABLE_THE_CHILDREN_NODES(ps_tree->ps_child_node_br);
#endif
                    }
                }
                else if((en_merge_32x32 & 8) && (!(en_merge_execution & 8)))
                {
#if ENABLE_CU_TREE_CULLING
                    cur_ctb_cu_tree_t *ps_tree =
                        ps_ctb_cluster_info->ps_cu_tree_root->ps_child_node_br;

                    ENABLE_THE_CHILDREN_NODES(ps_tree);
                    ENABLE_THE_CHILDREN_NODES(ps_tree->ps_child_node_tl);
                    ENABLE_THE_CHILDREN_NODES(ps_tree->ps_child_node_tr);
                    ENABLE_THE_CHILDREN_NODES(ps_tree->ps_child_node_bl);
                    ENABLE_THE_CHILDREN_NODES(ps_tree->ps_child_node_br);
#endif

                    if(au1_is_32x32Blk_noisy[3] && DISABLE_INTRA_WHEN_NOISY)
                    {
                        ps_tree->is_node_valid = 0;
                        ps_ctb_cluster_info->ps_cu_tree_root->is_node_valid = 0;
                        en_merge_execution = (en_merge_execution & (~(1 << 4)));
                    }
                }

                /* Try merging all 32x32 to 64x64 candts */
                if(((en_merge_32x32 & 0xf) == 0xf) &&
                   (((merge_count_32x32 == 4) && (e_me_quality_presets != ME_PRISTINE_QUALITY)) ||
                    ((en_merge_execution & 16) && (e_me_quality_presets == ME_PRISTINE_QUALITY))))
                    if((((e_me_quality_presets == ME_XTREME_SPEED_25) &&
                         !DISABLE_64X64_BLOCK_MERGE_IN_ME_IN_XS25) ||
                        (e_me_quality_presets != ME_XTREME_SPEED_25)))
                    {
                        range_prms_t *ps_pic_limit;
                        if(s_merge_prms_64x64.i4_use_rec == 1)
                        {
                            ps_pic_limit = &s_pic_limit_rec;
                        }
                        else
                        {
                            ps_pic_limit = &s_pic_limit_inp;
                        }
                        /* MV limit is different based on ref. PIC */
                        for(ref_ctr = 0; ref_ctr < num_act_ref_pics; ref_ctr++)
                        {
                            hme_derive_search_range(
                                s_merge_prms_64x64.aps_mv_range[ref_ctr],
                                ps_pic_limit,
                                &as_mv_limit[ref_ctr],
                                i4_ctb_x << 6,
                                i4_ctb_y << 6,
                                64,
                                64);

                            SCALE_RANGE_PRMS_POINTERS(
                                s_merge_prms_64x64.aps_mv_range[ref_ctr],
                                s_merge_prms_64x64.aps_mv_range[ref_ctr],
                                2);
                        }
                        s_merge_prms_64x64.i4_ctb_x_off = i4_ctb_x << 6;
                        s_merge_prms_64x64.i4_ctb_y_off = i4_ctb_y << 6;
                        s_subpel_prms.u1_is_cu_noisy = au1_is_64x64Blk_noisy[0];

                        e_merge_result = hme_try_merge_high_speed(
                            ps_thrd_ctxt,
                            ps_ctxt,
                            ps_cur_ipe_ctb,
                            &s_subpel_prms,
                            &s_merge_prms_64x64,
                            ps_pu_results,
                            &as_pu_results[0][0][0]);

                        if((e_merge_result == CU_MERGED) &&
                           (ME_PRISTINE_QUALITY != e_me_quality_presets))
                        {
                            ps_ctxt->ps_cu_tree_curr_row[(i4_ctb_x * MAX_NUM_NODES_CU_TREE)]
                                .is_node_valid = 1;
                            NULLIFY_THE_CHILDREN_NODES(
                                ps_ctxt->ps_cu_tree_curr_row + (i4_ctb_x * MAX_NUM_NODES_CU_TREE));
                        }
                        else if(
                            (e_merge_result == CU_SPLIT) &&
                            (ME_PRISTINE_QUALITY == e_me_quality_presets))
                        {
                            ps_ctxt->ps_cu_tree_curr_row[(i4_ctb_x * MAX_NUM_NODES_CU_TREE)]
                                .is_node_valid = 0;
                        }
                    }

                /*****************************************************************/
                /* UPDATION OF RESULT TO EXTERNAL STRUCTURES                     */
                /*****************************************************************/
                pf_ext_update_fxn((void *)ps_thrd_ctxt, (void *)ps_ctxt, i4_ctb_x, i4_ctb_y);

                {
#ifdef _DEBUG
                    S32 wd = ((i4_pic_wd - s_common_frm_prms.i4_ctb_x_off) >= 64)
                                 ? 64
                                 : i4_pic_wd - s_common_frm_prms.i4_ctb_x_off;
                    S32 ht = ((i4_pic_ht - s_common_frm_prms.i4_ctb_y_off) >= 64)
                                 ? 64
                                 : i4_pic_ht - s_common_frm_prms.i4_ctb_y_off;
                    ASSERT(
                        (wd * ht) ==
                        ihevce_compute_area_of_valid_cus_in_ctb(
                            &ps_ctxt->ps_cu_tree_curr_row[(i4_ctb_x * MAX_NUM_NODES_CU_TREE)]));
#endif
                }
            }

            /* set the dependency for the corresponding row in enc loop */
            ihevce_dmgr_set_row_row_sync(
                pv_dep_mngr_encloop_dep_me,
                (i4_ctb_x + 1),
                i4_ctb_y,
                tile_col_idx /* Col Tile No. */);

            left_ctb_in_diff_tile = 0;
        }
    }
}

/**
********************************************************************************
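*  @fn   void hme_refine_no_encode(coarse_me_ctxt_t *ps_ctxt,
*                       refine_prms_t *ps_refine_prms,
*                       multi_thrd_ctxt_t *ps_multi_thrd_ctxt,
*                       S32 lyr_job_type,
*                       WORD32 i4_ping_pong,
*                       void **ppv_dep_mngr_hme_sync)
*
*  @brief  Top level entry point for refinement ME
*
*  @param[in,out]  ps_ctxt : ME Handle
*
*  @param[in]  ps_refine_prms : refinement layer prms
*
*  @param  ps_multi_thrd_ctxt : multi-thread context handle
*
*  @param  lyr_job_type : job type of the layer being processed
*                         (presumably selects the multi-thread job queue;
*                         confirm against callers)
*
*  @param  i4_ping_pong : ping-pong selector (presumably the buffer/job-queue
*                         instance index; confirm against callers)
*
*  @param  ppv_dep_mngr_hme_sync : dependency manager handles used for HME
*                         sync (presumably one per layer; confirm)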
*
*  @return None
********************************************************************************
*/
void hme_refine_no_encode(
    coarse_me_ctxt_t *ps_ctxt,
    refine_prms_t *ps_refine_prms,
    multi_thrd_ctxt_t *ps_multi_thrd_ctxt,
    S32 lyr_job_type,
    WORD32 i4_ping_pong,
    void **ppv_dep_mngr_hme_sync)
{
    BLK_SIZE_T e_search_blk_size, e_result_blk_size;
    ME_QUALITY_PRESETS_T e_me_quality_presets =
        ps_ctxt->s_init_prms.s_me_coding_tools.e_me_quality_presets;

    /*************************************************************************/
    /* Complexity of search: Low to High                                     */
    /*************************************************************************/
    SEARCH_COMPLEXITY_T e_search_complexity;

    /*************************************************************************/
    /* Config parameter structures for various ME submodules                 */
    /*************************************************************************/
    hme_search_prms_t s_search_prms_blk;
    mvbank_update_prms_t s_mv_update_prms;

    /*************************************************************************/
    /* All types of search candidates for predictor based search.            */
    /*************************************************************************/
    S32 num_init_candts = 0;
    search_candt_t *ps_search_candts, as_search_candts[MAX_INIT_CANDTS];
    search_node_t as_top_neighbours[4], as_left_neighbours[3];
    search_node_t *ps_candt_zeromv, *ps_candt_tl, *ps_candt_tr;
    search_node_t *ps_candt_l, *ps_candt_t;
    search_node_t *ps_candt_prj_br[2], *ps_candt_prj_b[2], *ps_candt_prj_r[2];
    search_node_t *ps_candt_prj_bl[2];
    search_node_t *ps_candt_prj_tr[2], *ps_candt_prj_t[2], *ps_candt_prj_tl[2];
    search_node_t *ps_candt_prj_coloc[2];

    pf_get_wt_inp fp_get_wt_inp;

    search_node_t as_unique_search_nodes[MAX_INIT_CANDTS * 9];
    U32 au4_unique_node_map[MAP_X_MAX * 2];

    /*EIID */
    WORD32 i4_num_inter_wins = 0;  //debug code to find stat of
    WORD32 i4_num_comparisions = 0;  //debug code
    WORD32 i4_threshold_multiplier;
    WORD32 i4_threshold_divider;
    WORD32 i4_temporal_layer =
        ps_multi_thrd_ctxt->aps_curr_inp_pre_enc[i4_ping_pong]->s_lap_out.i4_temporal_lyr_id;

    /*************************************************************************/
    /* points to the search results for the blk level search (8x8/16x16)     */
    /*************************************************************************/
    search_results_t *ps_search_results;

    /*************************************************************************/
    /* Coordinates                                                           */
    /*************************************************************************/
    S32 blk_x, i4_ctb_x, blk_id_in_ctb;
    //S32 i4_ctb_y;
    S32 pos_x, pos_y;
    S32 blk_id_in_full_ctb;
    S32 i4_num_srch_cands;

    S32 blk_y;

    /*************************************************************************/
    /* Related to dimensions of block being searched and pic dimensions      */
    /*************************************************************************/
    S32 blk_wd, blk_ht, blk_size_shift, num_blks_in_row, num_blks_in_pic;
    S32 i4_pic_wd, i4_pic_ht, num_blks_in_this_ctb;
    S32 num_results_prev_layer;

    /*************************************************************************/
    /* Size of a basic unit for this layer. For non encode layers, we search */
    /* in block sizes of 8x8. For encode layers, though we search 16x16s the */
    /* basic unit size is the ctb size.                                      */
    /*************************************************************************/
    S32 unit_size;

    /*************************************************************************/
    /* Pointers to context in current and coarser layers                     */
    /*************************************************************************/
    layer_ctxt_t *ps_curr_layer, *ps_coarse_layer;

    /*************************************************************************/
    /* to store mv range per blk, and picture limit, allowed search range    */
    /* range prms in hpel and qpel units as well                             */
    /*************************************************************************/
    range_prms_t s_range_prms_inp, s_range_prms_rec;
    range_prms_t s_pic_limit_inp, s_pic_limit_rec, as_mv_limit[MAX_NUM_REF];
    /*************************************************************************/
    /* These variables are used to track number of references at different   */
    /* stages of ME.                                                         */
    /*************************************************************************/
    S32 i4_num_ref_fpel, i4_num_ref_before_merge;
    S32 i4_num_ref_each_dir, i, i4_num_ref_prev_layer;
    S32 lambda_inp = ps_refine_prms->lambda_inp;

    /*************************************************************************/
    /* When a layer is implicit, it means that it searches on 1 or 2 ref idx */
    /* Explicit means it searches on all active ref idx.                     */
    /*************************************************************************/
    S32 curr_layer_implicit, prev_layer_implicit;

    /*************************************************************************/
    /* Variables for loop counts                                             */
    /*************************************************************************/
    S32 id;
    S08 i1_ref_idx;

    /*************************************************************************/
    /* Input pointer and stride                                              */
    /*************************************************************************/
    U08 *pu1_inp;
    S32 i4_inp_stride;

    S32 end_of_frame;

    S32 num_sync_units_in_row;

    PF_HME_PROJECT_COLOC_CANDT_FXN pf_hme_project_coloc_candt;
    ASSERT(ps_refine_prms->i4_layer_id < ps_ctxt->num_layers - 1);

    /*************************************************************************/
    /* Pointers to current and coarse layer are needed for projection */
    /* Pointer to prev layer are needed for other candts like coloc   */
    /*************************************************************************/
    ps_curr_layer = ps_ctxt->ps_curr_descr->aps_layers[ps_refine_prms->i4_layer_id];

    ps_coarse_layer = ps_ctxt->ps_curr_descr->aps_layers[ps_refine_prms->i4_layer_id + 1];

    num_results_prev_layer = ps_coarse_layer->ps_layer_mvbank->i4_num_mvs_per_ref;

    /* Function pointer is selected based on the C vc X86 macro */

    fp_get_wt_inp = ((ihevce_me_optimised_function_list_t *)ps_ctxt->pv_me_optimised_function_list)
                        ->pf_get_wt_inp_8x8;

    i4_inp_stride = ps_curr_layer->i4_inp_stride;
    i4_pic_wd = ps_curr_layer->i4_wd;
    i4_pic_ht = ps_curr_layer->i4_ht;
    e_search_complexity = ps_refine_prms->e_search_complexity;

    end_of_frame = 0;

    /* If the previous layer is non-encode layer, then use dyadic projection */
    if(0 == ps_ctxt->u1_encode[ps_refine_prms->i4_layer_id + 1])
        pf_hme_project_coloc_candt = hme_project_coloc_candt_dyadic;
    else
        pf_hme_project_coloc_candt = hme_project_coloc_candt;

    /* This points to all the initial candts */
    ps_search_candts = &as_search_candts[0];

    {
        e_search_blk_size = BLK_8x8;
        blk_wd = blk_ht = 8;
        blk_size_shift = 3;
        s_mv_update_prms.i4_shift = 0;
        /*********************************************************************/
        /* In case we do not encode this layer, we search 8x8 with or without*/
        /* enable 4x4 SAD.                                                   */
        /*********************************************************************/
        {
            S32 i4_mask = (ENABLE_2Nx2N);

            e_result_blk_size = BLK_8x8;
            if(ps_refine_prms->i4_enable_4x4_part)
            {
                i4_mask |= (ENABLE_NxN);
                e_result_blk_size = BLK_4x4;
                s_mv_update_prms.i4_shift = 1;
            }

            s_search_prms_blk.i4_part_mask = i4_mask;
        }

        unit_size = blk_wd;
        s_search_prms_blk.i4_inp_stride = unit_size;
    }

    /* This is required to properly update the layer mv bank */
    s_mv_update_prms.e_search_blk_size = e_search_blk_size;
    s_search_prms_blk.e_blk_size = e_search_blk_size;

    /*************************************************************************/
    /* If current layer is explicit, then the number of ref frames are to    */
    /* be same as previous layer. Else it will be 2                          */
    /*************************************************************************/
    i4_num_ref_prev_layer = ps_coarse_layer->ps_layer_mvbank->i4_num_ref;
    if(ps_refine_prms->explicit_ref)
    {
        curr_layer_implicit = 0;
        i4_num_ref_fpel = i4_num_ref_prev_layer;
        /* 100578 : Using same mv cost fun. for all presets. */
        s_search_prms_blk.pf_mv_cost_compute = compute_mv_cost_refine;
    }
    else
    {
        i4_num_ref_fpel = 2;
        curr_layer_implicit = 1;
        {
            if(ME_MEDIUM_SPEED > e_me_quality_presets)
            {
                s_search_prms_blk.pf_mv_cost_compute = compute_mv_cost_implicit;
            }
            else
            {
#if USE_MODIFIED == 1
                s_search_prms_blk.pf_mv_cost_compute = compute_mv_cost_implicit_high_speed_modified;
#else
                s_search_prms_blk.pf_mv_cost_compute = compute_mv_cost_implicit_high_speed;
#endif
            }
        }
    }

    i4_num_ref_fpel = MIN(i4_num_ref_fpel, i4_num_ref_prev_layer);
    if(ps_multi_thrd_ctxt->aps_curr_inp_pre_enc[i4_ping_pong]->s_lap_out.i4_pic_type ==
           IV_IDR_FRAME ||
       ps_multi_thrd_ctxt->aps_curr_inp_pre_enc[i4_ping_pong]->s_lap_out.i4_pic_type == IV_I_FRAME)
    {
        i4_num_ref_fpel = 1;
    }
    if(i4_num_ref_prev_layer <= 2)
    {
        prev_layer_implicit = 1;
        curr_layer_implicit = 1;
        i4_num_ref_each_dir = 1;
    }
    else
    {
        /* It is assumed that we have equal number of references in each dir */
        //ASSERT(!(i4_num_ref_prev_layer & 1));
        prev_layer_implicit = 0;
        i4_num_ref_each_dir = i4_num_ref_prev_layer >> 1;
    }
    s_mv_update_prms.i4_num_ref = i4_num_ref_fpel;
    s_mv_update_prms.i4_num_active_ref_l0 = ps_ctxt->s_frm_prms.u1_num_active_ref_l0;
    s_mv_update_prms.i4_num_active_ref_l1 = ps_ctxt->s_frm_prms.u1_num_active_ref_l1;

    /* this can be kept to 1 or 2 */
    i4_num_ref_before_merge = 2;
    i4_num_ref_before_merge = MIN(i4_num_ref_before_merge, i4_num_ref_fpel);

    /* Set up place holders to hold the search nodes of each initial candt */
    for(i = 0; i < MAX_INIT_CANDTS; i++)
    {
        ps_search_candts[i].ps_search_node = &ps_ctxt->s_init_search_node[i];
        INIT_SEARCH_NODE(ps_search_candts[i].ps_search_node, 0);
    }

    /* redundant, but doing it here since it is used in pred ctxt init */
    ps_candt_zeromv = ps_search_candts[0].ps_search_node;
    for(i = 0; i < 3; i++)
    {
        search_node_t *ps_search_node;
        ps_search_node = &as_left_neighbours[i];
        INIT_SEARCH_NODE(ps_search_node, 0);
        ps_search_node = &as_top_neighbours[i];
        INIT_SEARCH_NODE(ps_search_node, 0);
    }

    INIT_SEARCH_NODE(&as_top_neighbours[3], 0);
    /* bottom left node always not available for the blk being searched */
    as_left_neighbours[2].u1_is_avail = 0;
    /*************************************************************************/
    /* Initialize all the search results structure here. We update all the   */
    /* search results to default values, and configure things like blk sizes */
    /*************************************************************************/
    if(ps_refine_prms->i4_encode == 0)
    {
        S32 pred_lx;
        search_results_t *ps_search_results;

        ps_search_results = &ps_ctxt->s_search_results_8x8;
        hme_init_search_results(
            ps_search_results,
            i4_num_ref_fpel,
            ps_refine_prms->i4_num_fpel_results,
            ps_refine_prms->i4_num_results_per_part,
            e_search_blk_size,
            0,
            0,
            &ps_ctxt->au1_is_past[0]);
        for(pred_lx = 0; pred_lx < 2; pred_lx++)
        {
            hme_init_pred_ctxt_no_encode(
                &ps_search_results->as_pred_ctxt[pred_lx],
                ps_search_results,
                &as_top_neighbours[0],
                &as_left_neighbours[0],
                &ps_candt_prj_coloc[0],
                ps_candt_zeromv,
                ps_candt_zeromv,
                pred_lx,
                lambda_inp,
                ps_refine_prms->lambda_q_shift,
                &ps_ctxt->apu1_ref_bits_tlu_lc[0],
                &ps_ctxt->ai2_ref_scf[0]);
        }
    }

    /*********************************************************************/
    /* Initialize the dyn. search range params. for each reference index */
    /* in current layer ctxt                                             */
    /*********************************************************************/
    /* Only for P pic. For P, both are 0, I&B has them mut. exclusive */
    if(ps_ctxt->s_frm_prms.is_i_pic == ps_ctxt->s_frm_prms.bidir_enabled)
    {
        WORD32 ref_ctr;

        for(ref_ctr = 0; ref_ctr < s_mv_update_prms.i4_num_ref; ref_ctr++)
        {
            INIT_DYN_SEARCH_PRMS(
                &ps_ctxt->s_coarse_dyn_range_prms
                     .as_dyn_range_prms[ps_refine_prms->i4_layer_id][ref_ctr],
                ps_ctxt->ai4_ref_idx_to_poc_lc[ref_ctr]);
        }
    }

    /* Next set up initial candidates according to a given set of rules.   */
    /* The number of initial candidates affects the quality of ME in the   */
    /* case of motion with multiple degrees of freedom. In case of simple  */
    /* translational motion, a current and a few causal and non causal     */
    /* candts would suffice. More candidates help to cover more complex    */
    /* cases like partitions, rotation/zoom, occlusion in/out, fine motion */
    /* where multiple ref helps etc.                                       */
    /* The candidate choice also depends on the following parameters.      */
    /* e_search_complexity: SRCH_CX_LOW, SRCH_CX_MED, SRCH_CX_HIGH         */
    /* Whether we encode or not, and the type of search across reference   */
    /* i.e. the previous layer may have been explicit/implicit and curr    */
    /* layer may be explicit/implicit                                      */

    /* 0, 0, L, T, projected coloc best always present by default */
    id = hme_decide_search_candidate_priority_in_l1_and_l2_me(ZERO_MV, e_me_quality_presets);
    ps_candt_zeromv = ps_search_candts[id].ps_search_node;
    ps_search_candts[id].u1_num_steps_refine = 0;
    ps_candt_zeromv->s_mv.i2_mvx = 0;
    ps_candt_zeromv->s_mv.i2_mvy = 0;

    id = hme_decide_search_candidate_priority_in_l1_and_l2_me(SPATIAL_LEFT0, e_me_quality_presets);
    ps_candt_l = ps_search_candts[id].ps_search_node;
    ps_search_candts[id].u1_num_steps_refine = 0;

    /* Even in ME_HIGH_SPEED mode, in layer 0, blocks */
    /* not at the CTB boundary use the causal T and */
    /* not the projected T, although the candidate is */
    /* still pointed to by ps_candt_prj_t[0] */
    if(ME_MEDIUM_SPEED <= e_me_quality_presets)
    {
        /* Using Projected top to eliminate sync */
        id = hme_decide_search_candidate_priority_in_l1_and_l2_me(
            PROJECTED_TOP0, e_me_quality_presets);
        ps_candt_prj_t[0] = ps_search_candts[id].ps_search_node;
        ps_search_candts[id].u1_num_steps_refine = 1;
    }
    else
    {
        id = hme_decide_search_candidate_priority_in_l1_and_l2_me(
            SPATIAL_TOP0, e_me_quality_presets);
        ps_candt_t = ps_search_candts[id].ps_search_node;
        ps_search_candts[id].u1_num_steps_refine = 0;
    }

    id = hme_decide_search_candidate_priority_in_l1_and_l2_me(
        PROJECTED_COLOC0, e_me_quality_presets);
    ps_candt_prj_coloc[0] = ps_search_candts[id].ps_search_node;
    ps_search_candts[id].u1_num_steps_refine = 1;

    id = hme_decide_search_candidate_priority_in_l1_and_l2_me(
        PROJECTED_COLOC1, e_me_quality_presets);
    ps_candt_prj_coloc[1] = ps_search_candts[id].ps_search_node;
    ps_search_candts[id].u1_num_steps_refine = 1;

    if(ME_MEDIUM_SPEED <= e_me_quality_presets)
    {
        id = hme_decide_search_candidate_priority_in_l1_and_l2_me(
            PROJECTED_TOP_RIGHT0, e_me_quality_presets);
        ps_candt_prj_tr[0] = ps_search_candts[id].ps_search_node;
        ps_search_candts[id].u1_num_steps_refine = 1;

        id = hme_decide_search_candidate_priority_in_l1_and_l2_me(
            PROJECTED_TOP_LEFT0, e_me_quality_presets);
        ps_candt_prj_tl[0] = ps_search_candts[id].ps_search_node;
        ps_search_candts[id].u1_num_steps_refine = 1;
    }
    else
    {
        id = hme_decide_search_candidate_priority_in_l1_and_l2_me(
            SPATIAL_TOP_RIGHT0, e_me_quality_presets);
        ps_candt_tr = ps_search_candts[id].ps_search_node;
        ps_search_candts[id].u1_num_steps_refine = 0;

        id = hme_decide_search_candidate_priority_in_l1_and_l2_me(
            SPATIAL_TOP_LEFT0, e_me_quality_presets);
        ps_candt_tl = ps_search_candts[id].ps_search_node;
        ps_search_candts[id].u1_num_steps_refine = 0;
    }

    id = hme_decide_search_candidate_priority_in_l1_and_l2_me(
        PROJECTED_RIGHT0, e_me_quality_presets);
    ps_candt_prj_r[0] = ps_search_candts[id].ps_search_node;
    ps_search_candts[id].u1_num_steps_refine = 1;

    id = hme_decide_search_candidate_priority_in_l1_and_l2_me(
        PROJECTED_BOTTOM0, e_me_quality_presets);
    ps_candt_prj_b[0] = ps_search_candts[id].ps_search_node;
    ps_search_candts[id].u1_num_steps_refine = 1;

    id = hme_decide_search_candidate_priority_in_l1_and_l2_me(
        PROJECTED_BOTTOM_RIGHT0, e_me_quality_presets);
    ps_candt_prj_br[0] = ps_search_candts[id].ps_search_node;
    ps_search_candts[id].u1_num_steps_refine = 1;

    id = hme_decide_search_candidate_priority_in_l1_and_l2_me(
        PROJECTED_BOTTOM_LEFT0, e_me_quality_presets);
    ps_candt_prj_bl[0] = ps_search_candts[id].ps_search_node;
    ps_search_candts[id].u1_num_steps_refine = 1;

    id = hme_decide_search_candidate_priority_in_l1_and_l2_me(
        PROJECTED_RIGHT1, e_me_quality_presets);
    ps_candt_prj_r[1] = ps_search_candts[id].ps_search_node;
    ps_search_candts[id].u1_num_steps_refine = 1;

    id = hme_decide_search_candidate_priority_in_l1_and_l2_me(
        PROJECTED_BOTTOM1, e_me_quality_presets);
    ps_candt_prj_b[1] = ps_search_candts[id].ps_search_node;
    ps_search_candts[id].u1_num_steps_refine = 1;

    id = hme_decide_search_candidate_priority_in_l1_and_l2_me(
        PROJECTED_BOTTOM_RIGHT1, e_me_quality_presets);
    ps_candt_prj_br[1] = ps_search_candts[id].ps_search_node;
    ps_search_candts[id].u1_num_steps_refine = 1;

    id = hme_decide_search_candidate_priority_in_l1_and_l2_me(
        PROJECTED_BOTTOM_LEFT1, e_me_quality_presets);
    ps_candt_prj_bl[1] = ps_search_candts[id].ps_search_node;
    ps_search_candts[id].u1_num_steps_refine = 1;

    id = hme_decide_search_candidate_priority_in_l1_and_l2_me(PROJECTED_TOP1, e_me_quality_presets);
    ps_candt_prj_t[1] = ps_search_candts[id].ps_search_node;
    ps_search_candts[id].u1_num_steps_refine = 1;

    id = hme_decide_search_candidate_priority_in_l1_and_l2_me(
        PROJECTED_TOP_RIGHT1, e_me_quality_presets);
    ps_candt_prj_tr[1] = ps_search_candts[id].ps_search_node;
    ps_search_candts[id].u1_num_steps_refine = 1;

    id = hme_decide_search_candidate_priority_in_l1_and_l2_me(
        PROJECTED_TOP_LEFT1, e_me_quality_presets);
    ps_candt_prj_tl[1] = ps_search_candts[id].ps_search_node;
    ps_search_candts[id].u1_num_steps_refine = 1;

    /*************************************************************************/
    /* Now that the candidates have been ordered, to choose the right number */
    /* of initial candidates.                                                */
    /*************************************************************************/
    if(curr_layer_implicit && !prev_layer_implicit)
    {
        if(e_search_complexity == SEARCH_CX_LOW)
            num_init_candts = 7;
        else if(e_search_complexity == SEARCH_CX_MED)
            num_init_candts = 13;
        else if(e_search_complexity == SEARCH_CX_HIGH)
            num_init_candts = 18;
        else
            ASSERT(0);
    }
    else
    {
        if(e_search_complexity == SEARCH_CX_LOW)
            num_init_candts = 5;
        else if(e_search_complexity == SEARCH_CX_MED)
            num_init_candts = 11;
        else if(e_search_complexity == SEARCH_CX_HIGH)
            num_init_candts = 16;
        else
            ASSERT(0);
    }

    if(ME_XTREME_SPEED_25 == e_me_quality_presets)
    {
        num_init_candts = NUM_INIT_SEARCH_CANDS_IN_L1_AND_L2_ME_IN_XS25;
    }

    /*************************************************************************/
    /* The following search parameters are fixed throughout the search across*/
    /* all blks. So these are configured outside processing loop             */
    /*************************************************************************/
    s_search_prms_blk.i4_num_init_candts = num_init_candts;
    s_search_prms_blk.i4_start_step = 1;
    s_search_prms_blk.i4_use_satd = 0;
    s_search_prms_blk.i4_num_steps_post_refine = ps_refine_prms->i4_num_steps_post_refine_fpel;
    /* we use recon only for encoded layers, otherwise it is not available */
    s_search_prms_blk.i4_use_rec = ps_refine_prms->i4_encode & ps_refine_prms->i4_use_rec_in_fpel;

    s_search_prms_blk.ps_search_candts = ps_search_candts;
    /* We use the same mv_range for all ref. pic. So assign to member 0 */
    if(s_search_prms_blk.i4_use_rec)
        s_search_prms_blk.aps_mv_range[0] = &s_range_prms_rec;
    else
        s_search_prms_blk.aps_mv_range[0] = &s_range_prms_inp;
    /*************************************************************************/
    /* Initialize coordinates. Meaning as follows                            */
    /* blk_x : x coordinate of the 16x16 blk, in terms of number of blks     */
    /* blk_y : same as above, y coord.                                       */
    /* num_blks_in_this_ctb : number of blks in this given ctb that starts   */
    /* at i4_ctb_x, i4_ctb_y. This may not be 16 at picture boundaries.      */
    /* i4_ctb_x, i4_ctb_y: pixel coordinate of the ctb relative to top left  */
    /* corner of the picture. Always multiple of 64.                         */
    /* blk_id_in_ctb : encode order id of the blk in the ctb.                */
    /*************************************************************************/
    blk_y = 0;
    blk_id_in_ctb = 0;

    GET_NUM_BLKS_IN_PIC(i4_pic_wd, i4_pic_ht, blk_size_shift, num_blks_in_row, num_blks_in_pic);

    /* Get the number of sync units in a row based on encode/non encode layer */
    num_sync_units_in_row = num_blks_in_row;

    /*************************************************************************/
    /* Picture limit on all 4 sides. This will be used to set mv limits for  */
    /* every block given its coordinate. Note this assumes that the min amt  */
    /* of padding to right of pic is equal to the blk size. If we go all the */
    /* way upto 64x64, then the min padding on right size of picture should  */
    /* be 64, and also on bottom side of picture.                            */
    /*************************************************************************/
    SET_PIC_LIMIT(
        s_pic_limit_inp,
        ps_curr_layer->i4_pad_x_inp,
        ps_curr_layer->i4_pad_y_inp,
        ps_curr_layer->i4_wd,
        ps_curr_layer->i4_ht,
        s_search_prms_blk.i4_num_steps_post_refine);

    SET_PIC_LIMIT(
        s_pic_limit_rec,
        ps_curr_layer->i4_pad_x_rec,
        ps_curr_layer->i4_pad_y_rec,
        ps_curr_layer->i4_wd,
        ps_curr_layer->i4_ht,
        s_search_prms_blk.i4_num_steps_post_refine);

    /*************************************************************************/
    /* set the MV limit per ref. pic.                                        */
    /*    - P pic. : Based on the config params.                             */
    /*    - B/b pic: Based on the Max/Min MV from prev. P and config. param. */
    /*************************************************************************/
    {
        WORD32 ref_ctr;
        /* Only for B/b pic. */
        if(1 == ps_ctxt->s_frm_prms.bidir_enabled)
        {
            WORD16 i2_mv_y_per_poc, i2_max_mv_y;
            WORD32 cur_poc, ref_poc, abs_poc_diff;

            cur_poc = ps_ctxt->i4_curr_poc;

            /* Get abs MAX for symmetric search */
            i2_mv_y_per_poc = MAX(
                ps_ctxt->s_coarse_dyn_range_prms.i2_dyn_max_y_per_poc[ps_refine_prms->i4_layer_id],
                (ABS(ps_ctxt->s_coarse_dyn_range_prms
                         .i2_dyn_min_y_per_poc[ps_refine_prms->i4_layer_id])));

            for(ref_ctr = 0; ref_ctr < i4_num_ref_fpel; ref_ctr++)
            {
                ref_poc = ps_ctxt->ai4_ref_idx_to_poc_lc[ref_ctr];
                abs_poc_diff = ABS((cur_poc - ref_poc));
                /* Get the cur. max MV based on POC distance */
                i2_max_mv_y = i2_mv_y_per_poc * abs_poc_diff;
                i2_max_mv_y = MIN(i2_max_mv_y, ps_curr_layer->i2_max_mv_y);

                as_mv_limit[ref_ctr].i2_min_x = -ps_curr_layer->i2_max_mv_x;
                as_mv_limit[ref_ctr].i2_min_y = -i2_max_mv_y;
                as_mv_limit[ref_ctr].i2_max_x = ps_curr_layer->i2_max_mv_x;
                as_mv_limit[ref_ctr].i2_max_y = i2_max_mv_y;
            }
        }
        else
        {
            /* Set the Config. File Params for P pic. */
            for(ref_ctr = 0; ref_ctr < i4_num_ref_fpel; ref_ctr++)
            {
                as_mv_limit[ref_ctr].i2_min_x = -ps_curr_layer->i2_max_mv_x;
                as_mv_limit[ref_ctr].i2_min_y = -ps_curr_layer->i2_max_mv_y;
                as_mv_limit[ref_ctr].i2_max_x = ps_curr_layer->i2_max_mv_x;
                as_mv_limit[ref_ctr].i2_max_y = ps_curr_layer->i2_max_mv_y;
            }
        }
    }

    /* EIID: Calculate threshold based on quality preset and/or temporal layers */
    if(e_me_quality_presets == ME_MEDIUM_SPEED)
    {
        i4_threshold_multiplier = 1;
        i4_threshold_divider = 4;
    }
    else if(e_me_quality_presets == ME_HIGH_SPEED)
    {
        i4_threshold_multiplier = 1;
        i4_threshold_divider = 2;
    }
    else if((e_me_quality_presets == ME_XTREME_SPEED) || (e_me_quality_presets == ME_XTREME_SPEED_25))
    {
#if OLD_XTREME_SPEED
        /* Hard coding the temporal ID value to 1, if it is older xtreme speed */
        i4_temporal_layer = 1;
#endif
        if(i4_temporal_layer == 0)
        {
            i4_threshold_multiplier = 3;
            i4_threshold_divider = 4;
        }
        else if(i4_temporal_layer == 1)
        {
            i4_threshold_multiplier = 3;
            i4_threshold_divider = 4;
        }
        else if(i4_temporal_layer == 2)
        {
            i4_threshold_multiplier = 1;
            i4_threshold_divider = 1;
        }
        else
        {
            i4_threshold_multiplier = 5;
            i4_threshold_divider = 4;
        }
    }
    else if(e_me_quality_presets == ME_HIGH_QUALITY)
    {
        i4_threshold_multiplier = 1;
        i4_threshold_divider = 1;
    }

    /*************************************************************************/
    /*************************************************************************/
    /*************************************************************************/
    /* START OF THE CORE LOOP                                                */
    /* If Encode is 0, then we just loop over each blk                       */
    /*************************************************************************/
    /*************************************************************************/
    /*************************************************************************/
    while(0 == end_of_frame)
    {
        job_queue_t *ps_job;
        ihevce_ed_blk_t *ps_ed_blk_ctxt_curr_row;  //EIID
        WORD32 i4_ctb_row_ctr;  //counter to calculate CTB row counter. It's (row_ctr /4)
        WORD32 i4_num_ctbs_in_row = (num_blks_in_row + 3) / 4;  //calculations verified for L1 only
        //+3 to get ceil values when divided by 4
        WORD32 i4_num_4x4_blocks_in_ctb_at_l1 =
            8 * 8;  //considering CTB size 32x32 at L1. hardcoded for now
        //if there is variable for ctb size use that and this variable can be derived
        WORD32 offset_val, check_dep_pos, set_dep_pos;
        void *pv_hme_dep_mngr;
        ihevce_ed_ctb_l1_t *ps_ed_ctb_l1_row;

        /* Get the current layer HME Dep Mngr       */
        /* Note : Use layer_id - 1 in HME layers    */

        pv_hme_dep_mngr = ppv_dep_mngr_hme_sync[ps_refine_prms->i4_layer_id - 1];

        /* Get the current row from the job queue */
        ps_job = (job_queue_t *)ihevce_pre_enc_grp_get_next_job(
            ps_multi_thrd_ctxt, lyr_job_type, 1, i4_ping_pong);

        /* If all rows are done, set the end of process flag to 1, */
        /* and the current row to -1 */
        if(NULL == ps_job)
        {
            blk_y = -1;
            end_of_frame = 1;

            continue;
        }

        if(1 == ps_ctxt->s_frm_prms.is_i_pic)
        {
            /* set the output dependency of current row */
            ihevce_pre_enc_grp_job_set_out_dep(ps_multi_thrd_ctxt, ps_job, i4_ping_pong);
            continue;
        }

        blk_y = ps_job->s_job_info.s_me_job_info.i4_vert_unit_row_no;
        blk_x = 0;
        i4_ctb_x = 0;

        /* wait for Corresponding Pre intra Job to be completed */
        if(1 == ps_refine_prms->i4_layer_id)
        {
            volatile UWORD32 i4_l1_done;
            volatile UWORD32 *pi4_l1_done;
            pi4_l1_done = (volatile UWORD32 *)&ps_multi_thrd_ctxt
                              ->aai4_l1_pre_intra_done[i4_ping_pong][blk_y >> 2];
            i4_l1_done = *pi4_l1_done;
            while(!i4_l1_done)
            {
                i4_l1_done = *pi4_l1_done;
            }
        }
        /* Set Variables for Dep. Checking and Setting */
        set_dep_pos = blk_y + 1;
        if(blk_y > 0)
        {
            offset_val = 2;
            check_dep_pos = blk_y - 1;
        }
        else
        {
            /* First row should run without waiting */
            offset_val = -1;
            check_dep_pos = 0;
        }

        /* EIID: calculate ed_blk_ctxt pointer for current row */
        /* valid for layer-1 only. not verified for (or used with) other layers */
        i4_ctb_row_ctr = blk_y / 4;
        ps_ed_blk_ctxt_curr_row =
            ps_ctxt->ps_ed_blk + (i4_ctb_row_ctr * i4_num_ctbs_in_row *
                                  i4_num_4x4_blocks_in_ctb_at_l1);  //valid for L1 only
        ps_ed_ctb_l1_row = ps_ctxt->ps_ed_ctb_l1 + (i4_ctb_row_ctr * i4_num_ctbs_in_row);

        /* if non-encode layer then i4_ctb_x will be same as blk_x */
        /* loop over all the units in a row                        */
        for(; i4_ctb_x < num_sync_units_in_row; i4_ctb_x++)
        {
            ihevce_ed_blk_t *ps_ed_blk_ctxt_curr_ctb;  //EIDD
            ihevce_ed_ctb_l1_t *ps_ed_ctb_l1_curr;
            WORD32 i4_ctb_blk_ctr = i4_ctb_x / 4;

            /* Wait till top row block is processed   */
            /* Currently checking till top right block*/

            /* Disabled since all candidates, except for */
            /* L and C, are projected from the coarser layer, */
            /* only in ME_HIGH_SPEED mode */
            if((ME_MEDIUM_SPEED > e_me_quality_presets))
            {
                if(i4_ctb_x < (num_sync_units_in_row - 1))
                {
                    ihevce_dmgr_chk_row_row_sync(
                        pv_hme_dep_mngr,
                        i4_ctb_x,
                        offset_val,
                        check_dep_pos,
                        0, /* Col Tile No. : Not supported in PreEnc*/
                        ps_ctxt->thrd_id);
                }
            }

            {
                /* for non encoder layer only one block is processed */
                num_blks_in_this_ctb = 1;
            }

            /* EIID: derive ed_ctxt ptr for current CTB */
            ps_ed_blk_ctxt_curr_ctb =
                ps_ed_blk_ctxt_curr_row +
                (i4_ctb_blk_ctr *
                 i4_num_4x4_blocks_in_ctb_at_l1);  //currently valid for l1 layer only
            ps_ed_ctb_l1_curr = ps_ed_ctb_l1_row + i4_ctb_blk_ctr;

            /* loop over all the blocks in the CTB; the count is always 1 here */
            for(blk_id_in_ctb = 0; blk_id_in_ctb < num_blks_in_this_ctb; blk_id_in_ctb++)
            {
                {
                    /* non encode layer */
                    blk_x = i4_ctb_x;
                    blk_id_in_full_ctb = 0;
                    s_search_prms_blk.i4_cu_x_off = s_search_prms_blk.i4_cu_y_off = 0;
                }

                /* get the current input blk point */
                pos_x = blk_x << blk_size_shift;
                pos_y = blk_y << blk_size_shift;
                pu1_inp = ps_curr_layer->pu1_inp + pos_x + (pos_y * i4_inp_stride);

                /*********************************************************************/
                /* replicate the inp buffer at blk or ctb level for each ref id,     */
                /* Instead of searching with wk * ref(k), we search with Ik = I / wk */
                /* thereby avoiding a bloat up of memory. If we did all references   */
                /* weighted pred, we will end up with a duplicate copy of each ref   */
                /* at each layer, since we need to preserve the original reference.  */
                /* ToDo: Need to observe performance with this mechanism and compare */
                /* with case where ref is weighted.                                  */
                /*********************************************************************/
                if(blk_id_in_ctb == 0)
                {
                    fp_get_wt_inp(
                        ps_curr_layer,
                        &ps_ctxt->s_wt_pred,
                        unit_size,
                        pos_x,
                        pos_y,
                        unit_size,
                        ps_ctxt->num_ref_future + ps_ctxt->num_ref_past,
                        ps_ctxt->i4_wt_pred_enable_flag);
                }

                s_search_prms_blk.i4_x_off = blk_x << blk_size_shift;
                s_search_prms_blk.i4_y_off = blk_y << blk_size_shift;
                /* Select search results from a suitable search result in the context */
                {
                    ps_search_results = &ps_ctxt->s_search_results_8x8;
                }

                s_search_prms_blk.ps_search_results = ps_search_results;

                /* RESET ALL SEARCH RESULTS FOR THE NEW BLK */
                hme_reset_search_results(
                    ps_search_results, s_search_prms_blk.i4_part_mask, MV_RES_FPEL);

                /* Loop across different Ref IDx */
                for(i1_ref_idx = 0; i1_ref_idx < i4_num_ref_fpel; i1_ref_idx++)
                {
                    S32 next_blk_offset = (e_search_blk_size == BLK_16x16) ? 22 : 12;
                    S32 prev_blk_offset = 6;
                    S32 resultid;

                    /*********************************************************************/
                    /* For every blk in the picture, the search range needs to be derived*/
                    /* Any blk can have any mv, but practical search constraints are     */
                    /* imposed by the picture boundary and amt of padding.               */
                    /*********************************************************************/
                    /* MV limit is different based on ref. PIC */
                    hme_derive_search_range(
                        &s_range_prms_inp,
                        &s_pic_limit_inp,
                        &as_mv_limit[i1_ref_idx],
                        pos_x,
                        pos_y,
                        blk_wd,
                        blk_ht);
                    hme_derive_search_range(
                        &s_range_prms_rec,
                        &s_pic_limit_rec,
                        &as_mv_limit[i1_ref_idx],
                        pos_x,
                        pos_y,
                        blk_wd,
                        blk_ht);

                    s_search_prms_blk.i1_ref_idx = i1_ref_idx;
                    ps_candt_zeromv->i1_ref_idx = i1_ref_idx;

                    i4_num_srch_cands = 1;

                    if(1 != ps_refine_prms->i4_layer_id)
                    {
                        S32 x, y;
                        x = gau1_encode_to_raster_x[blk_id_in_full_ctb];
                        y = gau1_encode_to_raster_y[blk_id_in_full_ctb];

                        if(ME_MEDIUM_SPEED > e_me_quality_presets)
                        {
                            hme_get_spatial_candt(
                                ps_curr_layer,
                                e_search_blk_size,
                                blk_x,
                                blk_y,
                                i1_ref_idx,
                                &as_top_neighbours[0],
                                &as_left_neighbours[0],
                                0,
                                ((ps_refine_prms->i4_encode) ? gau1_cu_tr_valid[y][x] : 1),
                                0,
                                ps_refine_prms->i4_encode);

                            *ps_candt_tr = as_top_neighbours[3];
                            *ps_candt_t = as_top_neighbours[1];
                            *ps_candt_tl = as_top_neighbours[0];
                            i4_num_srch_cands += 3;
                        }
                        else
                        {
                            layer_mv_t *ps_layer_mvbank = ps_curr_layer->ps_layer_mvbank;
                            S32 i4_blk_size1 = gau1_blk_size_to_wd[ps_layer_mvbank->e_blk_size];
                            S32 i4_blk_size2 = gau1_blk_size_to_wd[e_search_blk_size];
                            search_node_t *ps_search_node;
                            S32 i4_offset, blk_x_temp = blk_x, blk_y_temp = blk_y;
                            hme_mv_t *ps_mv, *ps_mv_base;
                            S08 *pi1_ref_idx, *pi1_ref_idx_base;
                            S32 jump = 1, mvs_in_blk, mvs_in_row;
                            S32 shift = (ps_refine_prms->i4_encode ? 2 : 0);

                            if(i4_blk_size1 != i4_blk_size2)
                            {
                                blk_x_temp <<= 1;
                                blk_y_temp <<= 1;
                                jump = 2;
                                if((i4_blk_size1 << 2) == i4_blk_size2)
                                {
                                    blk_x_temp <<= 1;
                                    blk_y_temp <<= 1;
                                    jump = 4;
                                }
                            }

                            mvs_in_blk = ps_layer_mvbank->i4_num_mvs_per_blk;
                            mvs_in_row = ps_layer_mvbank->i4_num_mvs_per_row;

                            /* Adjust the blk coord to point to top left locn */
                            blk_x_temp -= 1;
                            blk_y_temp -= 1;

                            /* Pick up the mvs from the location */
                            i4_offset = (blk_x_temp * ps_layer_mvbank->i4_num_mvs_per_blk);
                            i4_offset += (ps_layer_mvbank->i4_num_mvs_per_row * blk_y_temp);

                            ps_mv = ps_layer_mvbank->ps_mv + i4_offset;
                            pi1_ref_idx = ps_layer_mvbank->pi1_ref_idx + i4_offset;

                            ps_mv += (i1_ref_idx * ps_layer_mvbank->i4_num_mvs_per_ref);
                            pi1_ref_idx += (i1_ref_idx * ps_layer_mvbank->i4_num_mvs_per_ref);

                            ps_mv_base = ps_mv;
                            pi1_ref_idx_base = pi1_ref_idx;

                            ps_search_node = &as_left_neighbours[0];
                            ps_mv = ps_mv_base + mvs_in_row;
                            pi1_ref_idx = pi1_ref_idx_base + mvs_in_row;
                            COPY_MV_TO_SEARCH_NODE(
                                ps_search_node, ps_mv, pi1_ref_idx, i1_ref_idx, shift);

                            i4_num_srch_cands++;
                        }
                    }
                    else
                    {
                        S32 x, y;
                        x = gau1_encode_to_raster_x[blk_id_in_full_ctb];
                        y = gau1_encode_to_raster_y[blk_id_in_full_ctb];

                        if(ME_MEDIUM_SPEED > e_me_quality_presets)
                        {
                            hme_get_spatial_candt_in_l1_me(
                                ps_curr_layer,
                                e_search_blk_size,
                                blk_x,
                                blk_y,
                                i1_ref_idx,
                                !ps_search_results->pu1_is_past[i1_ref_idx],
                                &as_top_neighbours[0],
                                &as_left_neighbours[0],
                                0,
                                ((ps_refine_prms->i4_encode) ? gau1_cu_tr_valid[y][x] : 1),
                                0,
                                ps_ctxt->s_frm_prms.u1_num_active_ref_l0,
                                ps_ctxt->s_frm_prms.u1_num_active_ref_l1);

                            *ps_candt_tr = as_top_neighbours[3];
                            *ps_candt_t = as_top_neighbours[1];
                            *ps_candt_tl = as_top_neighbours[0];

                            i4_num_srch_cands += 3;
                        }
                        else
                        {
                            layer_mv_t *ps_layer_mvbank = ps_curr_layer->ps_layer_mvbank;
                            S32 i4_blk_size1 = gau1_blk_size_to_wd[ps_layer_mvbank->e_blk_size];
                            S32 i4_blk_size2 = gau1_blk_size_to_wd[e_search_blk_size];
                            S32 i4_mv_pos_in_implicit_array;
                            search_node_t *ps_search_node;
                            S32 i4_offset, blk_x_temp = blk_x, blk_y_temp = blk_y;
                            hme_mv_t *ps_mv, *ps_mv_base;
                            S08 *pi1_ref_idx, *pi1_ref_idx_base;
                            S32 jump = 1, mvs_in_blk, mvs_in_row;
                            S32 shift = (ps_refine_prms->i4_encode ? 2 : 0);
                            U08 u1_pred_dir = !ps_search_results->pu1_is_past[i1_ref_idx];
                            S32 i4_num_results_in_given_dir =
                                ((u1_pred_dir == 1) ? (ps_layer_mvbank->i4_num_mvs_per_ref *
                                                       ps_ctxt->s_frm_prms.u1_num_active_ref_l1)
                                                    : (ps_layer_mvbank->i4_num_mvs_per_ref *
                                                       ps_ctxt->s_frm_prms.u1_num_active_ref_l0));

                            if(i4_blk_size1 != i4_blk_size2)
                            {
                                blk_x_temp <<= 1;
                                blk_y_temp <<= 1;
                                jump = 2;
                                if((i4_blk_size1 << 2) == i4_blk_size2)
                                {
                                    blk_x_temp <<= 1;
                                    blk_y_temp <<= 1;
                                    jump = 4;
                                }
                            }

                            mvs_in_blk = ps_layer_mvbank->i4_num_mvs_per_blk;
                            mvs_in_row = ps_layer_mvbank->i4_num_mvs_per_row;

                            /* Adjust the blk coord to point to top left locn */
                            blk_x_temp -= 1;
                            blk_y_temp -= 1;

                            /* Pick up the mvs from the location */
                            i4_offset = (blk_x_temp * ps_layer_mvbank->i4_num_mvs_per_blk);
                            i4_offset += (ps_layer_mvbank->i4_num_mvs_per_row * blk_y_temp);

                            i4_offset +=
                                ((u1_pred_dir == 1) ? (ps_layer_mvbank->i4_num_mvs_per_ref *
                                                       ps_ctxt->s_frm_prms.u1_num_active_ref_l0)
                                                    : 0);

                            ps_mv = ps_layer_mvbank->ps_mv + i4_offset;
                            pi1_ref_idx = ps_layer_mvbank->pi1_ref_idx + i4_offset;

                            ps_mv_base = ps_mv;
                            pi1_ref_idx_base = pi1_ref_idx;

                            {
                                /* ps_mv and pi1_ref_idx now point to the top left locn */
                                ps_search_node = &as_left_neighbours[0];
                                ps_mv = ps_mv_base + mvs_in_row;
                                pi1_ref_idx = pi1_ref_idx_base + mvs_in_row;

                                i4_mv_pos_in_implicit_array =
                                    hme_find_pos_of_implicitly_stored_ref_id(
                                        pi1_ref_idx, i1_ref_idx, 0, i4_num_results_in_given_dir);

                                if(-1 != i4_mv_pos_in_implicit_array)
                                {
                                    COPY_MV_TO_SEARCH_NODE(
                                        ps_search_node,
                                        &ps_mv[i4_mv_pos_in_implicit_array],
                                        &pi1_ref_idx[i4_mv_pos_in_implicit_array],
                                        i1_ref_idx,
                                        shift);
                                }
                                else
                                {
                                    ps_search_node->u1_is_avail = 0;
                                    ps_search_node->s_mv.i2_mvx = 0;
                                    ps_search_node->s_mv.i2_mvy = 0;
                                    ps_search_node->i1_ref_idx = i1_ref_idx;
                                }

                                i4_num_srch_cands++;
                            }
                        }
                    }

                    *ps_candt_l = as_left_neighbours[0];

                    /* when 16x16 is searched in an encode layer, and the prev layer */
                    /* stores results for 4x4 blks, we project 5 candts corresponding */
                    /* to (2,2), (2,14), (14,2), (14,14) and 2nd best of (2,2) */
                    /* However in other cases, only 2,2 best and 2nd best reqd */
                    resultid = 0;
                    pf_hme_project_coloc_candt(
                        ps_candt_prj_coloc[0],
                        ps_curr_layer,
                        ps_coarse_layer,
                        pos_x + 2,
                        pos_y + 2,
                        i1_ref_idx,
                        resultid);

                    i4_num_srch_cands++;

                    resultid = 1;
                    if(num_results_prev_layer > 1)
                    {
                        pf_hme_project_coloc_candt(
                            ps_candt_prj_coloc[1],
                            ps_curr_layer,
                            ps_coarse_layer,
                            pos_x + 2,
                            pos_y + 2,
                            i1_ref_idx,
                            resultid);

                        i4_num_srch_cands++;
                    }

                    resultid = 0;

                    if(ME_MEDIUM_SPEED <= e_me_quality_presets)
                    {
                        pf_hme_project_coloc_candt(
                            ps_candt_prj_t[0],
                            ps_curr_layer,
                            ps_coarse_layer,
                            pos_x,
                            pos_y - prev_blk_offset,
                            i1_ref_idx,
                            resultid);

                        i4_num_srch_cands++;
                    }

                    {
                        pf_hme_project_coloc_candt(
                            ps_candt_prj_br[0],
                            ps_curr_layer,
                            ps_coarse_layer,
                            pos_x + next_blk_offset,
                            pos_y + next_blk_offset,
                            i1_ref_idx,
                            resultid);
                        pf_hme_project_coloc_candt(
                            ps_candt_prj_bl[0],
                            ps_curr_layer,
                            ps_coarse_layer,
                            pos_x - prev_blk_offset,
                            pos_y + next_blk_offset,
                            i1_ref_idx,
                            resultid);
                        pf_hme_project_coloc_candt(
                            ps_candt_prj_r[0],
                            ps_curr_layer,
                            ps_coarse_layer,
                            pos_x + next_blk_offset,
                            pos_y,
                            i1_ref_idx,
                            resultid);
                        pf_hme_project_coloc_candt(
                            ps_candt_prj_b[0],
                            ps_curr_layer,
                            ps_coarse_layer,
                            pos_x,
                            pos_y + next_blk_offset,
                            i1_ref_idx,
                            resultid);

                        i4_num_srch_cands += 4;

                        if(ME_MEDIUM_SPEED <= e_me_quality_presets)
                        {
                            pf_hme_project_coloc_candt(
                                ps_candt_prj_tr[0],
                                ps_curr_layer,
                                ps_coarse_layer,
                                pos_x + next_blk_offset,
                                pos_y - prev_blk_offset,
                                i1_ref_idx,
                                resultid);
                            pf_hme_project_coloc_candt(
                                ps_candt_prj_tl[0],
                                ps_curr_layer,
                                ps_coarse_layer,
                                pos_x - prev_blk_offset,
                                pos_y - prev_blk_offset,
                                i1_ref_idx,
                                resultid);

                            i4_num_srch_cands += 2;
                        }
                    }
                    if((num_results_prev_layer > 1) && (e_search_complexity >= SEARCH_CX_MED))
                    {
                        resultid = 1;
                        pf_hme_project_coloc_candt(
                            ps_candt_prj_br[1],
                            ps_curr_layer,
                            ps_coarse_layer,
                            pos_x + next_blk_offset,
                            pos_y + next_blk_offset,
                            i1_ref_idx,
                            resultid);
                        pf_hme_project_coloc_candt(
                            ps_candt_prj_bl[1],
                            ps_curr_layer,
                            ps_coarse_layer,
                            pos_x - prev_blk_offset,
                            pos_y + next_blk_offset,
                            i1_ref_idx,
                            resultid);
                        pf_hme_project_coloc_candt(
                            ps_candt_prj_r[1],
                            ps_curr_layer,
                            ps_coarse_layer,
                            pos_x + next_blk_offset,
                            pos_y,
                            i1_ref_idx,
                            resultid);
                        pf_hme_project_coloc_candt(
                            ps_candt_prj_b[1],
                            ps_curr_layer,
                            ps_coarse_layer,
                            pos_x,
                            pos_y + next_blk_offset,
                            i1_ref_idx,
                            resultid);

                        i4_num_srch_cands += 4;

                        pf_hme_project_coloc_candt(
                            ps_candt_prj_tr[1],
                            ps_curr_layer,
                            ps_coarse_layer,
                            pos_x + next_blk_offset,
                            pos_y - prev_blk_offset,
                            i1_ref_idx,
                            resultid);
                        pf_hme_project_coloc_candt(
                            ps_candt_prj_tl[1],
                            ps_curr_layer,
                            ps_coarse_layer,
                            pos_x - prev_blk_offset,
                            pos_y - prev_blk_offset,
                            i1_ref_idx,
                            resultid);
                        pf_hme_project_coloc_candt(
                            ps_candt_prj_t[1],
                            ps_curr_layer,
                            ps_coarse_layer,
                            pos_x,
                            pos_y - prev_blk_offset,
                            i1_ref_idx,
                            resultid);

                        i4_num_srch_cands += 3;
                    }

                    /* Note this block also clips the MV range for all candidates */
#ifdef _DEBUG
                    {
                        S32 candt;
                        range_prms_t *ps_range_prms;

                        S32 num_ref_valid = ps_ctxt->num_ref_future + ps_ctxt->num_ref_past;
                        for(candt = 0; candt < i4_num_srch_cands; candt++)
                        {
                            search_node_t *ps_search_node;

                            ps_search_node =
                                s_search_prms_blk.ps_search_candts[candt].ps_search_node;

                            ps_range_prms = s_search_prms_blk.aps_mv_range[0];

                            if((ps_search_node->i1_ref_idx >= num_ref_valid) ||
                               (ps_search_node->i1_ref_idx < 0))
                            {
                                ASSERT(0);
                            }
                        }
                    }
#endif

                    {
                        S32 srch_cand;
                        S32 num_unique_nodes = 0;
                        S32 num_nodes_searched = 0;
                        S32 num_best_cand = 0;
                        S08 i1_grid_enable = 0;
                        search_node_t as_best_two_proj_node[TOT_NUM_PARTS * 2];
                        /* has list of valid partition to search terminated by -1 */
                        S32 ai4_valid_part_ids[TOT_NUM_PARTS + 1];
                        S32 center_x;
                        S32 center_y;

                        /* indicates if the centre point of grid needs to be explicitly added for search */
                        S32 add_centre = 0;

                        memset(au4_unique_node_map, 0, sizeof(au4_unique_node_map));
                        center_x = ps_candt_prj_coloc[0]->s_mv.i2_mvx;
                        center_y = ps_candt_prj_coloc[0]->s_mv.i2_mvy;

                        for(srch_cand = 0;
                            (srch_cand < i4_num_srch_cands) &&
                            (num_unique_nodes <= s_search_prms_blk.i4_num_init_candts);
                            srch_cand++)
                        {
                            search_node_t s_search_node_temp =
                                s_search_prms_blk.ps_search_candts[srch_cand].ps_search_node[0];

                            s_search_node_temp.i1_ref_idx = i1_ref_idx;  //TEMP FIX;

                            /* Clip the motion vectors as well here since after clipping
                            two candidates can become same and they will be removed during deduplication */
                            CLIP_MV_WITHIN_RANGE(
                                s_search_node_temp.s_mv.i2_mvx,
                                s_search_node_temp.s_mv.i2_mvy,
                                s_search_prms_blk.aps_mv_range[0],
                                ps_refine_prms->i4_num_steps_fpel_refine,
                                ps_refine_prms->i4_num_steps_hpel_refine,
                                ps_refine_prms->i4_num_steps_qpel_refine);

                            /* PT_C */
                            INSERT_NEW_NODE(
                                as_unique_search_nodes,
                                num_unique_nodes,
                                s_search_node_temp,
                                0,
                                au4_unique_node_map,
                                center_x,
                                center_y,
                                1);

                            num_nodes_searched += 1;
                        }
                        num_unique_nodes =
                            MIN(num_unique_nodes, s_search_prms_blk.i4_num_init_candts);

                        /* If the number of unique candidates projected/to be refined is 2 or more,
                        then filter out and choose the best two here */
                        if(num_unique_nodes >= 2)
                        {
                            S32 num_results;
                            S32 cnt;
                            S32 *pi4_valid_part_ids;
                            s_search_prms_blk.ps_search_nodes = &as_unique_search_nodes[0];
                            s_search_prms_blk.i4_num_search_nodes = num_unique_nodes;
                            pi4_valid_part_ids = &ai4_valid_part_ids[0];

                            /* pi4_valid_part_ids is updated inside */
                            hme_pred_search_no_encode(
                                &s_search_prms_blk,
                                ps_curr_layer,
                                &ps_ctxt->s_wt_pred,
                                pi4_valid_part_ids,
                                1,
                                e_me_quality_presets,
                                i1_grid_enable,
                                (ihevce_me_optimised_function_list_t *)
                                    ps_ctxt->pv_me_optimised_function_list

                            );

                            num_best_cand = 0;
                            cnt = 0;
                            num_results = ps_search_results->u1_num_results_per_part;

                            while((id = pi4_valid_part_ids[cnt++]) >= 0)
                            {
                                num_results =
                                    MIN(ps_refine_prms->pu1_num_best_results[id], num_results);

                                for(i = 0; i < num_results; i++)
                                {
                                    search_node_t s_search_node_temp;
                                    s_search_node_temp =
                                        *(ps_search_results->aps_part_results[i1_ref_idx][id] + i);
                                    if(s_search_node_temp.i1_ref_idx >= 0)
                                    {
                                        INSERT_NEW_NODE_NOMAP(
                                            as_best_two_proj_node,
                                            num_best_cand,
                                            s_search_node_temp,
                                            0);
                                    }
                                }
                            }
                        }
                        else
                        {
                            add_centre = 1;
                            num_best_cand = num_unique_nodes;
                            as_best_two_proj_node[0] = as_unique_search_nodes[0];
                        }

                        num_unique_nodes = 0;
                        num_nodes_searched = 0;

                        if(1 == num_best_cand)
                        {
                            search_node_t s_search_node_temp = as_best_two_proj_node[0];
                            S16 i2_mv_x = s_search_node_temp.s_mv.i2_mvx;
                            S16 i2_mv_y = s_search_node_temp.s_mv.i2_mvy;
                            S08 i1_ref_idx = s_search_node_temp.i1_ref_idx;

                            i1_grid_enable = 1;

                            as_unique_search_nodes[num_unique_nodes].s_mv.i2_mvx = i2_mv_x - 1;
                            as_unique_search_nodes[num_unique_nodes].s_mv.i2_mvy = i2_mv_y - 1;
                            as_unique_search_nodes[num_unique_nodes++].i1_ref_idx = i1_ref_idx;

                            as_unique_search_nodes[num_unique_nodes].s_mv.i2_mvx = i2_mv_x;
                            as_unique_search_nodes[num_unique_nodes].s_mv.i2_mvy = i2_mv_y - 1;
                            as_unique_search_nodes[num_unique_nodes++].i1_ref_idx = i1_ref_idx;

                            as_unique_search_nodes[num_unique_nodes].s_mv.i2_mvx = i2_mv_x + 1;
                            as_unique_search_nodes[num_unique_nodes].s_mv.i2_mvy = i2_mv_y - 1;
                            as_unique_search_nodes[num_unique_nodes++].i1_ref_idx = i1_ref_idx;

                            as_unique_search_nodes[num_unique_nodes].s_mv.i2_mvx = i2_mv_x - 1;
                            as_unique_search_nodes[num_unique_nodes].s_mv.i2_mvy = i2_mv_y;
                            as_unique_search_nodes[num_unique_nodes++].i1_ref_idx = i1_ref_idx;

                            as_unique_search_nodes[num_unique_nodes].s_mv.i2_mvx = i2_mv_x + 1;
                            as_unique_search_nodes[num_unique_nodes].s_mv.i2_mvy = i2_mv_y;
                            as_unique_search_nodes[num_unique_nodes++].i1_ref_idx = i1_ref_idx;

                            as_unique_search_nodes[num_unique_nodes].s_mv.i2_mvx = i2_mv_x - 1;
                            as_unique_search_nodes[num_unique_nodes].s_mv.i2_mvy = i2_mv_y + 1;
                            as_unique_search_nodes[num_unique_nodes++].i1_ref_idx = i1_ref_idx;

                            as_unique_search_nodes[num_unique_nodes].s_mv.i2_mvx = i2_mv_x;
                            as_unique_search_nodes[num_unique_nodes].s_mv.i2_mvy = i2_mv_y + 1;
                            as_unique_search_nodes[num_unique_nodes++].i1_ref_idx = i1_ref_idx;

                            as_unique_search_nodes[num_unique_nodes].s_mv.i2_mvx = i2_mv_x + 1;
                            as_unique_search_nodes[num_unique_nodes].s_mv.i2_mvy = i2_mv_y + 1;
                            as_unique_search_nodes[num_unique_nodes++].i1_ref_idx = i1_ref_idx;

                            if(add_centre)
                            {
                                as_unique_search_nodes[num_unique_nodes].s_mv.i2_mvx = i2_mv_x;
                                as_unique_search_nodes[num_unique_nodes].s_mv.i2_mvy = i2_mv_y;
                                as_unique_search_nodes[num_unique_nodes++].i1_ref_idx = i1_ref_idx;
                            }
                        }
                        else
                        {
                            /* For the candidates where refinement was required, choose the best two */
                            for(srch_cand = 0; srch_cand < num_best_cand; srch_cand++)
                            {
                                search_node_t s_search_node_temp = as_best_two_proj_node[srch_cand];
                                WORD32 mv_x = s_search_node_temp.s_mv.i2_mvx;
                                WORD32 mv_y = s_search_node_temp.s_mv.i2_mvy;

                                /* Because there may not be two best unique candidates (because of clipping),
                                second best candidate can be uninitialized, ignore that */
                                if(s_search_node_temp.s_mv.i2_mvx == INTRA_MV ||
                                   s_search_node_temp.i1_ref_idx < 0)
                                {
                                    num_nodes_searched++;
                                    continue;
                                }

                                /* PT_C */
                                /* Since the center point has already been evaluated and best results are persistent,
                                it will not be evaluated again */
                                if(add_centre) /* centre point added explicitly again if search results is not updated */
                                {
                                    INSERT_NEW_NODE(
                                        as_unique_search_nodes,
                                        num_unique_nodes,
                                        s_search_node_temp,
                                        0,
                                        au4_unique_node_map,
                                        center_x,
                                        center_y,
                                        1);
                                }

                                /* PT_L */
                                s_search_node_temp.s_mv.i2_mvx = mv_x - 1;
                                s_search_node_temp.s_mv.i2_mvy = mv_y;
                                INSERT_NEW_NODE(
                                    as_unique_search_nodes,
                                    num_unique_nodes,
                                    s_search_node_temp,
                                    0,
                                    au4_unique_node_map,
                                    center_x,
                                    center_y,
                                    1);

                                /* PT_T */
                                s_search_node_temp.s_mv.i2_mvx = mv_x;
                                s_search_node_temp.s_mv.i2_mvy = mv_y - 1;
                                INSERT_NEW_NODE(
                                    as_unique_search_nodes,
                                    num_unique_nodes,
                                    s_search_node_temp,
                                    0,
                                    au4_unique_node_map,
                                    center_x,
                                    center_y,
                                    1);

                                /* PT_R */
                                s_search_node_temp.s_mv.i2_mvx = mv_x + 1;
                                s_search_node_temp.s_mv.i2_mvy = mv_y;
                                INSERT_NEW_NODE(
                                    as_unique_search_nodes,
                                    num_unique_nodes,
                                    s_search_node_temp,
                                    0,
                                    au4_unique_node_map,
                                    center_x,
                                    center_y,
                                    1);

                                /* PT_B */
                                s_search_node_temp.s_mv.i2_mvx = mv_x;
                                s_search_node_temp.s_mv.i2_mvy = mv_y + 1;
                                INSERT_NEW_NODE(
                                    as_unique_search_nodes,
                                    num_unique_nodes,
                                    s_search_node_temp,
                                    0,
                                    au4_unique_node_map,
                                    center_x,
                                    center_y,
                                    1);

                                /* PT_TL */
                                s_search_node_temp.s_mv.i2_mvx = mv_x - 1;
                                s_search_node_temp.s_mv.i2_mvy = mv_y - 1;
                                INSERT_NEW_NODE(
                                    as_unique_search_nodes,
                                    num_unique_nodes,
                                    s_search_node_temp,
                                    0,
                                    au4_unique_node_map,
                                    center_x,
                                    center_y,
                                    1);

                                /* PT_TR */
                                s_search_node_temp.s_mv.i2_mvx = mv_x + 1;
                                s_search_node_temp.s_mv.i2_mvy = mv_y - 1;
                                INSERT_NEW_NODE(
                                    as_unique_search_nodes,
                                    num_unique_nodes,
                                    s_search_node_temp,
                                    0,
                                    au4_unique_node_map,
                                    center_x,
                                    center_y,
                                    1);

                                /* PT_BL */
                                s_search_node_temp.s_mv.i2_mvx = mv_x - 1;
                                s_search_node_temp.s_mv.i2_mvy = mv_y + 1;
                                INSERT_NEW_NODE(
                                    as_unique_search_nodes,
                                    num_unique_nodes,
                                    s_search_node_temp,
                                    0,
                                    au4_unique_node_map,
                                    center_x,
                                    center_y,
                                    1);

                                /* PT_BR */
                                s_search_node_temp.s_mv.i2_mvx = mv_x + 1;
                                s_search_node_temp.s_mv.i2_mvy = mv_y + 1;
                                INSERT_NEW_NODE(
                                    as_unique_search_nodes,
                                    num_unique_nodes,
                                    s_search_node_temp,
                                    0,
                                    au4_unique_node_map,
                                    center_x,
                                    center_y,
                                    1);
                            }
                        }

                        s_search_prms_blk.ps_search_nodes = &as_unique_search_nodes[0];
                        s_search_prms_blk.i4_num_search_nodes = num_unique_nodes;

                        /*****************************************************************/
                        /* Call the search algorithm, this includes:                     */
                        /* Pre-Search-Refinement (for coarse candts)                     */
                        /* Search on each candidate                                      */
                        /* Post Search Refinement on winners/other new candidates        */
                        /*****************************************************************/

                        hme_pred_search_no_encode(
                            &s_search_prms_blk,
                            ps_curr_layer,
                            &ps_ctxt->s_wt_pred,
                            ai4_valid_part_ids,
                            0,
                            e_me_quality_presets,
                            i1_grid_enable,
                            (ihevce_me_optimised_function_list_t *)
                                ps_ctxt->pv_me_optimised_function_list);

                        i1_grid_enable = 0;
                    }
                }

                /* for non encode layer update MV and end processing for block */
                {
                    WORD32 i4_ref_id, min_cost = 0x7fffffff, min_sad = 0;
                    search_node_t *ps_search_node;
                    /* now update the reqd results back to the layer mv bank. */
                    if(1 == ps_refine_prms->i4_layer_id)
                    {
                        hme_update_mv_bank_in_l1_me(
                            ps_search_results,
                            ps_curr_layer->ps_layer_mvbank,
                            blk_x,
                            blk_y,
                            &s_mv_update_prms);
                    }
                    else
                    {
                        hme_update_mv_bank_noencode(
                            ps_search_results,
                            ps_curr_layer->ps_layer_mvbank,
                            blk_x,
                            blk_y,
                            &s_mv_update_prms);
                    }

                    /* Update the min and max MVs of the dynamic search range for each ref. pic. */
                    /* Only for P pics: for P, is_i_pic and bidir_enabled are both 0, whereas for */
                    /* I and B pics they are mutually exclusive, so equality selects P pics only  */
                    if(ps_ctxt->s_frm_prms.is_i_pic == ps_ctxt->s_frm_prms.bidir_enabled)
                    {
                        WORD32 i4_j;
                        layer_mv_t *ps_layer_mv = ps_curr_layer->ps_layer_mvbank;

                        //if (ps_layer_mv->e_blk_size == s_mv_update_prms.e_search_blk_size)
                        /* Not considering this for Dyn. Search Update */
                        {
                            for(i4_ref_id = 0; i4_ref_id < (S32)s_mv_update_prms.i4_num_ref;
                                i4_ref_id++)
                            {
                                ps_search_node =
                                    ps_search_results->aps_part_results[i4_ref_id][PART_ID_2Nx2N];

                                for(i4_j = 0; i4_j < ps_layer_mv->i4_num_mvs_per_ref; i4_j++)
                                {
                                    hme_update_dynamic_search_params(
                                        &ps_ctxt->s_coarse_dyn_range_prms
                                             .as_dyn_range_prms[ps_refine_prms->i4_layer_id]
                                                               [i4_ref_id],
                                        ps_search_node->s_mv.i2_mvy);

                                    ps_search_node++;
                                }
                            }
                        }
                    }

                    if(1 == ps_refine_prms->i4_layer_id)
                    {
                        WORD32 wt_pred_val, log_wt_pred_val;
                        WORD32 ref_id_of_nearest_poc = 0;
                        WORD32 max_val = 0x7fffffff;
                        WORD32 max_l0_val = 0x7fffffff;
                        WORD32 max_l1_val = 0x7fffffff;
                        WORD32 cur_val;
                        WORD32 i4_local_weighted_sad, i4_local_cost_weighted_pred;

                        WORD32 bestl0_sad = 0x7fffffff;
                        WORD32 bestl1_sad = 0x7fffffff;
                        search_node_t *ps_best_l0_blk = NULL, *ps_best_l1_blk = NULL;

                        for(i4_ref_id = 0; i4_ref_id < (S32)s_mv_update_prms.i4_num_ref;
                            i4_ref_id++)
                        {
                            wt_pred_val = ps_ctxt->s_wt_pred.a_wpred_wt[i4_ref_id];
                            log_wt_pred_val = ps_ctxt->s_wt_pred.wpred_log_wdc;

                            ps_search_node =
                                ps_search_results->aps_part_results[i4_ref_id][PART_ID_2Nx2N];

                            i4_local_weighted_sad = ((ps_search_node->i4_sad * wt_pred_val) +
                                                     ((1 << log_wt_pred_val) >> 1)) >>
                                                    log_wt_pred_val;

                            i4_local_cost_weighted_pred =
                                i4_local_weighted_sad +
                                (ps_search_node->i4_tot_cost - ps_search_node->i4_sad);
                            //the loop is redundant as the results are already sorted based on total cost
                            //for (i4_j = 0; i4_j < ps_curr_layer->ps_layer_mvbank->i4_num_mvs_per_ref; i4_j++)
                            {
                                if(i4_local_cost_weighted_pred < min_cost)
                                {
                                    min_cost = i4_local_cost_weighted_pred;
                                    min_sad = i4_local_weighted_sad;
                                }
                            }

                            /* For P frame, calculate the nearest poc which is either P or I frame*/
                            if(ps_ctxt->s_frm_prms.is_i_pic == ps_ctxt->s_frm_prms.bidir_enabled)
                            {
                                if(-1 != ps_coarse_layer->ai4_ref_id_to_poc_lc[i4_ref_id])
                                {
                                    cur_val =
                                        ABS(ps_ctxt->i4_curr_poc -
                                            ps_coarse_layer->ai4_ref_id_to_poc_lc[i4_ref_id]);
                                    if(cur_val < max_val)
                                    {
                                        max_val = cur_val;
                                        ref_id_of_nearest_poc = i4_ref_id;
                                    }
                                }
                            }
                        }
                        /* Store the ME cost w.r.t. the nearest-POC reference frame; only for P frames */
                        if(ps_ctxt->s_frm_prms.is_i_pic == ps_ctxt->s_frm_prms.bidir_enabled)
                        {
                            if(-1 != ps_coarse_layer->ai4_ref_id_to_poc_lc[ref_id_of_nearest_poc])
                            {
                                WORD16 i2_mvx, i2_mvy;

                                WORD32 i4_diff_col_ctr = blk_x - (i4_ctb_blk_ctr * 4);
                                WORD32 i4_diff_row_ctr = blk_y - (i4_ctb_row_ctr * 4);
                                WORD32 z_scan_idx =
                                    gau1_raster_scan_to_ctb[i4_diff_row_ctr][i4_diff_col_ctr];
                                WORD32 wt, log_wt;

                                /*ASSERT((ps_ctxt->i4_curr_poc - ps_coarse_layer->ai4_ref_id_to_poc_lc[ref_id_of_nearest_poc])
                                <= (1 + ps_ctxt->num_b_frms));*/

                                /*obtain mvx and mvy */
                                i2_mvx =
                                    ps_search_results
                                        ->aps_part_results[ref_id_of_nearest_poc][PART_ID_2Nx2N]
                                        ->s_mv.i2_mvx;
                                i2_mvy =
                                    ps_search_results
                                        ->aps_part_results[ref_id_of_nearest_poc][PART_ID_2Nx2N]
                                        ->s_mv.i2_mvy;

                                /* fetch the weight and shift used to scale the SAD of the nearest-POC ref */
                                wt = ps_ctxt->s_wt_pred.a_wpred_wt[ref_id_of_nearest_poc];
                                log_wt = ps_ctxt->s_wt_pred.wpred_log_wdc;

                                /*register the min cost for l1 me in blk context */
                                ps_ed_ctb_l1_curr->i4_sad_me_for_ref[z_scan_idx >> 2] =
                                    ((ps_search_results
                                          ->aps_part_results[ref_id_of_nearest_poc][PART_ID_2Nx2N]
                                          ->i4_sad *
                                      wt) +
                                     ((1 << log_wt) >> 1)) >>
                                    log_wt;
                                ps_ed_ctb_l1_curr->i4_sad_cost_me_for_ref[z_scan_idx >> 2] =
                                    ps_ed_ctb_l1_curr->i4_sad_me_for_ref[z_scan_idx >> 2] +
                                    (ps_search_results
                                         ->aps_part_results[ref_id_of_nearest_poc][PART_ID_2Nx2N]
                                         ->i4_tot_cost -
                                     ps_search_results
                                         ->aps_part_results[ref_id_of_nearest_poc][PART_ID_2Nx2N]
                                         ->i4_sad);
                                /*for complexity change detection*/
                                ps_ctxt->i4_num_blks++;
                                if(ps_ed_ctb_l1_curr->i4_sad_cost_me_for_ref[z_scan_idx >> 2] >
                                   (8 /*blk width*/ * 8 /*blk height*/ * (1 + ps_ctxt->num_b_frms)))
                                {
                                    ps_ctxt->i4_num_blks_high_sad++;
                                }
                            }
                        }
                    }

                    /* EIID: Early inter intra decisions */
                    /* tap L1 level SAD for inter intra decisions */
                    if((e_me_quality_presets >= ME_MEDIUM_SPEED) &&
                       (!ps_ctxt->s_frm_prms
                             .is_i_pic))  //for high-quality preset->disable early decisions
                    {
                        if(1 == ps_refine_prms->i4_layer_id)
                        {
                            WORD32 i4_min_sad_cost_8x8_block = min_cost;
                            ihevce_ed_blk_t *ps_curr_ed_blk_ctxt;
                            WORD32 i4_diff_col_ctr = blk_x - (i4_ctb_blk_ctr * 4);
                            WORD32 i4_diff_row_ctr = blk_y - (i4_ctb_row_ctr * 4);
                            WORD32 z_scan_idx =
                                gau1_raster_scan_to_ctb[i4_diff_row_ctr][i4_diff_col_ctr];
                            ps_curr_ed_blk_ctxt = ps_ed_blk_ctxt_curr_ctb + z_scan_idx;

                            /*register the min cost for l1 me in blk context */
                            ps_ed_ctb_l1_curr->i4_best_sad_cost_8x8_l1_me[z_scan_idx >> 2] =
                                i4_min_sad_cost_8x8_block;
                            i4_num_comparisions++;

                            /* take early inter-intra decision here */
                            ps_curr_ed_blk_ctxt->intra_or_inter = 3; /*init saying eval both */
#if DISABLE_INTRA_IN_BPICS
                            if((e_me_quality_presets == ME_XTREME_SPEED_25) &&
                               (ps_ctxt->s_frm_prms.i4_temporal_layer_id > TEMPORAL_LAYER_DISABLE))
                            {
                                ps_curr_ed_blk_ctxt->intra_or_inter =
                                    2; /*eval only inter if inter cost is less */
                                i4_num_inter_wins++;
                            }
                            else
#endif
                            {
                                if(ps_ed_ctb_l1_curr->i4_best_sad_cost_8x8_l1_me[z_scan_idx >> 2] <
                                   ((ps_ed_ctb_l1_curr->i4_best_sad_cost_8x8_l1_ipe[z_scan_idx >> 2] *
                                     i4_threshold_multiplier) /
                                    i4_threshold_divider))
                                {
                                    ps_curr_ed_blk_ctxt->intra_or_inter =
                                        2; /*eval only inter if inter cost is less */
                                    i4_num_inter_wins++;
                                }
                            }

                            //{
                            //  DBG_PRINTF ("(blk x, blk y):(%d, %d)\t me:(ctb_x, ctb_y):(%d, %d)\t intra_SAD_COST: %d\tInter_SAD_COST: %d\n",
                            //      blk_x,blk_y,
                            //      i4_ctb_blk_ctr, i4_ctb_row_ctr,
                            //      ps_curr_ed_blk_ctxt->i4_best_sad_8x8_l1_ipe,
                            //      i4_min_sad_cost_8x8_block
                            //      );
                            //}

                        }  //end of layer-1
                    }  //end of if (e_me_quality_presets >= ME_MEDIUM_SPEED)
                    else
                    {
                        if(1 == ps_refine_prms->i4_layer_id)
                        {
                            WORD32 i4_min_sad_cost_8x8_block = min_cost;
                            WORD32 i4_diff_col_ctr = blk_x - (i4_ctb_blk_ctr * 4);
                            WORD32 i4_diff_row_ctr = blk_y - (i4_ctb_row_ctr * 4);
                            WORD32 z_scan_idx =
                                gau1_raster_scan_to_ctb[i4_diff_row_ctr][i4_diff_col_ctr];

                            /*register the min cost for l1 me in blk context */
                            ps_ed_ctb_l1_curr->i4_best_sad_cost_8x8_l1_me[z_scan_idx >> 2] =
                                i4_min_sad_cost_8x8_block;
                        }
                    }
                    if(1 == ps_refine_prms->i4_layer_id)
                    {
                        WORD32 i4_diff_col_ctr = blk_x - (i4_ctb_blk_ctr * 4);
                        WORD32 i4_diff_row_ctr = blk_y - (i4_ctb_row_ctr * 4);
                        WORD32 z_scan_idx =
                            gau1_raster_scan_to_ctb[i4_diff_row_ctr][i4_diff_col_ctr];

                        ps_ed_ctb_l1_curr->i4_best_sad_8x8_l1_me_for_decide[z_scan_idx >> 2] =
                            min_sad;

                        if(min_cost <
                           ps_ed_ctb_l1_curr->i4_best_sad_cost_8x8_l1_ipe[z_scan_idx >> 2])
                        {
                            ps_ctxt->i4_L1_hme_best_cost += min_cost;
                            ps_ctxt->i4_L1_hme_sad += min_sad;
                            ps_ed_ctb_l1_curr->i4_best_sad_8x8_l1_me[z_scan_idx >> 2] = min_sad;
                        }
                        else
                        {
                            ps_ctxt->i4_L1_hme_best_cost +=
                                ps_ed_ctb_l1_curr->i4_best_sad_cost_8x8_l1_ipe[z_scan_idx >> 2];
                            ps_ctxt->i4_L1_hme_sad +=
                                ps_ed_ctb_l1_curr->i4_best_sad_8x8_l1_ipe[z_scan_idx >> 2];
                            ps_ed_ctb_l1_curr->i4_best_sad_8x8_l1_me[z_scan_idx >> 2] =
                                ps_ed_ctb_l1_curr->i4_best_sad_8x8_l1_ipe[z_scan_idx >> 2];
                        }
                    }
                }
            }

            /* Update the number of blocks processed in the current row */
            if((ME_MEDIUM_SPEED > e_me_quality_presets))
            {
                ihevce_dmgr_set_row_row_sync(
                    pv_hme_dep_mngr,
                    (i4_ctb_x + 1),
                    blk_y,
                    0 /* Col Tile No. : Not supported in PreEnc*/);
            }
        }

        /* set the output dependency after completion of row */
        ihevce_pre_enc_grp_job_set_out_dep(ps_multi_thrd_ctxt, ps_job, i4_ping_pong);
    }
}