/* C++ program  |  799 lines  |  31.35 KB */

/******************************************************************************
 *
 * Copyright (C) 2018 The Android Open Source Project
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at:
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 *
 *****************************************************************************
 * Originally developed and contributed by Ittiam Systems Pvt. Ltd, Bangalore
*/
/**
******************************************************************************
* @file hme_search_algo.c
*
* @brief
*    Contains various search algorithms to be used by coarse/refinement layers
*
* @author
*    Ittiam
*
*
* List of Functions
* hme_compute_grid_results()
* hme_pred_search_square_stepn()
* hme_pred_search()
* hme_pred_search_no_encode()
*
******************************************************************************
*/

/*****************************************************************************/
/* File Includes                                                             */
/*****************************************************************************/
/* System include files */
#include <stdio.h>
#include <string.h>
#include <stdlib.h>
#include <assert.h>
#include <stdarg.h>
#include <math.h>
#include <limits.h>

/* User include files */
#include "ihevc_typedefs.h"
#include "itt_video_api.h"
#include "ihevce_api.h"

#include "rc_cntrl_param.h"
#include "rc_frame_info_collector.h"
#include "rc_look_ahead_params.h"

#include "ihevc_defs.h"
#include "ihevc_structs.h"
#include "ihevc_platform_macros.h"
#include "ihevc_deblk.h"
#include "ihevc_itrans_recon.h"
#include "ihevc_chroma_itrans_recon.h"
#include "ihevc_chroma_intra_pred.h"
#include "ihevc_intra_pred.h"
#include "ihevc_inter_pred.h"
#include "ihevc_mem_fns.h"
#include "ihevc_padding.h"
#include "ihevc_weighted_pred.h"
#include "ihevc_sao.h"
#include "ihevc_resi_trans.h"
#include "ihevc_quant_iquant_ssd.h"
#include "ihevc_cabac_tables.h"

#include "ihevce_defs.h"
#include "ihevce_lap_enc_structs.h"
#include "ihevce_multi_thrd_structs.h"
#include "ihevce_multi_thrd_funcs.h"
#include "ihevce_me_common_defs.h"
#include "ihevce_had_satd.h"
#include "ihevce_error_codes.h"
#include "ihevce_bitstream.h"
#include "ihevce_cabac.h"
#include "ihevce_rdoq_macros.h"
#include "ihevce_function_selector.h"
#include "ihevce_enc_structs.h"
#include "ihevce_entropy_structs.h"
#include "ihevce_cmn_utils_instr_set_router.h"
#include "ihevce_enc_loop_structs.h"
#include "ihevce_bs_compute_ctb.h"
#include "ihevce_global_tables.h"
#include "ihevce_dep_mngr_interface.h"
#include "hme_datatype.h"
#include "hme_interface.h"
#include "hme_common_defs.h"
#include "hme_defs.h"
#include "ihevce_me_instr_set_router.h"
#include "hme_globals.h"
#include "hme_utils.h"
#include "hme_coarse.h"
#include "hme_fullpel.h"
#include "hme_subpel.h"
#include "hme_refine.h"
#include "hme_err_compute.h"
#include "hme_common_utils.h"
#include "hme_search_algo.h"
#include "ihevce_stasino_helpers.h"
#include "ihevce_common_utils.h"

/*****************************************************************************/
/* Function Definitions                                                      */
/*****************************************************************************/

/**
********************************************************************************
*  @fn     void hme_compute_grid_results(err_prms_t *ps_err_prms,
*                                        result_upd_prms_t *ps_result_prms,
*                                        BLK_SIZE_T e_blk_size)
*
*  @brief  Evaluates the search grid described by ps_err_prms and then updates
*          the search results from it. The SAD function is selected from the
*          block size, grid mask and part mask; the result update function is
*          selected from the grid mask, part mask and the number of results
*          kept per partition.
*
*  @param[in,out] ps_err_prms: Parameters handed to the selected SAD function
*
*  @param[in,out] ps_result_prms : Parameters handed to the selected result
*                 update function; also supplies the search results structure
*                 from which the number of results per partition is read
*
*  @param[in] e_blk_size: Block size of the blk being searched for
*
*  @return none
********************************************************************************
*/
void hme_compute_grid_results(
    err_prms_t *ps_err_prms, result_upd_prms_t *ps_result_prms, BLK_SIZE_T e_blk_size)
{
    PF_RESULT_FXN_T pf_hme_result_fxn;
    PF_SAD_FXN_T pf_sad_fxn;
    S32 i4_num_results;

    i4_num_results = (S32)ps_result_prms->ps_search_results->u1_num_results_per_part;

    /* Both selectors key off the masks carried in the error params */
    pf_sad_fxn = hme_get_sad_fxn(e_blk_size, ps_err_prms->i4_grid_mask, ps_err_prms->i4_part_mask);

    pf_hme_result_fxn =
        hme_get_result_fxn(ps_err_prms->i4_grid_mask, ps_err_prms->i4_part_mask, i4_num_results);

    /* The SAD function runs before the result update. The caller in this   */
    /* file points both parameter sets at the same SAD grid buffer.         */
    pf_sad_fxn(ps_err_prms);
    pf_hme_result_fxn(ps_result_prms);
}

/**
********************************************************************************
*  @fn     void hme_pred_search_square_stepn(hme_search_prms_t *ps_search_prms,
*                                   layer_ctxt_t *ps_layer_ctxt,
*                                   wgt_pred_ctxt_t *ps_wt_inp_prms,
*                                   ME_QUALITY_PRESETS_T e_me_quality_preset,
*                                   ihevce_me_optimised_function_list_t
*                                       *ps_me_optimised_function_list)
*
*  @brief  Implements predictive search, with square grid refinement. In this
*          case, we start with a bigger step size, like 4, refining upto a
*          variable number of pts, till we hit end of search range or hit a
*          minima. Then we refine using smaller steps. The bigger step size
*          like 4 or 2, do not use optimized SAD functions, they evaluate
*          SAD for each individual pt.
*
*          For every initial candidate the function:
*            1. clips the candidate mv to the allowed range,
*            2. runs up to i4_max_iters grid evaluations at i4_start_step,
*               moving the grid centre to the best point each time and
*               stopping as soon as the centre is the best point,
*            3. halves the step repeatedly, evaluating the grid around the
*               current centre at every step size, until the step reaches 0.
*          A candidate whose u1_num_steps_refine is 0 gets only its centre
*          point evaluated and no refinement.
*
*  @param[in,out]  ps_search_prms: All the params to this function
*
*  @param[in] ps_layer_ctxt: Context for the layer; supplies the reference
*             buffer list and its stride (recon or input, chosen by
*             ps_search_prms->i4_use_rec)
*
*  @param[in] ps_wt_inp_prms: Supplies the input block pointer for each
*             reference index (apu1_wt_inp[])
*
*  @param[in] e_me_quality_preset: When equal to ME_XTREME_SPEED_25 the grid
*             SAD is computed with pf_evalsad_grid_npu_MxN and results are
*             updated with hme_update_results_grid_pu_bestn_xtreme_speed();
*             otherwise hme_compute_grid_results() is used
*
*  @param[in] ps_me_optimised_function_list: Table providing
*             pf_evalsad_pt_npu_mxn_8bit and pf_evalsad_grid_npu_MxN
*
*  @return None
********************************************************************************
*/
void hme_pred_search_square_stepn(
    hme_search_prms_t *ps_search_prms,
    layer_ctxt_t *ps_layer_ctxt,
    wgt_pred_ctxt_t *ps_wt_inp_prms,
    ME_QUALITY_PRESETS_T e_me_quality_preset,
    ihevce_me_optimised_function_list_t *ps_me_optimised_function_list

)
{
    /* Stores the SAD for all parts at each pt in the grid */
    S32 ai4_sad_grid[9][TOT_NUM_PARTS];

    /* Filled by hme_create_valid_part_ids() from the part mask */
    S32 ai4_valid_part_ids[TOT_NUM_PARTS + 1];

    /* Attributes of input candidates */
    search_candt_t *ps_search_candts;
    /* Working copy of the current candidate; its mv is the grid centre */
    search_node_t s_search_node;

    /* Number of candidates to search */
    /* NOTE(review): i4_num_results is assigned below but never read here */
    S32 i4_num_candts, max_num_iters, i4_num_results;

    /* Input and reference attributes */
    S32 i4_inp_stride, i4_ref_stride, i4_ref_offset;

    /* The reference is actually an array of ptrs since there are several    */
    /* reference id. So an array gets passed from calling function           */
    U08 **ppu1_ref;

    /* Holds the search results at the end of this fxn */
    search_results_t *ps_search_results;

    /* These control number of parts and number of pts in grid to search */
    S32 i4_part_mask, i4_grid_mask;

    /* Blk width, blk height and blk size are derived from input params */
    /* NOTE(review): e_cu_size is assigned below but never read here     */
    BLK_SIZE_T e_blk_size;
    CU_SIZE_T e_cu_size;
    S32 i4_blk_wd, i4_blk_ht, i4_step, i4_candt, i4_iter;
    S32 i4_inp_off;
    /* Grid point id of the best point found by the latest evaluation */
    S32 i4_min_id;
    /* Points to the range limits for mv */
    range_prms_t *ps_range_prms;

    /*************************************************************************/
    /* Parameter structures for the error (SAD) computation and for the      */
    /* result update. They are filled in on the fly in this function.        */
    /*************************************************************************/
    err_prms_t s_err_prms;
    result_upd_prms_t s_result_prms;

    max_num_iters = ps_search_prms->i4_max_iters;
    /* Using the member 0 to store for all ref. idx., see in coarsest */
    ps_range_prms = ps_search_prms->aps_mv_range[0];
    i4_inp_stride = ps_search_prms->i4_inp_stride;
    /* Move to the location of the search blk in inp buffer */
    i4_inp_off = ps_search_prms->i4_cu_x_off;
    i4_inp_off += (ps_search_prms->i4_cu_y_off * i4_inp_stride);

    ps_search_results = ps_search_prms->ps_search_results;

    /*************************************************************************/
    /* Depending on flag i4_use_rec, we use either input of previously       */
    /* encoded pictures or we use recon of previously encoded pictures.      */
    /* The stride is taken from the same source as the buffer list.          */
    /*************************************************************************/
    if(ps_search_prms->i4_use_rec == 1)
    {
        i4_ref_stride = ps_layer_ctxt->i4_rec_stride;
        ppu1_ref = ps_layer_ctxt->ppu1_list_rec_fxfy;
    }
    else
    {
        i4_ref_stride = ps_layer_ctxt->i4_inp_stride;
        ppu1_ref = ps_layer_ctxt->ppu1_list_inp;
    }
    /* Offset of the search blk position within a reference buffer */
    i4_ref_offset = (i4_ref_stride * ps_search_prms->i4_y_off) + ps_search_prms->i4_x_off;

    /*************************************************************************/
    /* Obtain the blk size of the search blk. Assumed here that the search   */
    /* is done on a CU size, rather than any arbitrary blk size.             */
    /*************************************************************************/
    /* (ps_search_results was already loaded above; this repeats the load) */
    ps_search_results = ps_search_prms->ps_search_results;
    e_blk_size = ps_search_prms->e_blk_size;
    i4_blk_wd = (S32)gau1_blk_size_to_wd[e_blk_size];
    i4_blk_ht = (S32)gau1_blk_size_to_ht[e_blk_size];
    e_cu_size = ps_search_results->e_cu_size;
    i4_num_results = (S32)ps_search_results->u1_num_results_per_part;

    ps_search_candts = ps_search_prms->ps_search_candts;
    i4_num_candts = ps_search_prms->i4_num_init_candts;
    i4_part_mask = ps_search_prms->i4_part_mask;

    /*************************************************************************/
    /* This array stores the ids of the partitions whose                     */
    /* SADs are updated. Since the partitions whose SADs are updated may not */
    /* be in contiguous order, we supply another level of indirection.       */
    /*************************************************************************/
    hme_create_valid_part_ids(i4_part_mask, ai4_valid_part_ids);

    /* Update the parameters used to pass to SAD */
    /* input ptr, strides, SAD Grid, part mask, blk width and ht */
    /* The above are fixed ptrs, only pu1_ref and grid mask are  */
    /* varying params which are updated just before calling fxn  */
    s_err_prms.i4_inp_stride = i4_inp_stride;
    s_err_prms.i4_ref_stride = i4_ref_stride;
    s_err_prms.i4_part_mask = i4_part_mask;
    s_err_prms.pi4_sad_grid = &ai4_sad_grid[0][0];
    s_err_prms.i4_blk_wd = i4_blk_wd;
    s_err_prms.i4_blk_ht = i4_blk_ht;
    s_err_prms.pi4_valid_part_ids = ai4_valid_part_ids;

    /* Result update reads the same SAD grid and valid part id list that */
    /* the SAD computation is given above                                */
    s_result_prms.pf_mv_cost_compute = ps_search_prms->pf_mv_cost_compute;
    s_result_prms.ps_search_results = ps_search_results;
    s_result_prms.pi4_valid_part_ids = ai4_valid_part_ids;
    s_result_prms.i1_ref_idx = ps_search_prms->i1_ref_idx;
    s_result_prms.i4_part_mask = ps_search_prms->i4_part_mask;
    s_result_prms.ps_search_node_base = &s_search_node;
    s_result_prms.pi4_sad_grid = &ai4_sad_grid[0][0];

    /* Run through each of the candts in a loop */
    for(i4_candt = 0; i4_candt < i4_num_candts; i4_candt++)
    {
        S32 i4_num_refine;

        /* Every candidate restarts from the configured initial step */
        i4_step = ps_search_prms->i4_start_step;

        /* Work on a copy so the caller's candidate node is not modified */
        s_search_node = *(ps_search_candts->ps_search_node);

        /* initialize minimum cost for this candidate. As we search around */
        /* this candidate, this is used to check early exit, when in any   */
        /* given iteration, the center pt of the grid is lowest value      */
        s_result_prms.i4_min_cost = MAX_32BIT_VAL;

        /* If we need to do refinements, then we need to evaluate */
        /* neighbouring pts. Before doing so, we have to do       */
        /* basic range checks against max allowed mvs             */
        i4_num_refine = ps_search_candts->u1_num_steps_refine;

        CLIP_MV_WITHIN_RANGE(
            s_search_node.s_mv.i2_mvx, s_search_node.s_mv.i2_mvy, ps_range_prms, 0, 0, 0);

        /* The first time, we search all 8 pts around init candt plus the init candt */
        i4_grid_mask = 0x1ff;
        /* Input block is picked per reference index of this candidate */
        s_err_prms.pu1_inp = ps_wt_inp_prms->apu1_wt_inp[s_search_node.i1_ref_idx] + i4_inp_off;

        /* Phase 1: up to max_num_iters evaluations at the starting step. */
        /* If max_num_iters is not positive this loop does not run and    */
        /* phase 2 below starts directly from i4_start_step >> 1.         */
        for(i4_iter = 0; i4_iter < max_num_iters; i4_iter++)
        {
            /* Drop grid pts that would fall outside the allowed mv range */
            i4_grid_mask &= hme_clamp_grid_by_mvrange(&s_search_node, i4_step, ps_range_prms);

            /* Reference pointer for the current grid centre:             */
            /* buffer base + blk position + mv displacement               */
            s_err_prms.i4_grid_mask = i4_grid_mask;
            s_err_prms.pu1_ref = ppu1_ref[s_search_node.i1_ref_idx] + i4_ref_offset;
            s_err_prms.pu1_ref +=
                (s_search_node.s_mv.i2_mvx +
                 (s_search_node.s_mv.i2_mvy * s_err_prms.i4_ref_stride));

            s_result_prms.i4_step = i4_step;
            s_err_prms.i4_step = i4_step;
            s_result_prms.i4_grid_mask = i4_grid_mask;

            /* For Top,TopLeft and Left cand., get only center point SAD    */
            /* and do early exit                                            */
            if(0 == i4_num_refine)
            {
                s_err_prms.i4_grid_mask = 0x1;
                s_result_prms.i4_grid_mask = 0x1;

                /* sad pt fun. populates sad to 0th location, whereas update */
                /* fun. takes it based on part. id                           */
                /* So the SAD destination is temporarily advanced by the     */
                /* first valid part id, then restored after the call.        */
                s_err_prms.pi4_sad_grid =
                    s_result_prms.pi4_sad_grid + (1 * s_result_prms.pi4_valid_part_ids[0]);

                ps_me_optimised_function_list->pf_evalsad_pt_npu_mxn_8bit(&s_err_prms);

                s_err_prms.pi4_sad_grid = s_result_prms.pi4_sad_grid;

                if(ME_XTREME_SPEED_25 == e_me_quality_preset)
                    hme_update_results_grid_pu_bestn_xtreme_speed(&s_result_prms);
                else
                    hme_update_results_grid_pu_bestn(&s_result_prms);

                /* Forcing PT_C makes the loop break below; forcing step 0 */
                /* makes the halving loop after it a no-op                 */
                i4_min_id = (S32)PT_C; /* Center Point         */
                i4_step = 0; /* No further refinement */
                s_result_prms.i4_step = i4_step;
                s_err_prms.i4_step = i4_step;
            }
            else
            {
                if(ME_XTREME_SPEED_25 == e_me_quality_preset)
                {
                    err_prms_t *ps_err_prms = &s_err_prms;
                    ASSERT(ps_err_prms->i4_grid_mask != 1);
                    ASSERT((ps_err_prms->i4_part_mask == 4) || (ps_err_prms->i4_part_mask == 16));

                    /*****************************************************************/
                    /* In this case, there are no partial updates. The blk can be    */
                    /* of any type and need not be a CU. The only thing that matters */
                    /* here is the width of the blk, 4/8/(>=16)                      */
                    /*****************************************************************/
                    ps_me_optimised_function_list->pf_evalsad_grid_npu_MxN(&s_err_prms);

                    hme_update_results_grid_pu_bestn_xtreme_speed(&s_result_prms);
                }
                else
                {
                    /* Obtain SAD for all 9 pts in grid*/
                    hme_compute_grid_results(&s_err_prms, &s_result_prms, e_blk_size);
                }

                /* Early exit in case of centre being local minima */
                /* (i4_min_id is read back from the result params;   */
                /* presumably written by the result update function) */
                i4_min_id = s_result_prms.i4_min_id;
            }

            /* Grid mask for the next iteration is looked up from the best */
            /* pt id. NOTE(review): presumably this keeps only the pts not */
            /* already evaluated around the new centre - confirm against   */
            /* gai4_opt_grid_mask                                          */
            i4_grid_mask = gai4_opt_grid_mask[i4_min_id];

            /* Re-centre the grid on the best pt */
            s_search_node.s_mv.i2_mvx += (i4_step * gai1_grid_id_to_x[i4_min_id]);
            s_search_node.s_mv.i2_mvy += (i4_step * gai1_grid_id_to_y[i4_min_id]);
            if(i4_min_id == (S32)PT_C)
                break;
        }

        /* Phase 2: next keep reducing stepsize by factor of 2 */
        i4_step >>= 1;
        while(i4_step)
        {
            /* 0x1fe clears bit 0, the bit the single-point path above uses */
            /* for the centre pt, so only the 8 surrounding pts remain      */
            /* (further limited by the mv range)                            */
            i4_grid_mask = 0x1fe &
                           hme_clamp_grid_by_mvrange(&s_search_node, i4_step, ps_range_prms);

            s_err_prms.i4_grid_mask = i4_grid_mask;
            s_result_prms.i4_grid_mask = i4_grid_mask;
            s_err_prms.i4_step = i4_step;
            s_result_prms.i4_step = i4_step;
            /* Reference pointer for the current grid centre, as in phase 1 */
            s_err_prms.pu1_ref = ppu1_ref[s_search_node.i1_ref_idx] + i4_ref_offset;
            s_err_prms.pu1_ref +=
                (s_search_node.s_mv.i2_mvx +
                 (s_search_node.s_mv.i2_mvy * s_err_prms.i4_ref_stride));
            if(ME_XTREME_SPEED_25 == e_me_quality_preset)
            {
                err_prms_t *ps_err_prms = &s_err_prms;
                ASSERT(ps_err_prms->i4_grid_mask != 1);
                ASSERT((ps_err_prms->i4_part_mask == 4) || (ps_err_prms->i4_part_mask == 16));

                /*****************************************************************/
                /* In this case, there are no partial updates. The blk can be    */
                /* of any type and need not be a CU. The only thing that matters */
                /* here is the width of the blk, 4/8/(>=16)                      */
                /*****************************************************************/
                ps_me_optimised_function_list->pf_evalsad_grid_npu_MxN(&s_err_prms);

                hme_update_results_grid_pu_bestn_xtreme_speed(&s_result_prms);
            }
            else
            {
                hme_compute_grid_results(&s_err_prms, &s_result_prms, e_blk_size);
            }

            i4_min_id = s_result_prms.i4_min_id;

            /* Re-centre on the best pt; unlike phase 1 there is no early */
            /* exit, every step size down to 1 is evaluated               */
            s_search_node.s_mv.i2_mvx += (i4_step * gai1_grid_id_to_x[i4_min_id]);
            s_search_node.s_mv.i2_mvy += (i4_step * gai1_grid_id_to_y[i4_min_id]);

            i4_step >>= 1;
        }

        ps_search_candts++;
    }
}

/**
********************************************************************************
*  @fn     hme_pred_search_square_step1(hme_search_prms_t *ps_search_prms,
*                               layer_ctxt_t *ps_layer_ctxt)
*
*  @brief  Implements predictive search with square grid refinement. In this
*           case, the square grid is of step 1 always. since this is considered
*           to be more of a refinement search
*
*  @param[in,out]  ps_search_prms: All the params to this function
*
*  @param[in] ps_layer_ctxt: All info about this layer
*
*  @return None
********************************************************************************
*/
/**
********************************************************************************
*  @fn     void hme_pred_search(hme_search_prms_t *ps_search_prms,
*                               layer_ctxt_t *ps_layer_ctxt,
*                               wgt_pred_ctxt_t *ps_wt_inp_prms,
*                               S08 i1_grid_flag,
*                               ihevce_me_optimised_function_list_t
*                                   *ps_me_optimised_function_list)
*
*  @brief  Implements predictive search after removing duplicate candidates
*          from initial list. Each square grid (of step 1) is expanded
*          to nine search pts before the deduplication process. One point
*          cost is then evaluated for each unique node after the deduplication
*          process.
*
*          This function itself only fills the SAD and result-update
*          parameter structures (step fixed to 1) and then calls the routine
*          returned by hme_get_calc_sad_and_result_fxn(), which performs the
*          actual SAD computation and result update.
*
*  @param[in,out]  ps_search_prms: All the params to this function
*
*  @param[in] ps_layer_ctxt: All info about this layer; supplies the reference
*             buffer list and its stride (recon or input, chosen by
*             ps_search_prms->i4_use_rec)
*
*  @param[in] ps_wt_inp_prms: Forwarded unchanged to the selected routine
*
*  @param[in] i1_grid_flag: Forwarded to hme_get_calc_sad_and_result_fxn()
*             to select the routine
*
*  @param[in] ps_me_optimised_function_list: Not used by this function
*
*  @return None
********************************************************************************
*/
void hme_pred_search(
    hme_search_prms_t *ps_search_prms,
    layer_ctxt_t *ps_layer_ctxt,
    wgt_pred_ctxt_t *ps_wt_inp_prms,
    S08 i1_grid_flag,
    ihevce_me_optimised_function_list_t *ps_me_optimised_function_list

)
{
    /* Stores the SAD for all parts at each pt in the grid */
    S32 ai4_sad_grid[9 * TOT_NUM_PARTS];

    /* Holds the search results at the end of this fxn */
    search_results_t *ps_search_results;

    /* Input and reference strides */
    S32 i4_inp_stride, i4_ref_stride;

    /* The reference is actually an array of ptrs since there are several    */
    /* reference id. So an array gets passed from calling function           */
    U08 **ppu1_ref;

    /* Controls the partitions to search */
    S32 i4_part_mask;

    /* Blk width and blk height are derived from the blk size */
    BLK_SIZE_T e_blk_size;
    S32 i4_blk_wd, i4_blk_ht;

    /*************************************************************************/
    /* Routine that computes the error and updates the results, together     */
    /* with the two parameter structures it consumes. The structures are     */
    /* filled in on the fly in this function.                                */
    /*************************************************************************/
    PF_CALC_SAD_AND_RESULT pf_calc_sad_and_result;
    err_prms_t s_err_prms;
    result_upd_prms_t s_result_prms;
    fullpel_refine_ctxt_t *ps_fullpel_refine_ctxt = ps_search_prms->ps_fullpel_refine_ctxt;

    i4_inp_stride = ps_search_prms->i4_inp_stride;

    /*************************************************************************/
    /* Depending on flag i4_use_rec, we use either input of previously       */
    /* encoded pictures or we use recon of previously encoded pictures.      */
    /* The stride must come from the same source as the buffer list: the     */
    /* input list is addressed with the input stride, as in the other        */
    /* search functions of this file.                                        */
    /*************************************************************************/
    if(ps_search_prms->i4_use_rec == 1)
    {
        i4_ref_stride = ps_layer_ctxt->i4_rec_stride;
        ppu1_ref = ps_layer_ctxt->ppu1_list_rec_fxfy;
    }
    else
    {
        i4_ref_stride = ps_layer_ctxt->i4_inp_stride;
        ppu1_ref = ps_layer_ctxt->ppu1_list_inp;
    }

    /* Obtain the blk size of the search blk. Assumed here that the search   */
    /* is done on a CU size, rather than any arbitrary blk size.             */
    ps_search_results = ps_search_prms->ps_search_results;
    e_blk_size = ps_search_prms->e_blk_size;
    i4_blk_wd = gau1_blk_size_to_wd[e_blk_size];
    i4_blk_ht = gau1_blk_size_to_ht[e_blk_size];

    i4_part_mask = ps_search_prms->i4_part_mask;

    /* Update the parameters used to pass to SAD: strides, SAD grid, part    */
    /* mask, blk width and ht, step and number of partitions.                */
    /* NOTE(review): pi4_valid_part_ids is not set on either structure here, */
    /* unlike hme_pred_search_no_encode(); confirm the selected routine does */
    /* not read it from these structures.                                    */
    s_err_prms.i4_inp_stride = i4_inp_stride;
    s_err_prms.i4_ref_stride = i4_ref_stride;
    s_err_prms.i4_part_mask = i4_part_mask;
    s_err_prms.pi4_sad_grid = &ai4_sad_grid[0];
    s_err_prms.i4_blk_wd = i4_blk_wd;
    s_err_prms.i4_blk_ht = i4_blk_ht;
    s_err_prms.i4_step = 1;
    s_err_prms.i4_num_partitions = ps_fullpel_refine_ctxt->i4_num_valid_parts;

    /* Result update works on the same SAD grid buffer */
    s_result_prms.pf_mv_cost_compute = ps_search_prms->pf_mv_cost_compute;
    s_result_prms.ps_search_results = ps_search_results;
    s_result_prms.i1_ref_idx = (S08)ps_search_prms->i1_ref_idx;
    s_result_prms.pi4_sad_grid = ai4_sad_grid;
    s_result_prms.i4_part_mask = i4_part_mask;
    s_result_prms.i4_step = 1;

    pf_calc_sad_and_result = hme_get_calc_sad_and_result_fxn(
        i1_grid_flag,
        ps_search_prms->u1_is_cu_noisy,
        i4_part_mask,
        ps_fullpel_refine_ctxt->i4_num_valid_parts,
        ps_search_results->u1_num_results_per_part);

    pf_calc_sad_and_result(
        ps_search_prms, ps_wt_inp_prms, &s_err_prms, &s_result_prms, ppu1_ref, i4_ref_stride);
}

/**
********************************************************************************
*  @fn     hme_get_calc_sad_and_result_explicit_fxn
*
*  @brief  Picks, from the optimised function list, the routine that computes
*          point SADs and keeps the best result(s).
*
*          Selection:
*            - u1_num_results_per_part (1 or 2) picks the "1_best_result" or
*              "2_best_results" family;
*            - i4_part_mask == 1 picks the "8x8" variant, and i1_grid_enable
*              then picks between the plain and the "_for_grid" flavour;
*            - any other i4_part_mask picks the "8x8_4x4" variant
*              (i1_grid_enable is ignored there).
*
*  @param[in] ps_me_optimised_function_list: Table of candidate routines
*
*  @param[in] i4_part_mask: Partition mask of the search
*
*  @param[in] i4_num_partitions: Only checked through ASSERT: expected to be
*             1 when i4_part_mask == 1 and 5 otherwise
*
*  @param[in] i1_grid_enable: Zero selects the plain 8x8 routine, non-zero
*             the "_for_grid" one
*
*  @param[in] u1_num_results_per_part: Number of best results kept per part
*
*  @return Selected routine, or NULL when u1_num_results_per_part is neither
*          1 nor 2
********************************************************************************
*/
static __inline FT_CALC_SAD_AND_RESULT *hme_get_calc_sad_and_result_explicit_fxn(
    ihevce_me_optimised_function_list_t *ps_me_optimised_function_list,
    S32 i4_part_mask,
    S32 i4_num_partitions,
    S08 i1_grid_enable,
    U08 u1_num_results_per_part)
{
    switch(u1_num_results_per_part)
    {
    case 2:
        if(i4_part_mask != 1)
        {
            ASSERT(i4_num_partitions == 5);

            return ps_me_optimised_function_list
                ->pf_calc_pt_sad_and_2_best_results_explicit_8x8_4x4;
        }

        ASSERT(i4_num_partitions == 1);

        if(i1_grid_enable != 0)
        {
            return ps_me_optimised_function_list
                ->pf_calc_pt_sad_and_2_best_results_explicit_8x8_for_grid;
        }

        return ps_me_optimised_function_list->pf_calc_pt_sad_and_2_best_results_explicit_8x8;

    case 1:
        if(i4_part_mask != 1)
        {
            ASSERT(i4_num_partitions == 5);

            return ps_me_optimised_function_list
                ->pf_calc_pt_sad_and_1_best_result_explicit_8x8_4x4;
        }

        ASSERT(i4_num_partitions == 1);

        if(i1_grid_enable != 0)
        {
            return ps_me_optimised_function_list
                ->pf_calc_pt_sad_and_1_best_result_explicit_8x8_for_grid;
        }

        return ps_me_optimised_function_list->pf_calc_pt_sad_and_1_best_result_explicit_8x8;

    default:
        /* No routine exists for any other result count */
        return NULL;
    }
}

/**
********************************************************************************
*  @fn     void hme_pred_search_no_encode(hme_search_prms_t *ps_search_prms,
*                                         layer_ctxt_t *ps_layer_ctxt,
*                                         wgt_pred_ctxt_t *ps_wt_inp_prms,
*                                         S32 *pi4_valid_part_ids,
*                                         S32 disable_refine,
*                                         ME_QUALITY_PRESETS_T e_me_quality_preset,
*                                         S08 i1_grid_enable,
*                                         ihevce_me_optimised_function_list_t
*                                             *ps_me_optimised_function_list)
*
*  @brief  Implements predictive search after removing duplicate candidates
*          from initial list. Each square grid (of step 1) is expanded
*          to nine search pts before the deduplication process. One point
*          cost is then evaluated for each unique node after the deduplication
*          process.
*
*          This function itself builds the valid partition id list from the
*          part mask, fills the SAD and result-update parameter structures
*          (step fixed to 1) and then calls the routine returned by
*          hme_get_calc_sad_and_result_explicit_fxn(), which performs the
*          actual SAD computation and result update.
*
*  @param[in,out]  ps_search_prms: All the params to this function
*
*  @param[in] ps_layer_ctxt: All info about this layer; supplies the reference
*             buffer list and its stride (recon or input, chosen by
*             ps_search_prms->i4_use_rec)
*
*  @param[in] ps_wt_inp_prms: Forwarded unchanged to the selected routine
*
*  @param[out] pi4_valid_part_ids: Caller-provided array that is filled by
*              hme_create_valid_part_ids() and then referenced by both
*              parameter structures. NOTE(review): presumably must hold
*              TOT_NUM_PARTS + 1 entries, as the local array used by
*              hme_pred_search_square_stepn() does - confirm with callers
*
*  @param[in] disable_refine: Not used by this function
*
*  @param[in] e_me_quality_preset: Not used by this function
*
*  @param[in] i1_grid_enable: Forwarded to the routine selector
*
*  @param[in] ps_me_optimised_function_list: Table the routine is picked from
*
*  @return None
********************************************************************************
*/
void hme_pred_search_no_encode(
    hme_search_prms_t *ps_search_prms,
    layer_ctxt_t *ps_layer_ctxt,
    wgt_pred_ctxt_t *ps_wt_inp_prms,
    S32 *pi4_valid_part_ids,
    S32 disable_refine,
    ME_QUALITY_PRESETS_T e_me_quality_preset,
    S08 i1_grid_enable,
    ihevce_me_optimised_function_list_t *ps_me_optimised_function_list)
{
    /* Stores the SAD for all parts at each pt in the grid */
    S32 ai4_sad_grid[9 * TOT_NUM_PARTS];

    /* Holds the search results at the end of this fxn */
    search_results_t *ps_search_results;

    /* Input and reference strides */
    S32 i4_inp_stride, i4_ref_stride;

    /* The reference is actually an array of ptrs since there are several    */
    /* reference id. So an array gets passed from calling function           */
    U08 **ppu1_ref;

    /* Controls the partitions to search */
    S32 i4_part_mask;

    /* Blk width and blk height are derived from the blk size */
    BLK_SIZE_T e_blk_size;
    S32 i4_blk_wd, i4_blk_ht;

    /*************************************************************************/
    /* Routine that computes the error and updates the results, together     */
    /* with the two parameter structures it consumes. The structures are     */
    /* filled in on the fly in this function.                                */
    /*************************************************************************/
    PF_CALC_SAD_AND_RESULT pf_calc_sad_and_result;
    err_prms_t s_err_prms;
    result_upd_prms_t s_result_prms;
    S32 i4_num_partitions;

    i4_inp_stride = ps_search_prms->i4_inp_stride;

    /*************************************************************************/
    /* Depending on flag i4_use_rec, we use either input of previously       */
    /* encoded pictures or we use recon of previously encoded pictures.      */
    /* The stride is taken from the same source as the buffer list.          */
    /*************************************************************************/
    if(ps_search_prms->i4_use_rec == 1)
    {
        i4_ref_stride = ps_layer_ctxt->i4_rec_stride;
        ppu1_ref = ps_layer_ctxt->ppu1_list_rec_fxfy;
    }
    else
    {
        i4_ref_stride = ps_layer_ctxt->i4_inp_stride;
        ppu1_ref = ps_layer_ctxt->ppu1_list_inp;
    }

    /* Obtain the blk size of the search blk. Assumed here that the search   */
    /* is done on a CU size, rather than any arbitrary blk size.             */
    ps_search_results = ps_search_prms->ps_search_results;
    e_blk_size = ps_search_prms->e_blk_size;
    i4_blk_wd = gau1_blk_size_to_wd[e_blk_size];
    i4_blk_ht = gau1_blk_size_to_ht[e_blk_size];

    i4_part_mask = ps_search_prms->i4_part_mask;

    /*************************************************************************/
    /* This array stores the ids of the partitions whose                     */
    /* SADs are updated. Since the partitions whose SADs are updated may not */
    /* be in contiguous order, we supply another level of indirection.       */
    /*************************************************************************/
    i4_num_partitions = hme_create_valid_part_ids(i4_part_mask, pi4_valid_part_ids);

    /* Update the parameters used to pass to SAD: strides, SAD grid, part    */
    /* mask, blk width and ht, step, valid part ids and their count          */
    s_err_prms.i4_inp_stride = i4_inp_stride;
    s_err_prms.i4_ref_stride = i4_ref_stride;
    s_err_prms.i4_part_mask = i4_part_mask;
    s_err_prms.pi4_sad_grid = &ai4_sad_grid[0];
    s_err_prms.i4_blk_wd = i4_blk_wd;
    s_err_prms.i4_blk_ht = i4_blk_ht;
    s_err_prms.i4_step = 1;
    s_err_prms.pi4_valid_part_ids = pi4_valid_part_ids;
    s_err_prms.i4_num_partitions = i4_num_partitions;

    /* Result update works on the same SAD grid and valid part id list */
    s_result_prms.pf_mv_cost_compute = ps_search_prms->pf_mv_cost_compute;
    s_result_prms.ps_search_results = ps_search_results;
    s_result_prms.pi4_valid_part_ids = pi4_valid_part_ids;
    s_result_prms.i1_ref_idx = (S08)ps_search_prms->i1_ref_idx;
    s_result_prms.pi4_sad_grid = ai4_sad_grid;
    s_result_prms.i4_part_mask = i4_part_mask;
    s_result_prms.i4_step = 1;

    /* NOTE(review): the selector returns NULL when u1_num_results_per_part  */
    /* is neither 1 nor 2, and the result is called unconditionally below;   */
    /* callers must guarantee one of those two values.                       */
    pf_calc_sad_and_result = hme_get_calc_sad_and_result_explicit_fxn(
        ps_me_optimised_function_list,
        i4_part_mask,
        i4_num_partitions,
        i1_grid_enable,
        ps_search_results->u1_num_results_per_part);

    pf_calc_sad_and_result(
        ps_search_prms, ps_wt_inp_prms, &s_err_prms, &s_result_prms, ppu1_ref, i4_ref_stride);
}