/****************************************************************************** * * Copyright (C) 2015 The Android Open Source Project * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at: * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. * ***************************************************************************** * Originally developed and contributed by Ittiam Systems Pvt. Ltd, Bangalore */ /** ******************************************************************************* * @file * ideint_cac_ssse3.c * * @brief * This file include the definitions of the combing artifact check function * of the de-interlacer and some variant of that. * * @author * Ittiam * * @par List of Functions: * cac_4x8() * ideint_cac() * * @remarks * In the de-interlacer workspace, cac is not a seperate assembly module as * it comes along with the de_int_decision() function. But in C-Model, to * keep the things cleaner, it was made to be a separate function during * cac experiments long after the assembly was written by Mudit. * ******************************************************************************* */ /*****************************************************************************/ /* File Includes */ /*****************************************************************************/ /* System include files */ #include <stdio.h> #include <stdint.h> #include <string.h> #include <stdlib.h> #include <immintrin.h> /* User include files */ #include "icv_datatypes.h" #include "icv_macros.h" #include "icv.h" #include "icv_variance.h" #include "icv_sad.h" #include "ideint.h" #include "ideint_defs.h" #include "ideint_structs.h" #include "ideint_cac.h" /** ******************************************************************************* * * @brief * Combing artifact check function for 8x8 block * * @par Description * Determines CAC for 8x8 block by calling 8x4 CAC function * * @param[in] pu1_top * Top field * * @param[in] pu1_bot * Bottom field * * @param[in] top_strd * Top field Stride * * @param[in] bot_strd * Bottom field stride * * @returns * combing artifact flag (1 = detected, 0 = not detected) * * @remarks * ******************************************************************************* */ WORD32 ideint_cac_8x8_ssse3(UWORD8 *pu1_top, UWORD8 *pu1_bot, WORD32 top_strd, WORD32 bot_strd) { WORD32 ca; /* combing artifact result */ WORD32 i; WORD32 adj[2] = {0}; WORD32 alt[2] = {0}; WORD32 sum_1, sum_2, sum_3, sum_4; WORD32 sum_diff, diff_sum; __m128i top[4]; __m128i bot[4]; __m128i sum_t[4]; __m128i sum_b[4]; __m128i zero; zero = _mm_setzero_si128(); for(i = 0; i < 4; i++) { /* Load top */ top[i] = (__m128i)_mm_loadl_epi64((__m128i *) (pu1_top)); pu1_top += top_strd; /* Load bottom */ bot[i] = (__m128i)_mm_loadl_epi64((__m128i *) (pu1_bot)); pu1_bot += bot_strd; /* Unpack */ top[i] = _mm_unpacklo_epi8(top[i], zero); bot[i] = _mm_unpacklo_epi8(bot[i], zero); /* Compute row sums */ sum_t[i] = _mm_sad_epu8(top[i], zero); sum_b[i] = _mm_sad_epu8(bot[i], zero); } /* Compute row based alt and adj */ for(i = 0; i < 4; i += 2) { sum_1 = _mm_cvtsi128_si32(sum_t[i + 0]); sum_2 = _mm_cvtsi128_si32(sum_b[i + 0]); sum_diff = ABS_DIF(sum_1, sum_2); if(sum_diff >= RSUM_CSUM_THRESH) adj[0] += sum_diff; sum_3 = _mm_cvtsi128_si32(sum_t[i + 1]); sum_4 = _mm_cvtsi128_si32(sum_b[i + 1]); sum_diff = ABS_DIF(sum_3, sum_4); if(sum_diff >= RSUM_CSUM_THRESH) adj[0] += sum_diff; alt[0] += ABS_DIF(sum_1, sum_3); alt[0] += ABS_DIF(sum_2, sum_4); sum_1 = _mm_cvtsi128_si32(_mm_srli_si128(sum_t[i + 0], 8)); sum_2 = _mm_cvtsi128_si32(_mm_srli_si128(sum_b[i + 0], 8)); sum_diff = ABS_DIF(sum_1, sum_2); if(sum_diff >= RSUM_CSUM_THRESH) adj[1] += sum_diff; sum_3 = _mm_cvtsi128_si32(_mm_srli_si128(sum_t[i + 1], 8)); sum_4 = _mm_cvtsi128_si32(_mm_srli_si128(sum_b[i + 1], 8)); sum_diff = ABS_DIF(sum_3, sum_4); if(sum_diff >= RSUM_CSUM_THRESH) adj[1] += sum_diff; alt[1] += ABS_DIF(sum_1, sum_3); alt[1] += ABS_DIF(sum_2, sum_4); } /* Compute column based adj */ { __m128i avg1, avg2; __m128i top_avg, bot_avg; __m128i min, max, diff, thresh; __m128i mask; avg1 = _mm_avg_epu8(top[0], top[1]); avg2 = _mm_avg_epu8(top[2], top[3]); top_avg = _mm_avg_epu8(avg1, avg2); avg1 = _mm_avg_epu8(bot[0], bot[1]); avg2 = _mm_avg_epu8(bot[2], bot[3]); bot_avg = _mm_avg_epu8(avg1, avg2); min = _mm_min_epu8(top_avg, bot_avg); max = _mm_max_epu8(top_avg, bot_avg); diff = _mm_sub_epi16(max, min); thresh = _mm_set1_epi16((RSUM_CSUM_THRESH >> 2) - 1); mask = _mm_cmpgt_epi16(diff, thresh); diff = _mm_and_si128(diff, mask); diff_sum = _mm_extract_epi16(diff, 0); diff_sum += _mm_extract_epi16(diff, 1); diff_sum += _mm_extract_epi16(diff, 2); diff_sum += _mm_extract_epi16(diff, 3); adj[0] += diff_sum << 2; diff_sum = _mm_extract_epi16(diff, 4); diff_sum += _mm_extract_epi16(diff, 5); diff_sum += _mm_extract_epi16(diff, 6); diff_sum += _mm_extract_epi16(diff, 7); adj[1] += diff_sum << 2; } /* Compute column based alt */ { __m128i avg1, avg2; __m128i even_avg, odd_avg, diff; avg1 = _mm_avg_epu8(top[0], bot[0]); avg2 = _mm_avg_epu8(top[2], bot[2]); even_avg = _mm_avg_epu8(avg1, avg2); avg1 = _mm_avg_epu8(top[1], bot[1]); avg2 = _mm_avg_epu8(top[3], bot[3]); odd_avg = _mm_avg_epu8(avg1, avg2); diff = _mm_sad_epu8(even_avg, odd_avg); diff_sum = _mm_cvtsi128_si32(diff); alt[0] += diff_sum << 2; diff_sum = _mm_cvtsi128_si32(_mm_srli_si128(diff, 8)); alt[1] += diff_sum << 2; } alt[0] += (alt[0] >> SAD_BIAS_MULT_SHIFT) + (SAD_BIAS_ADDITIVE >> 1); alt[1] += (alt[1] >> SAD_BIAS_MULT_SHIFT) + (SAD_BIAS_ADDITIVE >> 1); ca = (alt[0] < adj[0]); ca |= (alt[1] < adj[1]); return ca; }