/* C++ source  |  622 lines  |  25.77 KB */

/*******************************************************************************
* Copyright 2001-2018 Intel Corporation
* All Rights Reserved.
*
* If this  software was obtained  under the  Intel Simplified  Software License,
* the following terms apply:
*
* The source code,  information  and material  ("Material") contained  herein is
* owned by Intel Corporation or its  suppliers or licensors,  and  title to such
* Material remains with Intel  Corporation or its  suppliers or  licensors.  The
* Material  contains  proprietary  information  of  Intel or  its suppliers  and
* licensors.  The Material is protected by  worldwide copyright  laws and treaty
* provisions.  No part  of  the  Material   may  be  used,  copied,  reproduced,
* modified, published,  uploaded, posted, transmitted,  distributed or disclosed
* in any way without Intel's prior express written permission.  No license under
* any patent,  copyright or other  intellectual property rights  in the Material
* is granted to  or  conferred  upon  you,  either   expressly,  by implication,
* inducement,  estoppel  or  otherwise.  Any  license   under such  intellectual
* property rights must be express and approved by Intel in writing.
*
* Unless otherwise agreed by Intel in writing,  you may not remove or alter this
* notice or  any  other  notice   embedded  in  Materials  by  Intel  or Intel's
* suppliers or licensors in any way.
*
*
* If this  software  was obtained  under the  Apache License,  Version  2.0 (the
* "License"), the following terms apply:
*
* You may  not use this  file except  in compliance  with  the License.  You may
* obtain a copy of the License at http://www.apache.org/licenses/LICENSE-2.0
*
*
* Unless  required  by   applicable  law  or  agreed  to  in  writing,  software
* distributed under the License  is distributed  on an  "AS IS"  BASIS,  WITHOUT
* WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
*
* See the   License  for the   specific  language   governing   permissions  and
* limitations under the License.
*******************************************************************************/

#if defined( _OPENMP )
  #include <omp.h>
#endif

#include "owndefs.h"
#include "ippcpdefs.h"
#include "ippcp.h"
#ifdef _PCS
#undef _PCS
#define _MY_PCS_DISABLED
#endif
#include "dispatcher.h"
#ifdef _MY_PCS_DISABLED
#define _PCS
#endif
#if defined( _IPP_DATA )

/* Feature bits produced by cpGetFeatures() (or forced through the NOCHECK path of
   owncpSetCpuFeaturesAndIdx()); zero means "not detected yet". */
static Ipp64u cpFeatures = 0;
/* Feature bits currently enabled; this is what ippcpGetEnabledCpuFeatures() and
   the cpGetFeature() family consult. */
static Ipp64u cpFeaturesMask = 0;

/* Detects the CPU features, caches them in the two statics above and stores them in *pFeaturesMask. */
static int cpGetFeatures( Ipp64u* pFeaturesMask );
/* Defined elsewhere. As used in this file, buf receives four 32-bit values read back as
   eax, ebx, ecx, edx (buf[0]..buf[3]); presumably executes CPUID with the given EAX/ECX inputs. */
extern void IPP_CDECL cpGetReg( int* buf, int valEAX, int valECX );
/* Defined elsewhere; bit 0 of the result is treated as "AVX state enabled by the OS". */
extern int IPP_CDECL cp_is_avx_extension();
/* Defined elsewhere; a non-zero result is treated as "AVX-512 state enabled by the OS". */
extern int IPP_CDECL cp_is_avx512_extension();
/* Forward declaration; the definition is further down in this file. */
IppStatus owncpSetCpuFeaturesAndIdx( Ipp64u cpuFeatures, int* index );

/* Returns the mask of CPU features that are currently enabled in the library. */
IPPFUN( Ipp64u, ippcpGetEnabledCpuFeatures, ( void ))
{
    const Ipp64u enabledMask = cpFeaturesMask;
    return enabledMask;
}

/*===================================================================*/
/*
//  Name:       ippcpGetCpuFeatures
//  Purpose:    reports the detected CPU features
//  Returns:    ippStsNoErr on success,
//              ippStsNotSupportedCpu when cpGetFeatures() reports failure (returns 0);
//              a null pFeaturesMask is rejected by IPP_BAD_PTR1_RET
//  Parameters:
//    pFeaturesMask   receives the feature mask
//
//  Notes:      the cached value is used when detection has already run
//              (cpFeatures != 0); otherwise detection is performed now
*/
IPPFUN( IppStatus, ippcpGetCpuFeatures, ( Ipp64u* pFeaturesMask ))
{
  IPP_BAD_PTR1_RET( pFeaturesMask )

  if( 0 == cpFeatures ){
      /* nothing cached yet: run the detection, which also fills *pFeaturesMask */
      if( 0 == cpGetFeatures( pFeaturesMask )) return ippStsNotSupportedCpu;
      return ippStsNoErr;
  }

  /* cached result; deliberately not masked with cpFeaturesMask */
  *pFeaturesMask = cpFeatures;
  return ippStsNoErr;
}

/*===================================================================*/

/* Returns 1 when every bit of Feature is set in the enabled-features mask, otherwise 0. */
int cpGetFeature( Ipp64u Feature )
{
  return (( cpFeaturesMask & Feature ) == Feature ) ? 1 : 0;
}

/* k0-prefixed twin of cpGetFeature(): 1 if all bits of Feature are enabled, else 0. */
int k0_cpGetFeature( Ipp64u Feature )
{
  return (( cpFeaturesMask & Feature ) == Feature ) ? 1 : 0;
}
/* n0-prefixed twin of cpGetFeature(): 1 if all bits of Feature are enabled, else 0. */
int n0_cpGetFeature( Ipp64u Feature )
{
  return (( cpFeaturesMask & Feature ) == Feature ) ? 1 : 0;
}
/* l9-prefixed twin of cpGetFeature(): 1 if all bits of Feature are enabled, else 0. */
int l9_cpGetFeature( Ipp64u Feature )
{
  return (( cpFeaturesMask & Feature ) == Feature ) ? 1 : 0;
}
/* e9-prefixed twin of cpGetFeature(): 1 if all bits of Feature are enabled, else 0. */
int e9_cpGetFeature( Ipp64u Feature )
{
  return (( cpFeaturesMask & Feature ) == Feature ) ? 1 : 0;
}
/* y8-prefixed twin of cpGetFeature(): 1 if all bits of Feature are enabled, else 0. */
int y8_cpGetFeature( Ipp64u Feature )
{
  return (( cpFeaturesMask & Feature ) == Feature ) ? 1 : 0;
}

/* h9-prefixed twin of cpGetFeature(): 1 if all bits of Feature are enabled, else 0. */
int h9_cpGetFeature( Ipp64u Feature )
{
  return (( cpFeaturesMask & Feature ) == Feature ) ? 1 : 0;
}
/* g9-prefixed twin of cpGetFeature(): 1 if all bits of Feature are enabled, else 0. */
int g9_cpGetFeature( Ipp64u Feature )
{
  return (( cpFeaturesMask & Feature ) == Feature ) ? 1 : 0;
}
/* p8-prefixed twin of cpGetFeature(): 1 if all bits of Feature are enabled, else 0. */
int p8_cpGetFeature( Ipp64u Feature )
{
  return (( cpFeaturesMask & Feature ) == Feature ) ? 1 : 0;
}

/*===================================================================*/
/* Single-bit masks BITnn == (1 << nn), used below to test individual bits of the
   32-bit register values returned by cpGetReg(). */
#define BIT00 0x00000001
#define BIT01 0x00000002
#define BIT02 0x00000004
#define BIT03 0x00000008
#define BIT04 0x00000010
#define BIT05 0x00000020
#define BIT06 0x00000040
#define BIT07 0x00000080
#define BIT08 0x00000100
#define BIT09 0x00000200
#define BIT10 0x00000400
#define BIT11 0x00000800
#define BIT12 0x00001000
#define BIT13 0x00002000
#define BIT14 0x00004000
#define BIT15 0x00008000
#define BIT16 0x00010000
#define BIT17 0x00020000
#define BIT18 0x00040000
#define BIT19 0x00080000
#define BIT20 0x00100000
#define BIT21 0x00200000
#define BIT22 0x00400000
#define BIT23 0x00800000
#define BIT24 0x01000000
#define BIT25 0x02000000
#define BIT26 0x04000000
#define BIT27 0x08000000
#define BIT28 0x10000000
#define BIT29 0x20000000
#define BIT30 0x40000000
#define BIT31 0x80000000


/*
//  Name:       cpGetFeatures
//  Purpose:    builds the ippCPUID_xxx feature mask from the register values
//              returned by cpGetReg() for leaves 0, 0x80000000, 1, 7 (only when
//              the max basic leaf is >= 7) and 0x80000001 (only when the max
//              extended leaf is >= 0x80000001)
//  Returns:    always 1 (see the note on the return statement)
//  Parameters:
//    pFeaturesMask   receives the detected feature mask
//
//  Notes:      the result is also stored in the file statics cpFeatures and
//              cpFeaturesMask, i.e. every detected feature becomes enabled
*/
static int cpGetFeatures( Ipp64u* pFeaturesMask )
{
    Ipp32u  buf[4];
    Ipp32u  eax_, ebx_, ecx_, edx_, tmp;
    Ipp64u  mask;
    int flgFMA=0, flgINT=0, flgGPR=0;   // the three ingredients required for the AVX2 flag
    Ipp32u idBaseMax, idExtdMax;

    cpGetReg((int*)buf, 0, 0);          // get max value for basic info.
    idBaseMax = buf[0];
    cpGetReg((int*)buf, 0x80000000, 0); // get max value for extended info.
    idExtdMax = buf[0];

    cpGetReg( (int*)buf, 1, 0 );
    eax_ = (Ipp32u)buf[0];              // kept for the Knights Corner signature test at the end
    ecx_ = (Ipp32u)buf[2];
    edx_ = (Ipp32u)buf[3];
    mask = 0;
    if( edx_ & BIT23 ) mask |= ippCPUID_MMX;          // edx[23] - MMX(TM) Technology
    if( edx_ & BIT25 ) mask |= ippCPUID_SSE;          // edx[25] - Intel(R) Streaming SIMD Extensions (Intel(R) SSE)
    if( edx_ & BIT26 ) mask |= ippCPUID_SSE2;         // edx[26] - Intel(R) Streaming SIMD Extensions 2 (Intel(R) SSE2)
    if( ecx_ & BIT00 ) mask |= ippCPUID_SSE3;         // ecx[0]  - Intel(R) Streaming SIMD Extensions 3 (Intel(R) SSE3) (formerly codenamed Prescott)
    if( ecx_ & BIT09 ) mask |= ippCPUID_SSSE3;        // ecx[9]  - Supplemental Streaming SIMD Extensions 3 (SSSE3) (formerly codenamed Merom)
    if( ecx_ & BIT22 ) mask |= ippCPUID_MOVBE;        // ecx[22] - Intel(R) instruction MOVBE (Intel Atom(R) processor)
    if( ecx_ & BIT19 ) mask |= ippCPUID_SSE41;        // ecx[19] - Intel(R) Streaming SIMD Extensions 4.1 (Intel(R) SSE4.1) (formerly codenamed Penryn)
    if( ecx_ & BIT20 ) mask |= ippCPUID_SSE42;        // ecx[20] - Intel(R) Streaming SIMD Extensions 4.2 (Intel(R) SSE4.2) (formerly codenamed Nehalem)
    if( ecx_ & BIT28 ) mask |= ippCPUID_AVX;          // ecx[28] - Intel(R) Advanced Vector Extensions (Intel(R) AVX) (formerly codenamed Sandy Bridge)
    // 0x18000000 == BIT27|BIT28: the OS-support query is made only when both bits are set
    // (ecx[27] is OSXSAVE per the Intel SDM CPUID leaf 1 layout)
    if(( ecx_ & 0x18000000 ) == 0x18000000 ){
        tmp = (Ipp32u)cp_is_avx_extension();
        if( tmp & BIT00 ) mask |= ippAVX_ENABLEDBYOS; // Intel(R) AVX is supported by OS
    }
    if( ecx_ & BIT25 ) mask |= ippCPUID_AES;          // ecx[25] - Intel(R) AES New Instructions
    if( ecx_ & BIT01 ) mask |= ippCPUID_CLMUL;        // ecx[1]  - Intel(R) instruction PCLMULQDQ
    if( ecx_ & BIT30 ) mask |= ippCPUID_RDRAND;       // ecx[30] - Intel(R) instruction RDRAND
    if( ecx_ & BIT29 ) mask |= ippCPUID_F16C;         // ecx[29] - Intel(R) instruction F16C
         // Intel(R) AVX2 instructions extension: only if 3 features are enabled at once:
         // FMA, Intel(R) AVX 256 int & GPR BMI (bit-manipulation);
    if( ecx_ & BIT12 ) flgFMA = 1; else flgFMA = 0;   // ecx[12] - FMA 128 & 256 bit
    if( idBaseMax >= 7 ){                             // get CPUID.eax = 7
       cpGetReg( (int*)buf, 0x7, 0 );
       ebx_ = (Ipp32u)buf[1];
       ecx_ = (Ipp32u)buf[2];                         // from here on ecx_/edx_ hold the leaf 7 values
       edx_ = (Ipp32u)buf[3];
       if( ebx_ & BIT05 ) flgINT = 1;
       else flgINT = 0;                               // ebx[5], Intel(R) Advanced Vector Extensions 2 (Intel(R) AVX2) (int 256bits)
           // ebx[3] - enabled ANDN, BEXTR, BLSI, BLSMK, BLSR, TZCNT
           // ebx[8] - enabled BZHI, MULX, PDEP, PEXT, RORX, SARX, SHLX, SHRX
       if(( ebx_ & BIT03 )&&( ebx_ & BIT08 )) flgGPR = 1;
       else flgGPR = 0;                               // VEX-encoded GPR instructions (GPR BMI)
           // Intel(R) architecture formerly codenamed Broadwell instructions extension
       if( ebx_ & BIT19 ) mask |= ippCPUID_ADCOX;     // eax[0x7] -->> ebx:: Bit 19: Intel(R) instructions ADOX/ADCX
       if( ebx_ & BIT18 ) mask |= ippCPUID_RDSEED;    // eax[0x7] -->> ebx:: Bit 18: Intel(R) instruction RDSEED
       if( ebx_ & BIT29 ) mask |= ippCPUID_SHA;       // eax[0x7] -->> ebx:: Bit 29: Intel(R) Secure Hash Algorithm Extensions
           // Intel(R) Advanced Vector Extensions 512 (Intel(R) AVX-512) extension
       if(cp_is_avx512_extension()){
           mask |= ippAVX512_ENABLEDBYOS;             // Intel(R) AVX-512 is supported by OS
       }
       if( ebx_ & BIT16 ) mask |= ippCPUID_AVX512F;   // ebx[16] - Intel(R) AVX-512 Foundation
       if( ebx_ & BIT26 ) mask |= ippCPUID_AVX512PF;  // ebx[26] - Intel(R) AVX-512 Prefetch instructions
       if( ebx_ & BIT27 ) mask |= ippCPUID_AVX512ER;  // ebx[27] - Intel(R) AVX-512 Exponential and Reciprocal instructions
       if( ebx_ & BIT28 ) mask |= ippCPUID_AVX512CD;  // ebx[28] - Intel(R) AVX-512 Conflict Detection
       if( ebx_ & BIT17 ) mask |= ippCPUID_AVX512DQ;  // ebx[17] - Intel(R) AVX-512 Dword & Quadword
       if( ebx_ & BIT30 ) mask |= ippCPUID_AVX512BW;  // ebx[30] - Intel(R) AVX-512 Byte & Word
       if( ebx_ & BIT31 ) mask |= ippCPUID_AVX512VL;  // ebx[31] - Intel(R) AVX-512 Vector Length extensions
       if( ecx_ & BIT01 ) mask |= ippCPUID_AVX512VBMI; // ecx[01] - Intel(R) AVX-512 Vector Byte Manipulation Instructions
       if( edx_ & BIT02 ) mask |= ippCPUID_AVX512_4VNNIW; // edx[02] - Intel(R) AVX-512 Vector instructions for deep learning enhanced word variable precision
       if( edx_ & BIT03 ) mask |= ippCPUID_AVX512_4FMADDPS; // edx[03] - Intel(R) AVX-512 Vector instructions for deep learning floating-point single precision
       // the combination of the ippCPUID_MPX and ippCPUID_AVX flags can be used to tell that the arch is at least the one formerly codenamed Skylake
       if( ebx_ & BIT14 ) mask |= ippCPUID_MPX;       // ebx[14] - Intel(R) Memory Protection Extensions (Intel(R) MPX)
       if( ebx_ & BIT21 ) mask |= ippCPUID_AVX512IFMA;  // ebx[21] - Intel(R) AVX-512 IFMA PMADD52
    }
    mask = ( flgFMA && flgINT && flgGPR ) ? (mask | ippCPUID_AVX2) : mask; // AVX2 is reported only when FMA, 256-bit int and BMI are all present

    if( idExtdMax >= 0x80000001 ){ // get CPUID.eax=0x80000001
       cpGetReg( (int*)buf, 0x80000001, 0 );
       ecx_ = (Ipp32u)buf[2];
           // Intel(R) architecture formerly codenamed Broadwell instructions extension
       if( ecx_ & BIT08 ) mask |= ippCPUID_PREFETCHW; // eax[0x80000001] -->> ecx:: Bit 8: Intel(R) instruction PREFETCHW
    }
       // Intel(R) architecture formerly codenamed Knights Corner:
       // (eax_ << 20) >> 24 extracts bits 4..11 of the leaf 1 eax value, which must equal 0xb1
    if(((( eax_ << 20 ) >> 24 ) ^ 0xb1 ) == 0 ){
        mask = mask | ippCPUID_KNC;
    }
    cpFeatures = mask;
    cpFeaturesMask = mask; /* all CPU features are enabled by default */
    *pFeaturesMask = cpFeatures;
    return 1; /* if somebody needs to check for cpuid support - do it at the top of the function and return 0 if it's not supported */
}

/* Library index chosen by the non-dynamic ippcpSetCpuFeatures(); -1 until that function has run. */
int ippcpJumpIndexForMergedLibs = -1;
/* Thread count recorded by ippcpSetNumThreads() and reported by ippcpGetEnabledNumThreads(); starts at 1. */
static int cpthreads_omp_of_n_ipp = 1;

/* Returns the thread count most recently recorded by ippcpSetNumThreads() (1 by default). */
IPPFUN( int, ippcpGetEnabledNumThreads,( void ))
{
    const int enabledThreads = cpthreads_omp_of_n_ipp;
    return enabledThreads;
}


/* Feature sets that owncpFeaturesToIdx() requires in full before it selects an AVX-512 library. */
#define AVX3X_FEATURES ( ippCPUID_AVX512F|ippCPUID_AVX512CD|ippCPUID_AVX512VL|ippCPUID_AVX512BW|ippCPUID_AVX512DQ )
#define AVX3M_FEATURES ( ippCPUID_AVX512F|ippCPUID_AVX512CD|ippCPUID_AVX512PF|ippCPUID_AVX512ER )
// AVX3X_FEATURES identifies an Intel(R) Xeon(R) processor with Intel(R) AVX-512
// AVX3M_FEATURES identifies Intel(R) Many Integrated Core Architecture


/*
//  Name:       owncpFeaturesToIdx
//  Purpose:    maps a requested feature set to a library index, testing the
//              instruction-set levels from the newest down to the oldest
//  Returns:    ippStsNoErr                 a library was selected and the request already
//                                          contained every bit of that library's mask
//              ippStsFeaturesCombination   a library was selected but the request lacked
//                                          some bits of that library's mask
//              ippStsNotSupportedCpu       only the MMX fallback (or SSE on x64 builds) matched
//  Parameters:
//    cpuFeatures   in: requested features; out: request OR-ed with the selected library's mask
//    index         out: selected LIB_xxx index (0 if no branch is taken)
//
//  Notes:      the AVX and AVX-512 branches additionally require the OS-enabled bit in
//              the file-static cpFeatures (the detected features), not in *cpuFeatures
*/
IppStatus owncpFeaturesToIdx(  Ipp64u* cpuFeatures, int* index )
{
   IppStatus ownStatus = ippStsNoErr;
   Ipp64u    mask = 0;

   *index = 0;

   if(( AVX3X_FEATURES  == ( *cpuFeatures & AVX3X_FEATURES  ))&&
      ( ippAVX512_ENABLEDBYOS & cpFeatures )){                         /* Intel(R) architecture formerly codenamed Skylake ia32=S0, x64=K0 */
         mask = AVX3X_MSK;
         *index = LIB_AVX3X;
   } else
   if(( AVX3M_FEATURES  == ( *cpuFeatures & AVX3M_FEATURES  ))&&
      ( ippAVX512_ENABLEDBYOS & cpFeatures )){                         /* Intel(R) architecture formerly codenamed Knights Landing ia32=i0, x64=N0 */
       mask = AVX3M_MSK;
       *index = LIB_AVX3M;
   } else
   if(( ippCPUID_AVX2  == ( *cpuFeatures & ippCPUID_AVX2  ))&&
      ( ippAVX_ENABLEDBYOS & cpFeatures )){                            /* Intel(R) architecture formerly codenamed Haswell ia32=H9, x64=L9 */
       mask = AVX2_MSK;
       *index = LIB_AVX2;
   } else
   if(( ippCPUID_AVX   == ( *cpuFeatures & ippCPUID_AVX   ))&&
      ( ippAVX_ENABLEDBYOS & cpFeatures )){                            /* Intel(R) architecture formerly codenamed Sandy Bridge ia32=G9, x64=E9 */
       mask = AVX_MSK;
       *index = LIB_AVX;
   } else
   if( ippCPUID_SSE42 == ( *cpuFeatures & ippCPUID_SSE42 )){           /* Intel(R) architecture formerly codenamed Nehalem or Westmere = Penryn + Intel(R) SSE4.2 + ?PCLMULQDQ + ?(Intel(R) AES New Instructions) + ?(Intel(R) Secure Hash Algorithm Extensions) */
       mask = SSE42_MSK;                                               /* or new Intel Atom(R) processor formerly codenamed Silvermont */
       *index = LIB_SSE42;
   } else
   if( ippCPUID_SSE41 == ( *cpuFeatures & ippCPUID_SSE41 )){           /* Intel(R) architecture formerly codenamed Penryn ia32=P8, x64=Y8 */
       mask = SSE41_MSK;
       *index = LIB_SSE41;
   } else
   if( ippCPUID_MOVBE == ( *cpuFeatures & ippCPUID_MOVBE )) {          /* Intel Atom(R) processor formerly codenamed Silverthorne ia32=S8, x64=N8 */
       mask = ATOM_MSK;
       *index = LIB_ATOM;
   } else
   if( ippCPUID_SSSE3 == ( *cpuFeatures & ippCPUID_SSSE3 )) {          /* Intel(R) architecture formerly codenamed Merom ia32=V8, x64=U8 (letters etymology is unknown) */
       mask = SSSE3_MSK;
       *index = LIB_SSSE3;
   } else
   if( ippCPUID_SSE3  == ( *cpuFeatures & ippCPUID_SSE3  )) {          /* Intel(R) architecture formerly codenamed Prescott ia32=W7, x64=M7 */
       mask = SSE3_MSK;
       *index = LIB_SSE3;
   } else
   if( ippCPUID_SSE2  == ( *cpuFeatures & ippCPUID_SSE2  )) {          /* Intel(R) architecture formerly codenamed Willamette ia32=W7, x64=PX */
       mask = SSE2_MSK;
       *index = LIB_SSE2;
   } else
   if( ippCPUID_SSE   == ( *cpuFeatures & ippCPUID_SSE   )) {          /* Intel(R) Pentium(R) processor III ia32=PX only */
       mask = SSE_MSK;
       *index = LIB_SSE;
#if (defined( _WIN32E ) || defined( linux32e ) || defined( OSXEM64T )) && !(defined( _ARCH_LRB2 ))
       ownStatus = ippStsNotSupportedCpu;                              /* the lowest CPU supported by Intel(R) Integrated Performance Primitives (Intel(R) IPP) must at least support Intel(R) SSE2 for x64 */
#endif
   } else
   /* NOTE(review): (x & ippCPUID_MMX) can never exceed ippCPUID_MMX, so this test looks
      always true and acts as the catch-all; if so, the _IPP_QUARK else below is unreachable - confirm intent */
   if( ippCPUID_MMX   >= ( *cpuFeatures & ippCPUID_MMX   )) {          /* not supported, PX dispatched */
       mask = MMX_MSK;
       *index = LIB_MMX;
       ownStatus = ippStsNotSupportedCpu; /* the lowest CPU supported by Intel(R) IPP must at least support Intel(R) SSE for ia32 or Intel(R) SSE2 for x64 */
   }
#if defined ( _IPP_QUARK)
     else {
       mask = PX_MSK;
       *index = LIB_PX;
       ownStatus = ippStsNoErr; /* Quark builds: the generic PX code is dispatched and treated as supported */
   }
#endif

    /* a status set above (not-supported) takes precedence over the incomplete-combination warning */
    if(( mask != ( *cpuFeatures & mask ))&&( ownStatus == ippStsNoErr ))
        ownStatus = ippStsFeaturesCombination; /* warning if combination of features is incomplete */
   *cpuFeatures |= mask;
   return ownStatus;
}

#ifdef _PCS

/* Optional hooks defined elsewhere; a zero pointer means "not installed" and the
   callers in this file skip the call in that case. */
extern IppStatus (IPP_STDCALL *pcpSetCpuFeatures)( Ipp64u cpuFeatures );
extern IppStatus (IPP_STDCALL *pcpSetNumThreads)( int numThr );
extern IppStatus (IPP_STDCALL *pcpGetNumThreads)( int* pNumThr );

/*
//  Name:       ippcpSetNumThreads (_PCS variant)
//  Purpose:    forwards the requested thread count to the pcpSetNumThreads hook
//  Returns:    ippStsNoErr when no hook is installed, otherwise the hook's status
//  Parameters:
//    numThr    requested number of threads
//
//  Notes:      the value is remembered locally only when the hook reports ippStsNoErr
*/
IPPFUN( IppStatus, ippcpSetNumThreads, ( int numThr ))
{
   IppStatus hookSts;

   /* no hook installed: nothing is changed and success is reported */
   if (pcpSetNumThreads == 0)
      return ippStsNoErr;

   hookSts = pcpSetNumThreads(numThr);
   if (hookSts == ippStsNoErr)
      cpthreads_omp_of_n_ipp = numThr;

   return hookSts;
}

/*
//  Name:       ippcpGetNumThreads (_PCS variant)
//  Purpose:    queries the thread count through the pcpGetNumThreads hook
//  Returns:    ippStsNoErr when no hook is installed (*pNumThr is left untouched),
//              otherwise the hook's status; a null pNumThr is rejected by IPP_BAD_PTR1_RET
//  Parameters:
//    pNumThr   receives the number of threads
*/
IPPFUN( IppStatus, ippcpGetNumThreads, (int* pNumThr) )
{
   IPP_BAD_PTR1_RET( pNumThr )

   if (pcpGetNumThreads == 0)
      return ippStsNoErr;

   return pcpGetNumThreads(pNumThr);
}
#else


/*
//  Name:       ippcpSetNumThreads
//  Purpose:    records the number of threads the library may use
//  Returns:    with _OPENMP: ippStsNoErr once numThr has passed IPP_BAD_SIZE_RET
//              (presumably a check for a positive value - macro defined elsewhere);
//              without _OPENMP: ippStsNoOperation, nothing is stored
//  Parameters:
//    numThr    requested number of threads
*/
IPPFUN( IppStatus, ippcpSetNumThreads, ( int numThr ))
{
#if defined( _OPENMP )
   IPP_BAD_SIZE_RET( numThr )
   cpthreads_omp_of_n_ipp = numThr;
   return ippStsNoErr;
#else
   UNREFERENCED_PARAMETER(numThr);
   return ippStsNoOperation;
#endif
}

/*
//  Name:       ippcpGetNumThreads
//  Purpose:    reports the number of threads recorded for the library
//  Returns:    with _OPENMP: ippStsNoErr, *pNumThr = recorded thread count;
//              without _OPENMP: ippStsNoOperation, *pNumThr = 1;
//              a null pNumThr is rejected by IPP_BAD_PTR1_RET
//  Parameters:
//    pNumThr   receives the number of threads
*/
IPPFUN( IppStatus, ippcpGetNumThreads, (int* pNumThr) )
{
   IPP_BAD_PTR1_RET( pNumThr )

#if defined( _OPENMP )
   *pNumThr = cpthreads_omp_of_n_ipp;
   return ippStsNoErr;
#else
   *pNumThr = 1;
   return ippStsNoOperation;
#endif
}

#endif /* #ifdef _PCS */

#ifdef _IPP_DYNAMIC

/* Callback type: takes the library index to switch to and returns a status. */
typedef IppStatus (IPP_STDCALL *DYN_RELOAD)( int );
/* Reload callback installed by owncpRegisterLib(); zero when none is registered. */
static DYN_RELOAD IppDispatcher; /* ippCP only */
/* Index last passed to IppDispatcher; -1 means "nothing loaded yet". */
static int currentCpu = -1;      /* control for disabling the same DLL re-loading */

/*
// Installs the reload callback used by ippcpInit()/ippcpSetCpuFeatures() and
// clears the three pcpXxx hook pointers.
// reload - function DynReload() that is defined in ippmain.gen; each domain
//          has its own DynReload() function
*/
void owncpRegisterLib( DYN_RELOAD reload )
{
    IppDispatcher = reload;

    pcpGetNumThreads  = 0;
    pcpSetNumThreads  = 0;
    pcpSetCpuFeatures = 0;
}

/*
// Removes the reload callback, forgets which library index was loaded and
// clears the three pcpXxx hook pointers.
*/
void owncpUnregisterLib( void )
{
   pcpGetNumThreads  = 0;
   pcpSetNumThreads  = 0;
   pcpSetCpuFeatures = 0;

   currentCpu = -1;
   IppDispatcher = 0;
}

/*
//  Name:       ippcpSetCpuFeatures (dynamic-library variant)
//  Purpose:    enables the requested features and, when the resulting library
//              index differs from the one currently loaded, calls the reload callback
//  Returns:    the reload/hook status when it is neither ippStsNoErr nor
//              ippStsNoOperation, otherwise the status of owncpSetCpuFeaturesAndIdx()
//  Parameters:
//    cpuFeatures   requested feature mask
*/
IPPFUN( IppStatus, ippcpSetCpuFeatures,( Ipp64u cpuFeatures ))
{
   int       libIdx = 0;
   IppStatus reloadSts = ippStsNoErr;
   IppStatus featuresSts;

    featuresSts = owncpSetCpuFeaturesAndIdx( cpuFeatures, &libIdx );

    /* reload only when a callback exists and the target index actually changed */
    if(( IppDispatcher )&&( currentCpu != libIdx )) {
        reloadSts = IppDispatcher( libIdx );
        currentCpu = libIdx;
    }

#ifdef _PCS
    if (pcpSetCpuFeatures != 0 && reloadSts >= ippStsNoErr)
    {
        /* Pass down features to Waterfall dll */
        reloadSts = pcpSetCpuFeatures(cpuFeatures);
    }
    if (pcpSetNumThreads != 0 && reloadSts >= ippStsNoErr)
    {
        /* Pass down the recorded thread count to Waterfall dll */
        reloadSts = pcpSetNumThreads(cpthreads_omp_of_n_ipp);
    }
#endif

    if( reloadSts == ippStsNoErr || reloadSts == ippStsNoOperation )
        return featuresSts;
    return reloadSts;
}

/*
//  Name:       ippcpInit (dynamic-library variant)
//  Purpose:    detects the CPU features, enables them and calls the reload
//              callback (if one is registered) for the resulting library index
//  Returns:    the first status that is not ippStsNoErr, checked in this order:
//              feature detection, feature/index selection, reload callback
//
//  Notes:      unlike ippcpSetCpuFeatures() the reload callback is called
//              unconditionally, even if the index did not change
*/
IPPFUN( IppStatus, ippcpInit,( void ))
{
    int       index = 0;
    IppStatus status, statusf, statusi;
    Ipp64u    cpuFeatures = 0; /* stays well-defined even if detection returns without writing it */

    statusf = ippcpGetCpuFeatures( &cpuFeatures );
    statusi = owncpSetCpuFeaturesAndIdx( cpuFeatures, &index ); /* ownSetFeatures instead of ippSetFeatures because unconditional initialization is needed */
    if( IppDispatcher ) status = IppDispatcher( index ); /* call DynReload() function for each domain */
    else status = ippStsNoErr;
    currentCpu = index;

    if( ippStsNoErr != statusf ) return statusf;
    if( ippStsNoErr != statusi ) return statusi;
    return status;
}


#else /* _IPP_DYNAMIC */

/*
//  Name:       ippcpInit (static/merged-library variant)
//  Purpose:    detects the CPU features and enables all of them
//  Returns:    the status of feature detection when it is not ippStsNoErr,
//              otherwise the status of ippcpSetCpuFeatures()
//
//  Notes:      with _OPENMP the thread count is first set to
//              min( omp_get_num_procs(), omp_get_max_threads() )
*/
IPPFUN( IppStatus, ippcpInit,( void ))
{
    Ipp64u     cpuFeatures = 0; /* stays well-defined even if detection returns without writing it */
    IppStatus  detectStatus;
    IppStatus  setStatus;

#if defined( _OPENMP )
    ippcpSetNumThreads( IPP_MIN( omp_get_num_procs(), omp_get_max_threads()));
#endif
    detectStatus = ippcpGetCpuFeatures( &cpuFeatures );
    setStatus    = ippcpSetCpuFeatures( cpuFeatures );

    /* a detection problem is reported first, as in the dynamic-library variant */
    if( ippStsNoErr != detectStatus ) return detectStatus;
    return setStatus;
}


/*
//  Name:       ippcpSetCpuFeatures (static/merged-library variant)
//  Purpose:    enables the requested features and records the library index
//              in ippcpJumpIndexForMergedLibs
//  Returns:    the status of owncpSetCpuFeaturesAndIdx()
//  Parameters:
//    cpuFeatures   requested feature mask
//
//  Notes:      cpFeaturesMask is finally set to the caller's value exactly as passed in,
//              overriding the value stored by owncpSetCpuFeaturesAndIdx();
//              with _OPENMP the thread count is re-derived on every call
*/
IPPFUN( IppStatus, ippcpSetCpuFeatures,( Ipp64u cpuFeatures ))
{
   int       libIdx = 0;
   IppStatus sts;

#if defined( _OPENMP )
    ippcpSetNumThreads( IPP_MIN( omp_get_num_procs(), omp_get_max_threads()));
#endif
    sts = owncpSetCpuFeaturesAndIdx( cpuFeatures, &libIdx );

    cpFeaturesMask = cpuFeatures;
    ippcpJumpIndexForMergedLibs = libIdx;

    return sts;
}

#endif

/*
//  Name:       owncpSetCpuFeaturesAndIdx
//  Purpose:    refreshes the detected features (or overrides them when
//              ippCPUID_NOCHECK is requested), selects the library index and
//              stores the resulting enabled-features mask in cpFeaturesMask
//  Returns:    the status of owncpFeaturesToIdx()
//  Parameters:
//    cpuFeatures   requested feature mask, optionally including ippCPUID_NOCHECK
//    index         receives the selected library index
*/
IppStatus owncpSetCpuFeaturesAndIdx( Ipp64u cpuFeatures, int* index )
{
    IppStatus idxStatus;

    *index = 0;

    if( 0 == ( ippCPUID_NOCHECK & cpuFeatures )){
        /* Regular path: the real features are re-detected unconditionally, which also
           undoes the effect of a possible earlier NOCHECK call on cpFeatures. */
        Ipp64u detected;
        cpGetFeatures( &detected );
    } else {
        /* NOCHECK: the static cpFeatures is set from the caller's value and the real CPUID
           features are ignored. Whoever uses this path must understand what it does and the
           possibly unpredictable consequences. The only known purpose is an environment where
           the CPUID instruction is disabled (for example Intel(R) Software Guard Extensions). */
        cpuFeatures &= ( IPP_MAX_64U ^ ippCPUID_NOCHECK );
        cpFeatures = cpuFeatures;
    }

    idxStatus = owncpFeaturesToIdx( &cpuFeatures, index );
    cpFeaturesMask = cpuFeatures; /* request plus the bits added by owncpFeaturesToIdx() */

    return idxStatus;
}

static struct {
   int sts;
   const char *msg;
} ippcpMsg[] = {
/* ippStatus */
/* -9999 */ ippStsCpuNotSupportedErr, "ippStsCpuNotSupportedErr: The target CPU is not supported",
/* -9702 */ MSG_NO_SHARED, "No shared libraries were found in the Waterfall procedure",
/* -9701 */ MSG_NO_DLL, "No DLLs were found in the Waterfall procedure",
/* -9700 */ MSG_LOAD_DLL_ERR, "Error at loading of %s library",
/* -1016 */ ippStsQuadraticNonResidueErr, "ippStsQuadraticNonResidueErr: SQRT operation on quadratic non-residue value",
/* -1015 */ ippStsPointAtInfinity, "ippStsPointAtInfinity: Point at infinity is detected",
/* -1014 */ ippStsOFBSizeErr, "ippStsOFBSizeErr: Incorrect value for crypto OFB block size",
/* -1013 */ ippStsIncompleteContextErr, "ippStsIncompleteContextErr: Crypto: set up of context is not complete",
/* -1012 */ ippStsCTRSizeErr, "ippStsCTRSizeErr: Incorrect value for crypto CTR block size",
/* -1011 */ ippStsEphemeralKeyErr, "ippStsEphemeralKeyErr: ECC: Invalid ephemeral key",
/* -1010 */ ippStsMessageErr, "ippStsMessageErr: ECC: Invalid message digest",
/* -1009 */ ippStsShareKeyErr, "ippStsShareKeyErr: ECC: Invalid share key",
/* -1008 */ ippStsIvalidPrivateKey, "ippStsIvalidPrivateKey ECC: Invalid private key",
/* -1007 */ ippStsOutOfECErr, "ippStsOutOfECErr: ECC: Point out of EC",
/* -1006 */ ippStsECCInvalidFlagErr, "ippStsECCInvalidFlagErr: ECC: Invalid Flag",
/* -1005 */ ippStsUnderRunErr, "ippStsUnderRunErr: Error in data under run",
/* -1004 */ ippStsPaddingErr, "ippStsPaddingErr: Detected padding error indicates the possible data corruption",
/* -1003 */ ippStsCFBSizeErr, "ippStsCFBSizeErr: Incorrect value for crypto CFB block size",
/* -1002 */ ippStsPaddingSchemeErr, "ippStsPaddingSchemeErr: Invalid padding scheme",
/* -1001 */ ippStsBadModulusErr, "ippStsBadModulusErr: Bad modulus caused a failure in module inversion",
/*  -216 */ ippStsUnknownStatusCodeErr, "ippStsUnknownStatusCodeErr: Unknown status code",
/*  -221 */ ippStsLoadDynErr, "ippStsLoadDynErr: Error when loading the dynamic library",
/*   -15 */ ippStsLengthErr, "ippStsLengthErr: Incorrect value for string length",
/*   -14 */ ippStsNotSupportedModeErr, "ippStsNotSupportedModeErr: The requested mode is currently not supported",
/*   -13 */ ippStsContextMatchErr, "ippStsContextMatchErr: Context parameter does not match the operation",
/*   -12 */ ippStsScaleRangeErr, "ippStsScaleRangeErr: Scale bounds are out of range",
/*   -11 */ ippStsOutOfRangeErr, "ippStsOutOfRangeErr: Argument is out of range, or point is outside the image",
/*   -10 */ ippStsDivByZeroErr, "ippStsDivByZeroErr: An attempt to divide by zero",
/*    -9 */ ippStsMemAllocErr, "ippStsMemAllocErr: Memory allocated for the operation is not enough",
/*    -8 */ ippStsNullPtrErr, "ippStsNullPtrErr: Null pointer error",
/*    -7 */ ippStsRangeErr, "ippStsRangeErr: Incorrect values for bounds: the lower bound is greater than the upper bound",
/*    -6 */ ippStsSizeErr, "ippStsSizeErr: Incorrect value for data size",
/*    -5 */ ippStsBadArgErr, "ippStsBadArgErr: Incorrect arg/param of the function",
/*    -4 */ ippStsNoMemErr, "ippStsNoMemErr: Not enough memory for the operation",
/*    -2 */ ippStsErr, "ippStsErr: Unknown/unspecified error, -2",
/*     0 */ ippStsNoErr, "ippStsNoErr: No errors",
/*     1 */ ippStsNoOperation, "ippStsNoOperation: No operation has been executed",
/*     2 */ ippStsDivByZero, "ippStsDivByZero: Zero value(s) for the divisor in the Div function",
/*    25 */ ippStsInsufficientEntropy, "ippStsInsufficientEntropy: Generation of the prime/key failed due to insufficient entropy in the random seed and stimulus bit string",
/*    36 */ ippStsNotSupportedCpu, "The CPU is not supported",
/*    36 */ ippStsFeaturesCombination, "Wrong combination of features",
};

/* /////////////////////////////////////////////////////////////////////////////
//  Name:       ippcpGetStatusString
//  Purpose:    converts an Intel(R) IPP status code to its message string
//  Returns:    the message of the matching ippcpMsg entry; for a code that is
//              not in the table, the message of ippStsUnknownStatusCodeErr
//  Parameters:
//    StsCode   Intel(R) IPP status code
//
//  Notes:      the returned string belongs to the library and must not be released
*/
IPPFUN( const char*, ippcpGetStatusString, ( IppStatus StsCode ) )
{
   unsigned int idx = 0;

   while( idx < IPP_COUNT_OF( ippcpMsg )) {
      if( ippcpMsg[idx].sts == StsCode )
         return ippcpMsg[idx].msg;
      idx++;
   }

   /* not found: look up the dedicated "unknown status" entry, which is present in the table */
   return ippcpGetStatusString( ippStsUnknownStatusCodeErr );
}

/* Defined elsewhere; its return value is what ippcpGetCpuClocks() hands back. */
extern Ipp64u IPP_CDECL cp_get_pentium_counter (void);

/* /////////////////////////////////////////////////////////////////////////////
//  Name:       ippcpGetCpuClocks
//  Purpose:    reads the time stamp counter (TSC) register
//  Returns:    TSC value
//
//  Note:      A hardware exception is possible if TSC reading is not supported by
//             the current chipset
*/
IPPFUN( Ipp64u, ippcpGetCpuClocks, (void) )
{
   return (Ipp64u)cp_get_pentium_counter();
}

#endif /* _IPP_DATA */