// Copyright 2015 The Gemmlowp Authors. All Rights Reserved. // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. // You may obtain a copy of the License at // // http://www.apache.org/licenses/LICENSE-2.0 // // Unless required by applicable law or agreed to in writing, software // distributed under the License is distributed on an "AS IS" BASIS, // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. // See the License for the specific language governing permissions and // limitations under the License. // kernel_default.h: Chooses default GEMM and GEMV kernels for the // host platform. #ifndef GEMMLOWP_INTERNAL_KERNEL_DEFAULT_H_ #define GEMMLOWP_INTERNAL_KERNEL_DEFAULT_H_ #include "../public/bit_depth.h" #include "common.h" #include "kernel_reference.h" namespace gemmlowp { template <bool MaxProductIsLessThan4096, bool LhsAlwaysNonzero> struct DefaultKernelImpl {}; // Partial specialization implementing the logic that if we want to use // a kernel for LhsAlwaysNonzero but do not have such a kernel, then we fall // back to a generic kernel not taking advantage of LhsAlwaysNonzero. template <bool LhsAlwaysNonzero> struct DefaultKernelImpl<true, LhsAlwaysNonzero> : DefaultKernelImpl<false, LhsAlwaysNonzero> {}; // Partial specialization implementing the logic that if we want to use // a kernel for MaxProductIsLessThan4096 but do not have such a kernel, then we // fall back to a generic kernel not taking advantage of // MaxProductIsLessThan4096. template <bool MaxProductIsLessThan4096> struct DefaultKernelImpl<MaxProductIsLessThan4096, true> : DefaultKernelImpl<MaxProductIsLessThan4096, false> {}; template <typename BitDepthParams> struct DefaultKernel : DefaultKernelImpl<(BitDepthParams::LhsRange::kMaxValue * BitDepthParams::RhsRange::kMaxValue < 4096), (BitDepthParams::LhsRange::kMinValue > 0)> {}; } // end namespace gemmlowp #define GEMMLOWP_SET_DEFAULT_KERNEL(MaxProductIsLessThan4096, \ LhsAlwaysNonzero, Kernel) \ namespace gemmlowp { \ template <> \ struct DefaultKernelImpl<MaxProductIsLessThan4096, LhsAlwaysNonzero> \ : Kernel {}; \ } #if defined GEMMLOWP_NEON_32 #include "kernel_neon.h" GEMMLOWP_SET_DEFAULT_KERNEL(false, false, NEON_32_Kernel12x4Depth2) GEMMLOWP_SET_DEFAULT_KERNEL(true, false, NEON_32_Kernel12x4Depth2Assuming12BitProducts) GEMMLOWP_SET_DEFAULT_KERNEL(false, true, NEON_32bit_GEMM_Int8Operands_LhsNonzero) #elif defined GEMMLOWP_NEON_64 #include "kernel_neon.h" GEMMLOWP_SET_DEFAULT_KERNEL(false, false, NEON_64_Kernel12x8Depth2) GEMMLOWP_SET_DEFAULT_KERNEL(false, true, NEON_64bit_GEMM_Int8Operands_LhsNonzero) #elif defined(GEMMLOWP_MSA) #include "kernel_msa.h" GEMMLOWP_SET_DEFAULT_KERNEL(false, false, MSA_Kernel12x8Depth2) #elif defined GEMMLOWP_SSE4_32 #include "kernel_sse.h" GEMMLOWP_SET_DEFAULT_KERNEL(false, false, SSE4_32_Kernel4x4Depth2) #elif defined GEMMLOWP_SSE4_64 #include "kernel_sse.h" GEMMLOWP_SET_DEFAULT_KERNEL(false, false, SSE4_64_Kernel12x4Depth2) #else #include "kernel_reference.h" namespace gemmlowp { typedef ReferenceKernel<KernelFormat< KernelSideFormat<CellFormat<4, 16, CellOrder::WidthMajor>, 1>, KernelSideFormat<CellFormat<4, 16, CellOrder::WidthMajor>, 1> > > DefaultReferenceKernel; } GEMMLOWP_SET_DEFAULT_KERNEL(false, false, DefaultReferenceKernel) #endif #endif // GEMMLOWP_INTERNAL_KERNEL_DEFAULT_H_