/* * Copyright (C) 2012 The Android Open Source Project * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. */ #ifndef RSD_CPU_CORE_H #define RSD_CPU_CORE_H #include "rsd_cpu.h" #include "rsSignal.h" #include "rsContext.h" #include "rsCppUtils.h" #include "rsElement.h" #include "rsScriptC.h" #include "rsCpuCoreRuntime.h" namespace android { namespace renderscript { // Whether the CPU we're running on supports SIMD instructions extern bool gArchUseSIMD; // Function types found in RenderScript code typedef void (*ReduceAccumulatorFunc_t)(const RsExpandKernelDriverInfo *info, uint32_t x1, uint32_t x2, uint8_t *accum); typedef void (*ReduceCombinerFunc_t)(uint8_t *accum, const uint8_t *other); typedef void (*ReduceInitializerFunc_t)(uint8_t *accum); typedef void (*ReduceOutConverterFunc_t)(uint8_t *out, const uint8_t *accum); typedef void (*ForEachFunc_t)(const RsExpandKernelDriverInfo *info, uint32_t x1, uint32_t x2, uint32_t outStride); typedef void (*InvokeFunc_t)(void *params); typedef void (*InitOrDtorFunc_t)(void); typedef int (*RootFunc_t)(void); struct ReduceDescription { ReduceAccumulatorFunc_t accumFunc; // expanded accumulator function ReduceInitializerFunc_t initFunc; // user initializer function ReduceCombinerFunc_t combFunc; // user combiner function ReduceOutConverterFunc_t outFunc; // user outconverter function size_t accumSize; // accumulator datum size, in bytes }; // Internal driver callback used to execute a kernel typedef void (*WorkerCallback_t)(void *usr, uint32_t idx); class RsdCpuScriptImpl; class RsdCpuReferenceImpl; struct ScriptTLSStruct { android::renderscript::Context * mContext; const android::renderscript::Script * mScript; RsdCpuScriptImpl *mImpl; }; // MTLaunchStruct passes information about a multithreaded kernel launch. struct MTLaunchStructCommon { RsdCpuReferenceImpl *rs; RsdCpuScriptImpl *script; uint32_t mSliceSize; volatile int mSliceNum; bool isThreadable; // Boundary information about the launch RsLaunchDimensions start; RsLaunchDimensions end; // Points to MTLaunchStructForEach::fep::dim or // MTLaunchStructReduce::redp::dim. RsLaunchDimensions *dimPtr; }; struct MTLaunchStructForEach : public MTLaunchStructCommon { // Driver info structure RsExpandKernelDriverInfo fep; ForEachFunc_t kernel; const Allocation *ains[RS_KERNEL_INPUT_LIMIT]; Allocation *aout[RS_KERNEL_INPUT_LIMIT]; }; struct MTLaunchStructReduce : public MTLaunchStructCommon { // Driver info structure RsExpandKernelDriverInfo redp; const Allocation *ains[RS_KERNEL_INPUT_LIMIT]; ReduceAccumulatorFunc_t accumFunc; ReduceInitializerFunc_t initFunc; ReduceCombinerFunc_t combFunc; ReduceOutConverterFunc_t outFunc; size_t accumSize; // accumulator datum size in bytes size_t accumStride; // stride between accumulators in accumAlloc (below) // These fields are used for managing accumulator data items in a // multithreaded execution. // // Let the number of threads be N. // Let Outc be true iff there is an outconverter. // // accumAlloc is a pointer to a single allocation of (N - !Outc) // accumulators. (If there is no outconverter, then the output // allocation acts as an accumulator.) It is created at kernel // launch time. Within that allocation, the distance between the // start of adjacent accumulators is accumStride bytes -- this // might be the same as accumSize, or it might be larger, if we // are attempting to avoid false sharing. // // accumCount is an atomic counter of how many accumulators have // been grabbed by threads. It is initialized to zero at kernel // launch time. See accumPtr for further description. // // accumPtr is pointer to an array of N pointers to accumulators. // The array is created at kernel launch time, and each element is // initialized to nullptr. When a particular thread goes to work, // that thread obtains its accumulator from its entry in this // array. If the entry is nullptr, that thread needs to obtain an // accumulator, and initialize its entry in the array accordingly. // It does so via atomic access (fetch-and-add) to accumCount. // - If Outc, then the fetched value is used as an index into // accumAlloc. // - If !Outc, then // - If the fetched value is zero, then this thread gets the // output allocation for its accumulator. // - If the fetched value is nonzero, then (fetched value - 1) // is used as an index into accumAlloc. uint8_t *accumAlloc; uint8_t **accumPtr; uint32_t accumCount; // Logging control uint32_t logReduce; }; class RsdCpuReferenceImpl : public RsdCpuReference { public: ~RsdCpuReferenceImpl() override; RsdCpuReferenceImpl(Context *); void lockMutex(); void unlockMutex(); bool init(uint32_t version_major, uint32_t version_minor, sym_lookup_t, script_lookup_t); void setPriority(int32_t priority) override; virtual void launchThreads(WorkerCallback_t cbk, void *data); static void * helperThreadProc(void *vrsc); RsdCpuScriptImpl * setTLS(RsdCpuScriptImpl *sc); Context * getContext() {return mRSC;} uint32_t getThreadCount() const { return mWorkers.mCount + 1; } // Launch foreach kernel void launchForEach(const Allocation **ains, uint32_t inLen, Allocation *aout, const RsScriptCall *sc, MTLaunchStructForEach *mtls); // Launch a general reduce kernel void launchReduce(const Allocation ** ains, uint32_t inLen, Allocation *aout, MTLaunchStructReduce *mtls); CpuScript * createScript(const ScriptC *s, char const *resName, char const *cacheDir, uint8_t const *bitcode, size_t bitcodeSize, uint32_t flags) override; CpuScript * createIntrinsic(const Script *s, RsScriptIntrinsicID iid, Element *e) override; void* createScriptGroup(const ScriptGroupBase *sg) override; const RsdCpuReference::CpuSymbol *symLookup(const char *); RsdCpuReference::CpuScript *lookupScript(const Script *s) { return mScriptLookupFn(mRSC, s); } void setSelectRTCallback(RSSelectRTCallback pSelectRTCallback) { mSelectRTCallback = pSelectRTCallback; } RSSelectRTCallback getSelectRTCallback() { return mSelectRTCallback; } virtual void setBccPluginName(const char *name) { mBccPluginName.setTo(name); } virtual const char *getBccPluginName() const { return mBccPluginName.string(); } bool getInKernel() override { return mInKernel; } // Set to true if we should embed global variable information in the code. void setEmbedGlobalInfo(bool v) override { mEmbedGlobalInfo = v; } // Returns true if we should embed global variable information in the code. bool getEmbedGlobalInfo() const override { return mEmbedGlobalInfo; } // Set to true if we should skip constant (immutable) global variables when // potentially embedding information about globals. void setEmbedGlobalInfoSkipConstant(bool v) override { mEmbedGlobalInfoSkipConstant = v; } // Returns true if we should skip constant (immutable) global variables when // potentially embedding information about globals. bool getEmbedGlobalInfoSkipConstant() const override { return mEmbedGlobalInfoSkipConstant; } protected: Context *mRSC; uint32_t version_major; uint32_t version_minor; //bool mHasGraphics; bool mInKernel; // Is a parallel kernel execution underway? struct Workers { volatile int mRunningCount; volatile int mLaunchCount; uint32_t mCount; pthread_t *mThreadId; pid_t *mNativeThreadId; Signal mCompleteSignal; Signal *mLaunchSignals; WorkerCallback_t mLaunchCallback; void *mLaunchData; }; Workers mWorkers; bool mExit; sym_lookup_t mSymLookupFn; script_lookup_t mScriptLookupFn; ScriptTLSStruct mTlsStruct; RSSelectRTCallback mSelectRTCallback; String8 mBccPluginName; // Specifies whether we should embed global variable information in the // code via special RS variables that can be examined later by the driver. // Defaults to true. bool mEmbedGlobalInfo; // Specifies whether we should skip constant (immutable) global variables // when potentially embedding information about globals. // Defaults to true. bool mEmbedGlobalInfoSkipConstant; long mPageSize; // Launch a general reduce kernel void launchReduceSerial(const Allocation ** ains, uint32_t inLen, Allocation *aout, MTLaunchStructReduce *mtls); void launchReduceParallel(const Allocation ** ains, uint32_t inLen, Allocation *aout, MTLaunchStructReduce *mtls); }; } } #endif