/*
* Copyright (C) 2017 The Android Open Source Project
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
// Classes used to plan how to execute a model across multiple devices.
#ifndef ANDROID_ML_NN_RUNTIME_EXECUTION_PLAN_H
#define ANDROID_ML_NN_RUNTIME_EXECUTION_PLAN_H
#include "HalInterfaces.h"
#include "Memory.h"
#include "ModelBuilder.h"
#include "NeuralNetworks.h"
#include "TokenHasher.h"
#include "Utils.h"
#include "VersionedInterfaces.h"
#include <openssl/sha.h>
#include <set>
#include <string>
namespace android {
namespace nn {
// Forward declarations of runtime classes, so that the declarations below
// can name them without this header defining them.
// NOTE(review): Memory is also held by value in ExecutionPlan::Controller
// (mTemporaries), which needs the complete type -- presumably provided by
// "Memory.h" above; confirm.
class BurstBuilder;
class CompilationBuilder;
class Device;
class ExecutionBuilder;
class ExecutionPlan;
class ExecutionBurstController;
class Memory;
class StepExecutor;
// A single step of an ExecutionPlan.  A step holds a submodel (mSubModel),
// the device associated with the step, and the bookkeeping that relates
// operand / input / output indexes of the original model ("fromModel") to
// those of the submodel.
class ExecutionStep {
   public:
    // Sequence of (fromModel index, subModel index) pairs.
    using RemapVectorType = std::vector<std::pair<uint32_t, uint32_t>>;
    // Set of (fromModel index, subModel index) pairs.
    using SubModelOutputSetType = std::set<std::pair<uint32_t, uint32_t>>;

    enum OperandKind { INPUT, OUTPUT };

    ExecutionStep(ExecutionPlan* plan, uint32_t stepIndex, std::shared_ptr<Device> device);

    // Both functions are defined out of line.
    // NOTE(review): presumably they add an operation / operand of fromModel
    // to the submodel, with addOperand() reporting the submodel operand
    // index through *toOperandIndex -- confirm against the implementation.
    int addOperation(int operationIndex, const ModelBuilder& fromModel);
    int addOperand(uint32_t fromOperandIndex, uint32_t* toOperandIndex,
                   const ModelBuilder& fromModel, OperandKind kind);

    // Read-only views of the remapping tables.  Every entry of the pair
    // containers has the form (fromModel index, subModel index).
    const RemapVectorType& getModelInputs() const { return mModelInputs; }
    const RemapVectorType& getModelOutputs() const { return mModelOutputs; }
    const RemapVectorType& getTempsAsSubModelInputs() const { return mTempsAsSubModelInputs; }
    const SubModelOutputSetType& getTempsAsSubModelOutputs() const {
        return mTempsAsSubModelOutputs;
    }
    const RemapVectorType& getOutputsAsSubModelInputs() const { return mOutputsAsSubModelInputs; }
    const std::vector<uint32_t>& getOutputIndexSubModelToFromModel() const {
        return mOutputIndexSubModelToFromModel;
    }
    const std::vector<uint32_t>& getOutputsAsSubModelInputsIndexToFromModel() const {
        return mOutputsAsSubModelInputsIndexToFromModel;
    }

    // Records the operand fromModelIndex as a temporary that is an output
    // of this submodel.  The operand must already have an entry in
    // mOperandMap (checked with nnAssert); the mapped submodel index
    // becomes the second component of the recorded pair.
    void recordTempAsSubModelOutput(uint32_t fromModelIndex) {
        const auto mapping = mOperandMap.find(fromModelIndex);
        nnAssert(mapping != mOperandMap.end());
        const uint32_t subModelIndex = mapping->second;
        mTempsAsSubModelOutputs.emplace(fromModelIndex, subModelIndex);
    }

    // Sets *hasOutputOfUnknownSize to true when this step has a submodel
    // output of unknown size, and leaves it untouched otherwise.
    int finishSubModel(const ModelBuilder* fromModel, bool* hasOutputOfUnknownSize,
                       int32_t executionPreference);

    const ModelBuilder* getSubModel() const { return &mSubModel; }
    std::shared_ptr<Device> getDevice() const { return mDevice; }

    // Only available after finishSubModel() has been called.
    std::shared_ptr<VersionedIPreparedModel> getPreparedSubModel() const {
        return mPreparedSubModel;
    }

    // Maps inputs and outputs from ExecutionBuilder to StepExecutor.
    void mapInputsAndOutputs(std::shared_ptr<StepExecutor> stepExecutor) const;

    void dump() const;

    // Test-only accessor for the transformed cache token.
    const uint8_t* forTest_getCacheToken() const { return mToken.getCacheToken(); }

   private:
    void logSubModel() const;

    // TODO: Some of the data is working state information that
    // shouldn't be needed after we've constructed but not executed
    // the step.

    ExecutionPlan* mPlan;
    uint32_t mIndex;  // position of this step within the plan
    ModelBuilder mSubModel;
    std::shared_ptr<Device> mDevice;
    std::shared_ptr<VersionedIPreparedModel> mPreparedSubModel;  // not used for CPU

    // Original-model inputs that are also inputs of this submodel:
    // (fromModel index, subModel index)
    RemapVectorType mModelInputs;
    // Original-model outputs that are also outputs of this submodel:
    // (fromModel index, subModel index)
    RemapVectorType mModelOutputs;
    // Original-model temporaries that are inputs of this submodel:
    // (fromModel index, subModel index)
    RemapVectorType mTempsAsSubModelInputs;
    // Original-model temporaries that are outputs of this submodel:
    // (fromModel index, subModel index)
    SubModelOutputSetType mTempsAsSubModelOutputs;
    // Original-model outputs that are inputs of this submodel:
    // (fromModel index, subModel index)
    RemapVectorType mOutputsAsSubModelInputs;

    // Translates operand indexes of the main model into operand indexes of
    // the submodel.
    std::unordered_map<uint32_t, uint32_t> mOperandMap;

    // Translates submodel input indexes into main model input indexes
    // (input indexes, not operand indexes).  Only submodel inputs that are
    // also inputs of the main model are described -- that is,
    // mModelInputs but not mTempsAsSubModelInputs.
    std::vector<uint32_t> mInputIndexSubModelToFromModel;

    // Translates submodel output indexes into main model output indexes
    // (output indexes, not operand indexes).  Only submodel outputs that
    // are also outputs of the main model are described -- that is,
    // mModelOutputs but not mTempsAsSubModelOutputs.
    std::vector<uint32_t> mOutputIndexSubModelToFromModel;

    // Translates indexes into mOutputsAsSubModelInputs into indexes into
    // the main model outputs (input and output indexes, not operand
    // indexes).  With the main model outputs called mainModelOutputs:
    //
    //     mOutputsAsSubModelInputsIndexToFromModel.size() ==
    //     mOutputsAsSubModelInputs.size()
    //
    // and for every i with (0 <= i < mOutputsAsSubModelInputs.size()):
    //
    //     mainModelOutputs[mOutputsAsSubModelInputsIndexToFromModel[i]] ==
    //     mOutputsAsSubModelInputs[i].first
    std::vector<uint32_t> mOutputsAsSubModelInputsIndexToFromModel;

    // Compilation caching token.
    TokenHasher mToken;
};
// Describes how a model is to be executed.  The plan owns a polymorphic
// Body through the raw pointer mBody: a SimpleBody carries one device and
// one model, a CompoundBody carries a vector of ExecutionSteps.  The plan
// is not copyable.
class ExecutionPlan {
   public:
    ExecutionPlan(const ExecutionPlan&) = delete;
    ExecutionPlan& operator=(const ExecutionPlan&) = delete;

    ExecutionPlan() = default;
    // mBody is owned by the plan and released here.
    ~ExecutionPlan() { delete mBody; }

    // Controller is part of the interface to a mechanism for performing an
    // execution in N steps.
    //
    // Usage pattern:
    // - Obtain a Controller from ExecutionPlan::makeController().
    // - Call ExecutionPlan::next() on the Controller N+1 times.  For each
    //   of the first N calls, *executor is set to point to a new
    //   StepExecutor corresponding to that step.  On call N+1, *executor
    //   is set to nullptr, signifying there are no more steps.
    // - Any return value from ExecutionPlan::next() other than
    //   ANEURALNETWORKS_NO_ERROR means a problem has occurred.
    class Controller {
        friend class ExecutionPlan;

       private:
        Controller(const Controller&) = delete;
        Controller& operator=(const Controller&) = delete;

        // Maps the operand index of a TEMPORARY in the original model to
        // an offset into mTemporaries used to represent that TEMPORARY as
        // an inter-partition input or output.
        using SubModelInputsAndOutputsType = std::map<uint32_t, uint32_t>;

        // All bits set.
        static const size_t kBadStepIndex = ~size_t(0);

        Controller(const ExecutionPlan* plan, ExecutionBuilder* executionBuilder,
                   const BurstBuilder* burstBuilder,
                   std::shared_ptr<const SubModelInputsAndOutputsType> subModelInputsAndOutputs,
                   uint32_t totalSizeOfTemporaries);

        const ExecutionPlan* mPlan;
        ExecutionBuilder* mExecutionBuilder;
        const BurstBuilder* mBurstBuilder;
        // may be nullptr
        std::shared_ptr<const SubModelInputsAndOutputsType> mSubModelInputsAndOutputs;
        Memory mTemporaries;
        size_t mNextStepIndex;
    };

    std::vector<std::shared_ptr<ExecutionBurstController>> makeBursts() const;

    std::shared_ptr<Controller> makeController(ExecutionBuilder* executionBuilder,
                                               const BurstBuilder* burstBuilder) const;

    // See the usage pattern described on Controller.
    int next(std::shared_ptr<Controller> controller, std::shared_ptr<StepExecutor>* executor,
             std::shared_ptr<ExecutionBurstController>* burstController = nullptr) const;

    // Creates the same executor as the last one created by next().
    int fallback(std::shared_ptr<Controller> controller,
                 std::shared_ptr<StepExecutor>* executor) const;

    std::shared_ptr<ExecutionStep> createNewStep(const std::shared_ptr<Device> device);

    void becomeSingleStep(const std::shared_ptr<Device> device, const ModelBuilder* model);

    int finish(const ModelBuilder* fromModel, int32_t executionPreference);

    // Registers stepIndex as the defining step of the temporary operand
    // fromModelIndex.  The plan must be in the COMPOUND state (enforced by
    // compound()) and the operand must not have been registered before
    // (checked with nnAssert).
    void recordTemporaryDef(uint32_t fromModelIndex, uint32_t stepIndex) {
        auto& definingSteps = compound()->mTemporaryToDefiningStep;
        nnAssert(definingSteps.count(fromModelIndex) == 0);
        definingSteps.emplace(fromModelIndex, stepIndex);
    }

    void dump() const;

    void reset();

    // True only when the plan is not EMPTY, has a body, and that body's
    // mSuccessfulFinish flag is set.
    bool isValid() const {
        if (mState == EMPTY || mBody == nullptr) {
            return false;
        }
        return mBody->mSuccessfulFinish;
    }

    // Stores the caching pointers as given; nothing is copied, so the
    // pointed-to data is not owned by the plan.
    void setCaching(const std::string* cacheDir, const uint8_t* token) {
        mCacheDir = cacheDir;
        mToken = token;
    }
    const std::string* getCacheDir() const { return mCacheDir; }
    const uint8_t* getCacheToken() const { return mToken; }

    // The declarations below are solely intended for use by unit tests of
    // the partitioning algorithm.
    enum class Kind { ERROR, EMPTY, SIMPLE, COMPOUND };
    Kind forTest_getKind() const;
    std::shared_ptr<const Device> forTest_simpleGetDevice() const;
    const std::vector<std::shared_ptr<ExecutionStep>>& forTest_compoundGetSteps() const;
    bool forTest_hasSubModelOutputsOfUnknownSize() const;
    const uint8_t* forTest_simpleGetCacheToken() const;

   private:
    void findTempsAsSubModelOutputs();

    // Common interface of the two plan bodies.
    struct Body {
        virtual ~Body() = default;
        virtual void dump() const = 0;
        virtual int finish(const ModelBuilder* fromModel, int32_t executionPreference) = 0;
        virtual bool hasSubModelOutputsOfUnknownSize() const = 0;
        bool mSuccessfulFinish = false;
    };

    // Body holding one device and one model.
    struct SimpleBody : Body {
        SimpleBody(std::shared_ptr<Device> device, const ModelBuilder* model,
                   const std::string* cacheDir, const uint8_t* token)
            : mDevice(device), mModel(model), mCacheDir(cacheDir), mToken(token) {}

        void dump() const override;
        int finish(const ModelBuilder* fromModel, int32_t executionPreference) override;
        // Always reports false for this body.
        bool hasSubModelOutputsOfUnknownSize() const override { return false; }

        std::shared_ptr<Device> mDevice;
        const ModelBuilder* mModel;
        std::shared_ptr<VersionedIPreparedModel> mPreparedModel;  // not used for CPU

        const std::string* mCacheDir;
        TokenHasher mToken;
    };

    // Body holding a sequence of ExecutionSteps.
    struct CompoundBody : Body {
        void dump() const override;
        int finish(const ModelBuilder* fromModel, int32_t executionPreference) override;
        bool hasSubModelOutputsOfUnknownSize() const override {
            return mHasSubModelOutputOfUnknownSize;
        }

        // TODO: Some of the data is working state information that
        // shouldn't be needed after we've constructed but not
        // executed the plan.

        std::vector<std::shared_ptr<ExecutionStep>> mSteps;

        // Maps an original operand index to the index of its defining
        // step.  Used for all (and only) TEMPORARY_VARIABLEs.
        std::unordered_map<uint32_t, uint32_t> mTemporaryToDefiningStep;

        bool mHasSubModelOutputOfUnknownSize = false;

       private:
        void findTempsAsSubModelOutputs();
    };

    enum { EMPTY, SIMPLE, COMPOUND } mState = EMPTY;
    Body* mBody = nullptr;

    // Views mBody as a CompoundBody; asserts that the plan is COMPOUND.
    CompoundBody* compound() {
        nnAssert(mState == COMPOUND);
        return static_cast<CompoundBody*>(mBody);
    }
    const CompoundBody* compound() const {
        nnAssert(mState == COMPOUND);
        return static_cast<const CompoundBody*>(mBody);
    }

    // Pointers to compilation caching information in CompilationBuilder.
    const std::string* mCacheDir = nullptr;
    const uint8_t* mToken = nullptr;
};
} // namespace nn
} // namespace android
#endif // ANDROID_ML_NN_RUNTIME_EXECUTION_PLAN_H