// Copyright (c) 2018 Google LLC
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#ifndef SOURCE_COMP_MARKV_CODEC_H_
#define SOURCE_COMP_MARKV_CODEC_H_
#include <cassert>
#include <cstddef>
#include <cstdint>
#include <list>
#include <map>
#include <memory>
#include <unordered_map>
#include <vector>
#include "source/assembly_grammar.h"
#include "source/comp/huffman_codec.h"
#include "source/comp/markv_model.h"
#include "source/comp/move_to_front.h"
#include "source/diagnostic.h"
#include "source/id_descriptor.h"
#include "source/val/instruction.h"
// Base class for MARK-V encoder and decoder. Contains common functionality
// such as:
// - Validator connection and validation state.
// - SPIR-V grammar and helper functions.
namespace spvtools {
namespace comp {
class MarkvLogger;
// Move-to-front (mtf) sequence handles.
//
// The first group of enumerators names individual sequences. An enumerator
// whose name ends in "Begin" is instead the first handle of a whole handle
// space; such a space is 16 or 32 bits wide and is indexed by adding a type
// id, opcode, size or descriptor to the "Begin" value.
enum : uint64_t {
  kMtfNone = 0,

  // Every id.
  kMtfAll = 1,
  // Every forward declared id.
  kMtfForwardDeclared = 2,
  // Every type id, except those generated by OpTypeFunction.
  kMtfTypeNonFunction = 3,
  // Every label.
  kMtfLabel = 4,
  // Every id created by an instruction which had a type_id.
  kMtfObject = 5,
  // Every type generated by OpTypeFloat, OpTypeInt or OpTypeBool.
  kMtfTypeScalar = 6,
  // Every composite type.
  kMtfTypeComposite = 7,
  // The boolean type and any vector type of booleans.
  kMtfTypeBoolScalarOrVector = 8,
  // Every float type and any vector type of floats.
  kMtfTypeFloatScalarOrVector = 9,
  // Every int type and any vector type of ints.
  kMtfTypeIntScalarOrVector = 10,
  // Every type declared as a return type in OpTypeFunction.
  kMtfTypeReturnedByFunction = 11,
  // Every composite object.
  kMtfComposite = 12,
  // Every bool object and every vector of bools.
  kMtfBoolScalarOrVector = 13,
  // Every float object and every vector of floats.
  kMtfFloatScalarOrVector = 14,
  // Every int object and every vector of ints.
  kMtfIntScalarOrVector = 15,
  // Every pointer type which points to a composite.
  kMtfTypePointerToComposite = 16,
  // Used by EncodeMtfRankHuffman.
  kMtfGenericNonZeroRank = 17,

  // Handle space: ids of a specific type.
  kMtfIdOfTypeBegin = 0x10000,
  // Handle space: ids generated by a specific opcode.
  kMtfIdGeneratedByOpcode = 0x20000,
  // Handle space: ids of objects whose type was generated by a specific
  // opcode.
  kMtfIdWithTypeGeneratedByOpcodeBegin = 0x30000,
  // Handle space: vectors of a specific component type.
  kMtfVectorOfComponentTypeBegin = 0x40000,
  // Handle space: vector types of a specific size.
  kMtfTypeVectorOfSizeBegin = 0x50000,
  // Handle space: pointer types to a specific type.
  kMtfPointerToTypeBegin = 0x60000,
  // Handle space: function types which return a specific type.
  kMtfFunctionTypeWithReturnTypeBegin = 0x70000,
  // Handle space: function objects which return a specific type.
  kMtfFunctionWithReturnTypeBegin = 0x80000,
  // Handle space: short id descriptors (max 16-bit).
  kMtfShortIdDescriptorSpaceBegin = 0x90000,
  // Handle space: long id descriptors (32-bit).
  kMtfLongIdDescriptorSpaceBegin = 0x100000000,
};
// Common state and helpers for MARK-V codecs. The constructor is protected
// and the destructor virtual, so the class is only usable as a base class.
class MarkvCodec {
 public:
  static const uint32_t kMarkvMagicNumber;

  // Mtf ranks smaller than this are encoded with Huffman coding.
  static const uint32_t kMtfSmallestRankEncodedByValue;

  // Signals that the mtf rank is too large to be encoded with Huffman.
  static const uint32_t kMtfRankEncodedByValueSignal;

  static const uint32_t kShortDescriptorNumBits;

  static const size_t kByteBreakAfterInstIfLessThanUntilNextByte;

  static uint32_t GetMarkvVersion();

  virtual ~MarkvCodec();

 protected:
  // Header fields of a MARK-V stream. |magic_number| and |markv_version| have
  // no in-class initializer; they are left to the out-of-line constructor.
  struct MarkvHeader {
    MarkvHeader();

    uint32_t magic_number;
    uint32_t markv_version;
    // Magic number to identify or verify MarkvModel used for encoding.
    uint32_t markv_model = 0;
    uint32_t markv_length_in_bits = 0;
    uint32_t spirv_version = 0;
    uint32_t spirv_generator = 0;
  };

  // |model| is owned by the caller, must be not null and valid during the
  // lifetime of the codec.
  MarkvCodec(spv_const_context context, spv_validator_options validator_options,
             const MarkvModel* model);

  // Returns instruction which created |id| or nullptr if such instruction was
  // not registered.
  const val::Instruction* FindDef(uint32_t id) const {
    const auto it = id_to_def_instruction_.find(id);
    if (it == id_to_def_instruction_.end()) return nullptr;
    return it->second;
  }

  size_t GetNumBitsToNextByte(size_t bit_pos) const;

  bool OpcodeHasFixedNumberOfOperands(SpvOp opcode) const;

  // Returns type id of vector type component.
  // Precondition (checked by assert only): |vector_type_id| was registered and
  // was defined by OpTypeVector.
  uint32_t GetVectorComponentType(uint32_t vector_type_id) const {
    const val::Instruction* type_inst = FindDef(vector_type_id);
    assert(type_inst);
    assert(type_inst->opcode() == SpvOpTypeVector);
    // The component type is read from the instruction's second operand.
    const uint32_t component_type =
        type_inst->word(type_inst->operands()[1].offset);
    return component_type;
  }

  // The GetMtf* helpers below all compute a handle by adding their argument to
  // the first handle of the corresponding handle space.

  // Returns mtf handle for ids of given type.
  uint64_t GetMtfIdOfType(uint32_t type_id) const {
    return kMtfIdOfTypeBegin + type_id;
  }

  // Returns mtf handle for ids generated by given opcode.
  uint64_t GetMtfIdGeneratedByOpcode(SpvOp opcode) const {
    return kMtfIdGeneratedByOpcode + opcode;
  }

  // Returns mtf handle for ids of type generated by given opcode.
  uint64_t GetMtfIdWithTypeGeneratedByOpcode(SpvOp opcode) const {
    return kMtfIdWithTypeGeneratedByOpcodeBegin + opcode;
  }

  // Returns mtf handle for vectors of specific component type.
  uint64_t GetMtfVectorOfComponentType(uint32_t type_id) const {
    return kMtfVectorOfComponentTypeBegin + type_id;
  }

  // Returns mtf handle for vector type of specific size.
  uint64_t GetMtfTypeVectorOfSize(uint32_t size) const {
    return kMtfTypeVectorOfSizeBegin + size;
  }

  // Returns mtf handle for pointers to specific type.
  uint64_t GetMtfPointerToType(uint32_t type_id) const {
    return kMtfPointerToTypeBegin + type_id;
  }

  // Returns mtf handle for function types with given return type.
  uint64_t GetMtfFunctionTypeWithReturnType(uint32_t type_id) const {
    return kMtfFunctionTypeWithReturnTypeBegin + type_id;
  }

  // Returns mtf handle for functions with given return type.
  uint64_t GetMtfFunctionWithReturnType(uint32_t type_id) const {
    return kMtfFunctionWithReturnTypeBegin + type_id;
  }

  // Returns mtf handle for the given long id descriptor.
  uint64_t GetMtfLongIdDescriptor(uint32_t descriptor) const {
    return kMtfLongIdDescriptorSpaceBegin + descriptor;
  }

  // Returns mtf handle for the given short id descriptor.
  uint64_t GetMtfShortIdDescriptor(uint32_t descriptor) const {
    return kMtfShortIdDescriptorSpaceBegin + descriptor;
  }

  // Process data from the current instruction. This would update MTFs and
  // other data containers.
  void ProcessCurInstruction();

  // Returns move-to-front handle to be used for the current operand slot.
  // Mtf handle is chosen based on a set of rules defined by SPIR-V grammar.
  uint64_t GetRuleBasedMtf();

  // Returns words of the current instruction. Decoder has a different
  // implementation and the array is valid only until the previously decoded
  // word.
  virtual const uint32_t* GetInstWords() const { return inst_.words; }

  // Returns the opcode of the previous instruction, or SpvOpNop if no
  // instruction has been recorded yet.
  SpvOp GetPrevOpcode() const {
    if (instructions_.empty()) return SpvOpNop;
    return instructions_.back()->opcode();
  }

  // Returns diagnostic stream, position index is set to instruction number.
  DiagnosticStream Diag(spv_result_t error_code) const {
    return DiagnosticStream({0, 0, instructions_.size()}, context_->consumer,
                            "", error_code);
  }

  // Returns current id bound.
  uint32_t GetIdBound() const { return id_bound_; }

  // Sets current id bound, expected to be no lower than the previous one
  // (checked by assert only).
  void SetIdBound(uint32_t id_bound) {
    assert(id_bound >= id_bound_);
    id_bound_ = id_bound;
  }

  // Returns Huffman codec for ranks of the mtf with given |handle|.
  // Different mtfs can use different rank distributions.
  // May return nullptr if the codec doesn't exist.
  const HuffmanCodec<uint32_t>* GetMtfHuffmanCodec(uint64_t handle) const {
    const auto it = mtf_huffman_codecs_.find(handle);
    if (it == mtf_huffman_codecs_.end()) return nullptr;
    return it->second.get();
  }

  // Promotes id in all move-to-front sequences if ids can be shared by multiple
  // sequences.
  void PromoteIfNeeded(uint32_t id) {
    if (!model_->AnyDescriptorHasCodingScheme() &&
        model_->id_fallback_strategy() ==
            MarkvModel::IdFallbackStrategy::kShortDescriptor) {
      // Move-to-front sequences do not share ids. Nothing to do.
      return;
    }
    multi_mtf_.Promote(id);
  }

  spv_validator_options validator_options_ = nullptr;
  const AssemblyGrammar grammar_;
  MarkvHeader header_;

  // MARK-V model, not owned.
  const MarkvModel* model_ = nullptr;

  // Current instruction, current operand and current operand index.
  // Value-initialized here so that they hold a defined (all-zero) state no
  // matter what the out-of-line constructor sets; in particular
  // GetInstWords() then returns nullptr rather than an indeterminate pointer
  // if it is called before any instruction has been stored.
  spv_parsed_instruction_t inst_ = {};
  spv_parsed_operand_t operand_ = {};
  uint32_t operand_index_ = 0;

  // Maps a result ID to its type ID. By convention:
  //  - a result ID that is a type definition maps to itself.
  //  - a result ID without a type maps to 0. (E.g. for OpLabel)
  std::unordered_map<uint32_t, uint32_t> id_to_type_id_;

  // Container for all move-to-front sequences.
  MultiMoveToFront multi_mtf_;

  // Id of the current function or zero if outside of function.
  uint32_t cur_function_id_ = 0;

  // Return type of the current function.
  uint32_t cur_function_return_type_ = 0;

  // Remaining function parameter types. This container is filled on OpFunction,
  // and drained on OpFunctionParameter.
  std::list<uint32_t> remaining_function_parameter_types_;

  // List of ids local to the current function.
  std::vector<uint32_t> ids_local_to_cur_function_;

  // List of instructions in the order they are given in the module.
  std::vector<std::unique_ptr<const val::Instruction>> instructions_;

  // Container/computer for long (32-bit) id descriptors.
  IdDescriptorCollection long_id_descriptors_;

  // Container/computer for short id descriptors.
  // Short descriptors are stored in uint32_t, but their actual bit width is
  // defined with kShortDescriptorNumBits.
  // It doesn't seem logical to have a different computer for short id
  // descriptors, since one could actually map/truncate long descriptors.
  // But as short descriptors have collisions, the efficiency of
  // compression depends on the collision pattern, and short descriptors
  // produced by function ShortHashU32Array have been empirically proven to
  // produce better results.
  IdDescriptorCollection short_id_descriptors_;

  // Huffman codecs for move-to-front ranks. The map key is mtf handle. Doesn't
  // need to contain a different codec for every handle as most use one and the
  // same.
  std::map<uint64_t, std::unique_ptr<HuffmanCodec<uint32_t>>>
      mtf_huffman_codecs_;

  // If not nullptr, codec will log comments on the compression process.
  std::unique_ptr<MarkvLogger> logger_;

  spv_const_context context_ = nullptr;

 private:
  // Maps result id to the instruction which defined it.
  std::unordered_map<uint32_t, const val::Instruction*> id_to_def_instruction_;

  uint32_t id_bound_ = 1;
};
} // namespace comp
} // namespace spvtools
#endif // SOURCE_COMP_MARKV_CODEC_H_