// Copyright (c) 2011 The Chromium Authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.
#include "courgette/assembly_program.h"
#include <memory.h>
#include <algorithm>
#include <map>
#include <set>
#include <sstream>
#include <vector>
#include "base/logging.h"
#include "base/memory/scoped_ptr.h"
#include "courgette/courgette.h"
#include "courgette/encoded_program.h"
namespace courgette {
// Opcodes of simple assembly language
enum OP {
ORIGIN, // ORIGIN <rva> - set current address for assembly.
MAKEPERELOCS, // Generates a base relocation table.
MAKEELFRELOCS, // Generates a base relocation table.
DEFBYTE, // DEFBYTE <value> - emit a byte literal.
REL32, // REL32 <label> - emit a rel32 encoded reference to 'label'.
ABS32, // REL32 <label> - emit am abs32 encoded reference to 'label'.
REL32ARM, // REL32ARM <c_op> <label> - arm-specific rel32 reference
MAKEELFARMRELOCS, // Generates a base relocation table.
DEFBYTES, // Emits any number of byte literals
LAST_OP
};
// Base class for instructions. Because we have so many instructions we want to
// keep them as small as possible. For this reason we avoid virtual functions.
//
class Instruction {
public:
OP op() const { return static_cast<OP>(op_); }
protected:
explicit Instruction(OP op) : op_(op), info_(0) {}
Instruction(OP op, unsigned int info) : op_(op), info_(info) {}
uint32 op_ : 4; // A few bits to store the OP code.
uint32 info_ : 28; // Remaining bits in first word available to subclass.
private:
DISALLOW_COPY_AND_ASSIGN(Instruction);
};
namespace {
// Sets the current address for the emitting instructions.
class OriginInstruction : public Instruction {
public:
explicit OriginInstruction(RVA rva) : Instruction(ORIGIN, 0), rva_(rva) {}
RVA origin_rva() const { return rva_; }
private:
RVA rva_;
};
// Emits an entire PE base relocation table.
class PeRelocsInstruction : public Instruction {
public:
PeRelocsInstruction() : Instruction(MAKEPERELOCS) {}
};
// Emits an ELF relocation table.
class ElfRelocsInstruction : public Instruction {
public:
ElfRelocsInstruction() : Instruction(MAKEELFRELOCS) {}
};
// Emits an ELF ARM relocation table.
class ElfARMRelocsInstruction : public Instruction {
public:
ElfARMRelocsInstruction() : Instruction(MAKEELFARMRELOCS) {}
};
// Emits a single byte.
class ByteInstruction : public Instruction {
public:
explicit ByteInstruction(uint8 value) : Instruction(DEFBYTE, value) {}
uint8 byte_value() const { return info_; }
};
// Emits a single byte.
class BytesInstruction : public Instruction {
public:
BytesInstruction(const uint8* values, uint32 len)
: Instruction(DEFBYTES, 0),
values_(values),
len_(len) {}
const uint8* byte_values() const { return values_; }
uint32 len() const { return len_; }
private:
const uint8* values_;
uint32 len_;
};
// A ABS32 to REL32 instruction emits a reference to a label's address.
class InstructionWithLabel : public Instruction {
public:
InstructionWithLabel(OP op, Label* label)
: Instruction(op, 0), label_(label) {
if (label == NULL) NOTREACHED();
}
Label* label() const { return label_; }
protected:
Label* label_;
};
// An ARM REL32 instruction emits a reference to a label's address and
// a specially-compressed ARM op.
class InstructionWithLabelARM : public InstructionWithLabel {
public:
InstructionWithLabelARM(OP op, uint16 compressed_op, Label* label,
const uint8* arm_op, uint16 op_size)
: InstructionWithLabel(op, label), compressed_op_(compressed_op),
arm_op_(arm_op), op_size_(op_size) {
if (label == NULL) NOTREACHED();
}
uint16 compressed_op() const { return compressed_op_; }
const uint8* arm_op() const { return arm_op_; }
uint16 op_size() const { return op_size_; }
private:
uint16 compressed_op_;
const uint8* arm_op_;
uint16 op_size_;
};
} // namespace
AssemblyProgram::AssemblyProgram(ExecutableType kind)
: kind_(kind), image_base_(0) {
}
static void DeleteContainedLabels(const RVAToLabel& labels) {
for (RVAToLabel::const_iterator p = labels.begin(); p != labels.end(); ++p)
delete p->second;
}
AssemblyProgram::~AssemblyProgram() {
for (size_t i = 0; i < instructions_.size(); ++i) {
Instruction* instruction = instructions_[i];
if (instruction->op() != DEFBYTE) // Will be in byte_instruction_cache_.
delete instruction;
}
if (byte_instruction_cache_.get()) {
for (size_t i = 0; i < 256; ++i)
delete byte_instruction_cache_[i];
}
DeleteContainedLabels(rel32_labels_);
DeleteContainedLabels(abs32_labels_);
}
CheckBool AssemblyProgram::EmitPeRelocsInstruction() {
return Emit(new(std::nothrow) PeRelocsInstruction());
}
CheckBool AssemblyProgram::EmitElfRelocationInstruction() {
return Emit(new(std::nothrow) ElfRelocsInstruction());
}
CheckBool AssemblyProgram::EmitElfARMRelocationInstruction() {
return Emit(new(std::nothrow) ElfARMRelocsInstruction());
}
CheckBool AssemblyProgram::EmitOriginInstruction(RVA rva) {
return Emit(new(std::nothrow) OriginInstruction(rva));
}
CheckBool AssemblyProgram::EmitByteInstruction(uint8 byte) {
return Emit(GetByteInstruction(byte));
}
CheckBool AssemblyProgram::EmitBytesInstruction(const uint8* values,
uint32 len) {
return Emit(new(std::nothrow) BytesInstruction(values, len));
}
CheckBool AssemblyProgram::EmitRel32(Label* label) {
return Emit(new(std::nothrow) InstructionWithLabel(REL32, label));
}
CheckBool AssemblyProgram::EmitRel32ARM(uint16 op, Label* label,
const uint8* arm_op, uint16 op_size) {
return Emit(new(std::nothrow) InstructionWithLabelARM(REL32ARM, op, label,
arm_op, op_size));
}
CheckBool AssemblyProgram::EmitAbs32(Label* label) {
return Emit(new(std::nothrow) InstructionWithLabel(ABS32, label));
}
Label* AssemblyProgram::FindOrMakeAbs32Label(RVA rva) {
return FindLabel(rva, &abs32_labels_);
}
Label* AssemblyProgram::FindOrMakeRel32Label(RVA rva) {
return FindLabel(rva, &rel32_labels_);
}
void AssemblyProgram::DefaultAssignIndexes() {
DefaultAssignIndexes(&abs32_labels_);
DefaultAssignIndexes(&rel32_labels_);
}
void AssemblyProgram::UnassignIndexes() {
UnassignIndexes(&abs32_labels_);
UnassignIndexes(&rel32_labels_);
}
void AssemblyProgram::AssignRemainingIndexes() {
AssignRemainingIndexes(&abs32_labels_);
AssignRemainingIndexes(&rel32_labels_);
}
Label* AssemblyProgram::InstructionAbs32Label(
const Instruction* instruction) const {
if (instruction->op() == ABS32)
return static_cast<const InstructionWithLabel*>(instruction)->label();
return NULL;
}
Label* AssemblyProgram::InstructionRel32Label(
const Instruction* instruction) const {
if (instruction->op() == REL32 || instruction->op() == REL32ARM) {
Label* label =
static_cast<const InstructionWithLabel*>(instruction)->label();
return label;
}
return NULL;
}
CheckBool AssemblyProgram::Emit(Instruction* instruction) {
if (!instruction)
return false;
bool ok = instructions_.push_back(instruction);
if (!ok)
delete instruction;
return ok;
}
Label* AssemblyProgram::FindLabel(RVA rva, RVAToLabel* labels) {
Label*& slot = (*labels)[rva];
if (slot == NULL) {
slot = new(std::nothrow) Label(rva);
}
slot->count_++;
return slot;
}
void AssemblyProgram::UnassignIndexes(RVAToLabel* labels) {
for (RVAToLabel::iterator p = labels->begin(); p != labels->end(); ++p) {
Label* current = p->second;
current->index_ = Label::kNoIndex;
}
}
// DefaultAssignIndexes takes a set of labels and assigns indexes in increasing
// address order.
//
void AssemblyProgram::DefaultAssignIndexes(RVAToLabel* labels) {
int index = 0;
for (RVAToLabel::iterator p = labels->begin(); p != labels->end(); ++p) {
Label* current = p->second;
if (current->index_ != Label::kNoIndex)
NOTREACHED();
current->index_ = index;
++index;
}
}
// AssignRemainingIndexes assigns indexes to any addresses (labels) that are not
// yet assigned an index.
//
void AssemblyProgram::AssignRemainingIndexes(RVAToLabel* labels) {
// An address table compresses best when each index is associated with an
// address that is slight larger than the previous index.
// First see which indexes have not been used. The 'available' vector could
// grow even bigger, but the number of addresses is a better starting size
// than empty.
std::vector<bool> available(labels->size(), true);
int used = 0;
for (RVAToLabel::iterator p = labels->begin(); p != labels->end(); ++p) {
int index = p->second->index_;
if (index != Label::kNoIndex) {
while (static_cast<size_t>(index) >= available.size())
available.push_back(true);
available.at(index) = false;
++used;
}
}
VLOG(1) << used << " of " << labels->size() << " labels pre-assigned";
// Are there any unused labels that happen to be adjacent following a used
// label?
//
int fill_forward_count = 0;
Label* prev = 0;
for (RVAToLabel::iterator p = labels->begin(); p != labels->end(); ++p) {
Label* current = p->second;
if (current->index_ == Label::kNoIndex) {
int index = 0;
if (prev && prev->index_ != Label::kNoIndex)
index = prev->index_ + 1;
if (index < static_cast<int>(available.size()) && available.at(index)) {
current->index_ = index;
available.at(index) = false;
++fill_forward_count;
}
}
prev = current;
}
// Are there any unused labels that happen to be adjacent preceeding a used
// label?
//
int fill_backward_count = 0;
prev = 0;
for (RVAToLabel::reverse_iterator p = labels->rbegin();
p != labels->rend();
++p) {
Label* current = p->second;
if (current->index_ == Label::kNoIndex) {
int prev_index;
if (prev)
prev_index = prev->index_;
else
prev_index = static_cast<uint32>(available.size());
if (prev_index != 0 &&
prev_index != Label::kNoIndex &&
available.at(prev_index - 1)) {
current->index_ = prev_index - 1;
available.at(current->index_) = false;
++fill_backward_count;
}
}
prev = current;
}
// Fill in any remaining indexes
int fill_infill_count = 0;
int index = 0;
for (RVAToLabel::iterator p = labels->begin(); p != labels->end(); ++p) {
Label* current = p->second;
if (current->index_ == Label::kNoIndex) {
while (!available.at(index)) {
++index;
}
current->index_ = index;
available.at(index) = false;
++index;
++fill_infill_count;
}
}
VLOG(1) << " fill forward " << fill_forward_count
<< " backward " << fill_backward_count
<< " infill " << fill_infill_count;
}
typedef CheckBool (EncodedProgram::*DefineLabelMethod)(int index, RVA value);
#if defined(OS_WIN)
__declspec(noinline)
#endif
static CheckBool DefineLabels(const RVAToLabel& labels,
EncodedProgram* encoded_format,
DefineLabelMethod define_label) {
bool ok = true;
for (RVAToLabel::const_iterator p = labels.begin();
ok && p != labels.end();
++p) {
Label* label = p->second;
ok = (encoded_format->*define_label)(label->index_, label->rva_);
}
return ok;
}
EncodedProgram* AssemblyProgram::Encode() const {
scoped_ptr<EncodedProgram> encoded(new(std::nothrow) EncodedProgram());
if (!encoded.get())
return NULL;
encoded->set_image_base(image_base_);
if (!DefineLabels(abs32_labels_, encoded.get(),
&EncodedProgram::DefineAbs32Label) ||
!DefineLabels(rel32_labels_, encoded.get(),
&EncodedProgram::DefineRel32Label)) {
return NULL;
}
encoded->EndLabels();
for (size_t i = 0; i < instructions_.size(); ++i) {
Instruction* instruction = instructions_[i];
switch (instruction->op()) {
case ORIGIN: {
OriginInstruction* org = static_cast<OriginInstruction*>(instruction);
if (!encoded->AddOrigin(org->origin_rva()))
return NULL;
break;
}
case DEFBYTE: {
uint8 b = static_cast<ByteInstruction*>(instruction)->byte_value();
if (!encoded->AddCopy(1, &b))
return NULL;
break;
}
case DEFBYTES: {
const uint8* byte_values =
static_cast<BytesInstruction*>(instruction)->byte_values();
uint32 len = static_cast<BytesInstruction*>(instruction)->len();
if (!encoded->AddCopy(len, byte_values))
return NULL;
break;
}
case REL32: {
Label* label = static_cast<InstructionWithLabel*>(instruction)->label();
if (!encoded->AddRel32(label->index_))
return NULL;
break;
}
case REL32ARM: {
Label* label =
static_cast<InstructionWithLabelARM*>(instruction)->label();
uint16 compressed_op =
static_cast<InstructionWithLabelARM*>(instruction)->
compressed_op();
if (!encoded->AddRel32ARM(compressed_op, label->index_))
return NULL;
break;
}
case ABS32: {
Label* label = static_cast<InstructionWithLabel*>(instruction)->label();
if (!encoded->AddAbs32(label->index_))
return NULL;
break;
}
case MAKEPERELOCS: {
if (!encoded->AddPeMakeRelocs(kind_))
return NULL;
break;
}
case MAKEELFRELOCS: {
if (!encoded->AddElfMakeRelocs())
return NULL;
break;
}
case MAKEELFARMRELOCS: {
if (!encoded->AddElfARMMakeRelocs())
return NULL;
break;
}
default: {
NOTREACHED() << "Unknown Insn OP kind";
}
}
}
return encoded.release();
}
Instruction* AssemblyProgram::GetByteInstruction(uint8 byte) {
if (!byte_instruction_cache_.get()) {
byte_instruction_cache_.reset(new(std::nothrow) Instruction*[256]);
if (!byte_instruction_cache_.get())
return NULL;
for (int i = 0; i < 256; ++i) {
byte_instruction_cache_[i] =
new(std::nothrow) ByteInstruction(static_cast<uint8>(i));
if (!byte_instruction_cache_[i]) {
for (int j = 0; j < i; ++j)
delete byte_instruction_cache_[j];
byte_instruction_cache_.reset();
return NULL;
}
}
}
return byte_instruction_cache_[byte];
}
// Chosen empirically to give the best reduction in payload size for
// an update from daisy_3701.98.0 to daisy_4206.0.0.
const int AssemblyProgram::kLabelLowerLimit = 5;
CheckBool AssemblyProgram::TrimLabels() {
// For now only trim for ARM binaries
if (kind() != EXE_ELF_32_ARM)
return true;
int lower_limit = kLabelLowerLimit;
VLOG(1) << "TrimLabels: threshold " << lower_limit;
// Remove underused labels from the list of labels
RVAToLabel::iterator it = rel32_labels_.begin();
while (it != rel32_labels_.end()) {
if (it->second->count_ <= lower_limit) {
rel32_labels_.erase(it++);
} else {
++it;
}
}
// Walk through the list of instructions, replacing trimmed labels
// with the original machine instruction
for (size_t i = 0; i < instructions_.size(); ++i) {
Instruction* instruction = instructions_[i];
switch (instruction->op()) {
case REL32ARM: {
Label* label =
static_cast<InstructionWithLabelARM*>(instruction)->label();
if (label->count_ <= lower_limit) {
const uint8* arm_op =
static_cast<InstructionWithLabelARM*>(instruction)->arm_op();
uint16 op_size =
static_cast<InstructionWithLabelARM*>(instruction)->op_size();
if (op_size < 1)
return false;
BytesInstruction* new_instruction =
new(std::nothrow) BytesInstruction(arm_op, op_size);
instructions_[i] = new_instruction;
}
break;
}
default:
break;
}
}
return true;
}
void AssemblyProgram::PrintLabelCounts(RVAToLabel* labels) {
for (RVAToLabel::const_iterator p = labels->begin(); p != labels->end();
++p) {
Label* current = p->second;
if (current->index_ != Label::kNoIndex)
printf("%d\n", current->count_);
}
}
void AssemblyProgram::CountRel32ARM() {
PrintLabelCounts(&rel32_labels_);
}
////////////////////////////////////////////////////////////////////////////////
Status TrimLabels(AssemblyProgram* program) {
if (program->TrimLabels())
return C_OK;
else
return C_TRIM_FAILED;
}
Status Encode(AssemblyProgram* program, EncodedProgram** output) {
*output = NULL;
EncodedProgram *encoded = program->Encode();
if (encoded) {
*output = encoded;
return C_OK;
} else {
return C_GENERAL_ERROR;
}
}
} // namespace courgette