普通文本  |  584行  |  16.75 KB

// Copyright (c) 2011 The Chromium Authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.

#include "courgette/assembly_program.h"

#include <memory.h>
#include <algorithm>
#include <map>
#include <set>
#include <sstream>
#include <vector>

#include "base/logging.h"
#include "base/memory/scoped_ptr.h"

#include "courgette/courgette.h"
#include "courgette/encoded_program.h"

namespace courgette {

// Opcodes of simple assembly language
enum OP {
  ORIGIN,         // ORIGIN <rva> - set current address for assembly.
  MAKEPERELOCS,   // Generates a base relocation table.
  MAKEELFRELOCS,  // Generates a base relocation table.
  DEFBYTE,        // DEFBYTE <value> - emit a byte literal.
  REL32,          // REL32 <label> - emit a rel32 encoded reference to 'label'.
  ABS32,          // REL32 <label> - emit am abs32 encoded reference to 'label'.
  REL32ARM,       // REL32ARM <c_op> <label> - arm-specific rel32 reference
  MAKEELFARMRELOCS, // Generates a base relocation table.
  DEFBYTES,       // Emits any number of byte literals
  LAST_OP
};

// Base class for instructions.  Because we have so many instructions we want to
// keep them as small as possible.  For this reason we avoid virtual functions.
//
class Instruction {
 public:
  OP op() const { return static_cast<OP>(op_); }

 protected:
  explicit Instruction(OP op) : op_(op), info_(0) {}
  Instruction(OP op, unsigned int info) : op_(op), info_(info) {}

  uint32 op_   : 4;    // A few bits to store the OP code.
  uint32 info_ : 28;   // Remaining bits in first word available to subclass.

 private:
  DISALLOW_COPY_AND_ASSIGN(Instruction);
};

namespace {

// Sets the current address for the emitting instructions.
class OriginInstruction : public Instruction {
 public:
  explicit OriginInstruction(RVA rva) : Instruction(ORIGIN, 0), rva_(rva) {}
  RVA origin_rva() const { return rva_; }
 private:
  RVA  rva_;
};

// Emits an entire PE base relocation table.
class PeRelocsInstruction : public Instruction {
 public:
  PeRelocsInstruction() : Instruction(MAKEPERELOCS) {}
};

// Emits an ELF relocation table.
class ElfRelocsInstruction : public Instruction {
 public:
  ElfRelocsInstruction() : Instruction(MAKEELFRELOCS) {}
};

// Emits an ELF ARM relocation table.
class ElfARMRelocsInstruction : public Instruction {
 public:
  ElfARMRelocsInstruction() : Instruction(MAKEELFARMRELOCS) {}
};

// Emits a single byte.
class ByteInstruction : public Instruction {
 public:
  explicit ByteInstruction(uint8 value) : Instruction(DEFBYTE, value) {}
  uint8 byte_value() const { return info_; }
};

// Emits a single byte.
class BytesInstruction : public Instruction {
 public:
  BytesInstruction(const uint8* values, uint32 len)
      : Instruction(DEFBYTES, 0),
        values_(values),
        len_(len) {}
  const uint8* byte_values() const { return values_; }
  uint32 len() const { return len_; }

 private:
  const uint8* values_;
  uint32 len_;
};

// A ABS32 to REL32 instruction emits a reference to a label's address.
class InstructionWithLabel : public Instruction {
 public:
  InstructionWithLabel(OP op, Label* label)
    : Instruction(op, 0), label_(label) {
    if (label == NULL) NOTREACHED();
  }
  Label* label() const { return label_; }
 protected:
  Label* label_;
};

// An ARM REL32 instruction emits a reference to a label's address and
// a specially-compressed ARM op.
class InstructionWithLabelARM : public InstructionWithLabel {
 public:
  InstructionWithLabelARM(OP op, uint16 compressed_op, Label* label,
                          const uint8* arm_op, uint16 op_size)
    : InstructionWithLabel(op, label), compressed_op_(compressed_op),
      arm_op_(arm_op), op_size_(op_size) {
    if (label == NULL) NOTREACHED();
  }
  uint16 compressed_op() const { return compressed_op_; }
  const uint8* arm_op() const { return arm_op_; }
  uint16 op_size() const { return op_size_; }
 private:
  uint16 compressed_op_;
  const uint8* arm_op_;
  uint16 op_size_;
};

}  // namespace

AssemblyProgram::AssemblyProgram(ExecutableType kind)
  : kind_(kind), image_base_(0) {
}

static void DeleteContainedLabels(const RVAToLabel& labels) {
  for (RVAToLabel::const_iterator p = labels.begin();  p != labels.end();  ++p)
    delete p->second;
}

AssemblyProgram::~AssemblyProgram() {
  for (size_t i = 0;  i < instructions_.size();  ++i) {
    Instruction* instruction = instructions_[i];
    if (instruction->op() != DEFBYTE)  // Will be in byte_instruction_cache_.
      delete instruction;
  }
  if (byte_instruction_cache_.get()) {
    for (size_t i = 0;  i < 256;  ++i)
      delete byte_instruction_cache_[i];
  }
  DeleteContainedLabels(rel32_labels_);
  DeleteContainedLabels(abs32_labels_);
}

CheckBool AssemblyProgram::EmitPeRelocsInstruction() {
  return Emit(new(std::nothrow) PeRelocsInstruction());
}

CheckBool AssemblyProgram::EmitElfRelocationInstruction() {
  return Emit(new(std::nothrow) ElfRelocsInstruction());
}

CheckBool AssemblyProgram::EmitElfARMRelocationInstruction() {
  return Emit(new(std::nothrow) ElfARMRelocsInstruction());
}

CheckBool AssemblyProgram::EmitOriginInstruction(RVA rva) {
  return Emit(new(std::nothrow) OriginInstruction(rva));
}

CheckBool AssemblyProgram::EmitByteInstruction(uint8 byte) {
  return Emit(GetByteInstruction(byte));
}

CheckBool AssemblyProgram::EmitBytesInstruction(const uint8* values,
                                                uint32 len) {
  return Emit(new(std::nothrow) BytesInstruction(values, len));
}

CheckBool AssemblyProgram::EmitRel32(Label* label) {
  return Emit(new(std::nothrow) InstructionWithLabel(REL32, label));
}

CheckBool AssemblyProgram::EmitRel32ARM(uint16 op, Label* label,
                                        const uint8* arm_op, uint16 op_size) {
  return Emit(new(std::nothrow) InstructionWithLabelARM(REL32ARM, op, label,
                                                        arm_op, op_size));
}

CheckBool AssemblyProgram::EmitAbs32(Label* label) {
  return Emit(new(std::nothrow) InstructionWithLabel(ABS32, label));
}

Label* AssemblyProgram::FindOrMakeAbs32Label(RVA rva) {
  return FindLabel(rva, &abs32_labels_);
}

Label* AssemblyProgram::FindOrMakeRel32Label(RVA rva) {
  return FindLabel(rva, &rel32_labels_);
}

void AssemblyProgram::DefaultAssignIndexes() {
  DefaultAssignIndexes(&abs32_labels_);
  DefaultAssignIndexes(&rel32_labels_);
}

void AssemblyProgram::UnassignIndexes() {
  UnassignIndexes(&abs32_labels_);
  UnassignIndexes(&rel32_labels_);
}

void AssemblyProgram::AssignRemainingIndexes() {
  AssignRemainingIndexes(&abs32_labels_);
  AssignRemainingIndexes(&rel32_labels_);
}

Label* AssemblyProgram::InstructionAbs32Label(
    const Instruction* instruction) const {
  if (instruction->op() == ABS32)
    return static_cast<const InstructionWithLabel*>(instruction)->label();
  return NULL;
}

Label* AssemblyProgram::InstructionRel32Label(
    const Instruction* instruction) const {
  if (instruction->op() == REL32 || instruction->op() == REL32ARM) {
    Label* label =
        static_cast<const InstructionWithLabel*>(instruction)->label();
    return label;
  }
  return NULL;
}

CheckBool AssemblyProgram::Emit(Instruction* instruction) {
  if (!instruction)
    return false;
  bool ok = instructions_.push_back(instruction);
  if (!ok)
    delete instruction;
  return ok;
}

Label* AssemblyProgram::FindLabel(RVA rva, RVAToLabel* labels) {
  Label*& slot = (*labels)[rva];
  if (slot == NULL) {
    slot = new(std::nothrow) Label(rva);
  }
  slot->count_++;
  return slot;
}

void AssemblyProgram::UnassignIndexes(RVAToLabel* labels) {
  for (RVAToLabel::iterator p = labels->begin();  p != labels->end();  ++p) {
    Label* current = p->second;
    current->index_ = Label::kNoIndex;
  }
}

// DefaultAssignIndexes takes a set of labels and assigns indexes in increasing
// address order.
//
void AssemblyProgram::DefaultAssignIndexes(RVAToLabel* labels) {
  int index = 0;
  for (RVAToLabel::iterator p = labels->begin();  p != labels->end();  ++p) {
    Label* current = p->second;
    if (current->index_ != Label::kNoIndex)
      NOTREACHED();
    current->index_ = index;
    ++index;
  }
}

// AssignRemainingIndexes assigns indexes to any addresses (labels) that are not
// yet assigned an index.
//
void AssemblyProgram::AssignRemainingIndexes(RVAToLabel* labels) {
  // An address table compresses best when each index is associated with an
  // address that is slight larger than the previous index.

  // First see which indexes have not been used.  The 'available' vector could
  // grow even bigger, but the number of addresses is a better starting size
  // than empty.
  std::vector<bool> available(labels->size(), true);
  int used = 0;

  for (RVAToLabel::iterator p = labels->begin();  p != labels->end();  ++p) {
    int index = p->second->index_;
    if (index != Label::kNoIndex) {
      while (static_cast<size_t>(index) >= available.size())
        available.push_back(true);
      available.at(index) = false;
      ++used;
    }
  }

  VLOG(1) << used << " of " << labels->size() << " labels pre-assigned";

  // Are there any unused labels that happen to be adjacent following a used
  // label?
  //
  int fill_forward_count = 0;
  Label* prev = 0;
  for (RVAToLabel::iterator p = labels->begin();  p != labels->end();  ++p) {
    Label* current = p->second;
    if (current->index_ == Label::kNoIndex) {
      int index = 0;
      if (prev  &&  prev->index_ != Label::kNoIndex)
        index = prev->index_ + 1;
      if (index < static_cast<int>(available.size()) && available.at(index)) {
        current->index_ = index;
        available.at(index) = false;
        ++fill_forward_count;
      }
    }
    prev = current;
  }

  // Are there any unused labels that happen to be adjacent preceeding a used
  // label?
  //
  int fill_backward_count = 0;
  prev = 0;
  for (RVAToLabel::reverse_iterator p = labels->rbegin();
       p != labels->rend();
       ++p) {
    Label* current = p->second;
    if (current->index_ == Label::kNoIndex) {
      int prev_index;
      if (prev)
        prev_index = prev->index_;
      else
        prev_index = static_cast<uint32>(available.size());
      if (prev_index != 0  &&
          prev_index != Label::kNoIndex  &&
          available.at(prev_index - 1)) {
        current->index_ = prev_index - 1;
        available.at(current->index_) = false;
        ++fill_backward_count;
      }
    }
    prev = current;
  }

  // Fill in any remaining indexes
  int fill_infill_count = 0;
  int index = 0;
  for (RVAToLabel::iterator p = labels->begin();  p != labels->end();  ++p) {
    Label* current = p->second;
    if (current->index_ == Label::kNoIndex) {
      while (!available.at(index)) {
        ++index;
      }
      current->index_ = index;
      available.at(index) = false;
      ++index;
      ++fill_infill_count;
    }
  }

  VLOG(1) << "  fill forward " << fill_forward_count
          << "  backward " << fill_backward_count
          << "  infill " << fill_infill_count;
}

typedef CheckBool (EncodedProgram::*DefineLabelMethod)(int index, RVA value);

#if defined(OS_WIN)
__declspec(noinline)
#endif
static CheckBool DefineLabels(const RVAToLabel& labels,
                              EncodedProgram* encoded_format,
                              DefineLabelMethod define_label) {
  bool ok = true;
  for (RVAToLabel::const_iterator p = labels.begin();
       ok && p != labels.end();
       ++p) {
    Label* label = p->second;
    ok = (encoded_format->*define_label)(label->index_, label->rva_);
  }
  return ok;
}

EncodedProgram* AssemblyProgram::Encode() const {
  scoped_ptr<EncodedProgram> encoded(new(std::nothrow) EncodedProgram());
  if (!encoded.get())
    return NULL;

  encoded->set_image_base(image_base_);

  if (!DefineLabels(abs32_labels_, encoded.get(),
                    &EncodedProgram::DefineAbs32Label) ||
      !DefineLabels(rel32_labels_, encoded.get(),
                    &EncodedProgram::DefineRel32Label)) {
    return NULL;
  }

  encoded->EndLabels();

  for (size_t i = 0;  i < instructions_.size();  ++i) {
    Instruction* instruction = instructions_[i];

    switch (instruction->op()) {
      case ORIGIN: {
        OriginInstruction* org = static_cast<OriginInstruction*>(instruction);
        if (!encoded->AddOrigin(org->origin_rva()))
          return NULL;
        break;
      }
      case DEFBYTE: {
        uint8 b = static_cast<ByteInstruction*>(instruction)->byte_value();
        if (!encoded->AddCopy(1, &b))
          return NULL;
        break;
      }
      case DEFBYTES: {
        const uint8* byte_values =
          static_cast<BytesInstruction*>(instruction)->byte_values();
        uint32 len = static_cast<BytesInstruction*>(instruction)->len();

        if (!encoded->AddCopy(len, byte_values))
          return NULL;
        break;
      }
      case REL32: {
        Label* label = static_cast<InstructionWithLabel*>(instruction)->label();
        if (!encoded->AddRel32(label->index_))
          return NULL;
        break;
      }
      case REL32ARM: {
        Label* label =
            static_cast<InstructionWithLabelARM*>(instruction)->label();
        uint16 compressed_op =
          static_cast<InstructionWithLabelARM*>(instruction)->
          compressed_op();
        if (!encoded->AddRel32ARM(compressed_op, label->index_))
          return NULL;
        break;
      }
      case ABS32: {
        Label* label = static_cast<InstructionWithLabel*>(instruction)->label();
        if (!encoded->AddAbs32(label->index_))
          return NULL;
        break;
      }
      case MAKEPERELOCS: {
        if (!encoded->AddPeMakeRelocs(kind_))
          return NULL;
        break;
      }
      case MAKEELFRELOCS: {
        if (!encoded->AddElfMakeRelocs())
          return NULL;
        break;
      }
      case MAKEELFARMRELOCS: {
        if (!encoded->AddElfARMMakeRelocs())
          return NULL;
        break;
      }
      default: {
        NOTREACHED() << "Unknown Insn OP kind";
      }
    }
  }

  return encoded.release();
}

Instruction* AssemblyProgram::GetByteInstruction(uint8 byte) {
  if (!byte_instruction_cache_.get()) {
    byte_instruction_cache_.reset(new(std::nothrow) Instruction*[256]);
    if (!byte_instruction_cache_.get())
      return NULL;

    for (int i = 0; i < 256; ++i) {
      byte_instruction_cache_[i] =
          new(std::nothrow) ByteInstruction(static_cast<uint8>(i));
      if (!byte_instruction_cache_[i]) {
        for (int j = 0; j < i; ++j)
          delete byte_instruction_cache_[j];
        byte_instruction_cache_.reset();
        return NULL;
      }
    }
  }

  return byte_instruction_cache_[byte];
}

// Chosen empirically to give the best reduction in payload size for
// an update from daisy_3701.98.0 to daisy_4206.0.0.
const int AssemblyProgram::kLabelLowerLimit = 5;

CheckBool AssemblyProgram::TrimLabels() {
  // For now only trim for ARM binaries
  if (kind() != EXE_ELF_32_ARM)
    return true;

  int lower_limit = kLabelLowerLimit;

  VLOG(1) << "TrimLabels: threshold " << lower_limit;

  // Remove underused labels from the list of labels
  RVAToLabel::iterator it = rel32_labels_.begin();
  while (it != rel32_labels_.end()) {
    if (it->second->count_ <= lower_limit) {
      rel32_labels_.erase(it++);
    } else {
      ++it;
    }
  }

  // Walk through the list of instructions, replacing trimmed labels
  // with the original machine instruction
  for (size_t i = 0; i < instructions_.size(); ++i) {
    Instruction* instruction = instructions_[i];
    switch (instruction->op()) {
      case REL32ARM: {
        Label* label =
            static_cast<InstructionWithLabelARM*>(instruction)->label();
        if (label->count_ <= lower_limit) {
          const uint8* arm_op =
              static_cast<InstructionWithLabelARM*>(instruction)->arm_op();
          uint16 op_size =
              static_cast<InstructionWithLabelARM*>(instruction)->op_size();

          if (op_size < 1)
            return false;
          BytesInstruction* new_instruction =
            new(std::nothrow) BytesInstruction(arm_op, op_size);
          instructions_[i] = new_instruction;
        }
        break;
      }
      default:
        break;
    }
  }

  return true;
}

void AssemblyProgram::PrintLabelCounts(RVAToLabel* labels) {
  for (RVAToLabel::const_iterator p = labels->begin(); p != labels->end();
       ++p) {
    Label* current = p->second;
    if (current->index_ != Label::kNoIndex)
      printf("%d\n", current->count_);
  }
}

void AssemblyProgram::CountRel32ARM() {
  PrintLabelCounts(&rel32_labels_);
}

////////////////////////////////////////////////////////////////////////////////

Status TrimLabels(AssemblyProgram* program) {
  if (program->TrimLabels())
    return C_OK;
  else
    return C_TRIM_FAILED;
}

Status Encode(AssemblyProgram* program, EncodedProgram** output) {
  *output = NULL;
  EncodedProgram *encoded = program->Encode();
  if (encoded) {
    *output = encoded;
    return C_OK;
  } else {
    return C_GENERAL_ERROR;
  }
}

}  // namespace courgette