// Copyright 2013 Google Inc. All rights reserved.
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions are
// met:
//
// * Redistributions of source code must retain the above copyright
// notice, this list of conditions and the following disclaimer.
// * Redistributions in binary form must reproduce the above
// copyright notice, this list of conditions and the following disclaimer
// in the documentation and/or other materials provided with the
// distribution.
// * Neither the name of Google Inc. nor the names of its
// contributors may be used to endorse or promote products derived from
// this software without specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
// This contains a suite of tools for transforming symbol information when
// when that information has been extracted from a PDB containing OMAP
// information.
// OMAP information is a lightweight description of a mapping between two
// address spaces. It consists of two streams, each of them a vector 2-tuples.
// The OMAPTO stream contains tuples of the form
//
// (RVA in transformed image, RVA in original image)
//
// while the OMAPFROM stream contains tuples of the form
//
// (RVA in original image, RVA in transformed image)
//
// The entries in each vector are sorted by the first value of the tuple, and
// the lengths associated with a mapping are implicit as the distance between
// two successive addresses in the vector.
// Consider a trivial 10-byte function described by the following symbol:
//
// Function: RVA 0x00001000, length 10, "foo"
//
// Now consider the same function, but with 5-bytes of instrumentation injected
// at offset 5. The OMAP streams describing this would look like:
//
// OMAPTO : [ [0x00001000, 0x00001000],
// [0x00001005, 0xFFFFFFFF],
// [0x0000100a, 0x00001005] ]
// OMAPFROM: [ [0x00001000, 0x00001000],
// [0x00001005, 0x0000100a] ]
//
// In this case the injected code has been marked as not originating in the
// source image, and thus it will have no symbol information at all. However,
// the injected code may also be associated with an original address range;
// for example, when prepending instrumentation to a basic block the
// instrumentation can be labelled as originating from the same source BB such
// that symbol resolution will still find the appropriate source code line
// number. In this case the OMAP stream would look like:
//
// OMAPTO : [ [0x00001000, 0x00001000],
// [0x00001005, 0x00001005],
// [0x0000100a, 0x00001005] ]
// OMAPFROM: [ [0x00001000, 0x00001000],
// [0x00001005, 0x0000100a] ]
//
// Suppose we asked DIA to lookup the symbol at location 0x0000100a of the
// instrumented image. It would first run this through the OMAPTO table and
// translate that address to 0x00001005. It would then lookup the symbol
// at that address and return the symbol for the function "foo". This is the
// correct result.
//
// However, if we query DIA for the length and address of the symbol it will
// tell us that it has length 10 and is at RVA 0x00001000. The location is
// correct, but the length doesn't take into account the 5-bytes of injected
// code. Symbol resolution works (starting from an instrumented address,
// mapping to an original address, and looking up a symbol), but the symbol
// metadata is incorrect.
//
// If we dump the symbols using DIA they will have their addresses
// appropriately transformed and reflect positions in the instrumented image.
// However, if we try to do a lookup using those symbols resolution can fail.
// For example, the address 0x0000100a will not map to the symbol for "foo",
// because DIA tells us it is at location 0x00001000 (correct) with length
// 10 (incorrect). The problem is one of order of operations: in this case
// we're attempting symbol resolution by looking up an instrumented address
// in the table of translated symbols.
//
// One way to handle this is to dump the OMAP information as part of the
// breakpad symbols. This requires the rest of the toolchain to be aware of
// OMAP information and to use it when present prior to performing lookup. The
// other option is to properly transform the symbols (updating length as well as
// position) so that resolution will work as expected for translated addresses.
// This is transparent to the rest of the toolchain.
#include "common/windows/omap.h"
#include <atlbase.h>
#include <algorithm>
#include <cassert>
#include <set>
#include "common/windows/dia_util.h"
namespace google_breakpad {
namespace {
static const wchar_t kOmapToDebugStreamName[] = L"OMAPTO";
static const wchar_t kOmapFromDebugStreamName[] = L"OMAPFROM";
// Dependending on where this is used in breakpad we sometimes get min/max from
// windef, and other times from algorithm. To get around this we simply
// define our own min/max functions.
template<typename T>
const T& Min(const T& t1, const T& t2) { return t1 < t2 ? t1 : t2; }
template<typename T>
const T& Max(const T& t1, const T& t2) { return t1 > t2 ? t1 : t2; }
// It makes things more readable to have two different OMAP types. We cast
// normal OMAPs into these. They must be the same size as the OMAP structure
// for this to work, hence the static asserts.
struct OmapOrigToTran {
DWORD rva_original;
DWORD rva_transformed;
};
struct OmapTranToOrig {
DWORD rva_transformed;
DWORD rva_original;
};
static_assert(sizeof(OmapOrigToTran) == sizeof(OMAP),
"OmapOrigToTran must have same size as OMAP.");
static_assert(sizeof(OmapTranToOrig) == sizeof(OMAP),
"OmapTranToOrig must have same size as OMAP.");
typedef std::vector<OmapOrigToTran> OmapFromTable;
typedef std::vector<OmapTranToOrig> OmapToTable;
// Used for sorting and searching through a Mapping.
bool MappedRangeOriginalLess(const MappedRange& lhs, const MappedRange& rhs) {
if (lhs.rva_original < rhs.rva_original)
return true;
if (lhs.rva_original > rhs.rva_original)
return false;
return lhs.length < rhs.length;
}
bool MappedRangeMappedLess(const MappedRange& lhs, const MappedRange& rhs) {
if (lhs.rva_transformed < rhs.rva_transformed)
return true;
if (lhs.rva_transformed > rhs.rva_transformed)
return false;
return lhs.length < rhs.length;
}
// Used for searching through the EndpointIndexMap.
bool EndpointIndexLess(const EndpointIndex& ei1, const EndpointIndex& ei2) {
return ei1.endpoint < ei2.endpoint;
}
// Finds the debug stream with the given |name| in the given |session|, and
// populates |table| with its contents. Casts the data directly into OMAP
// structs.
bool FindAndLoadOmapTable(const wchar_t* name,
IDiaSession* session,
OmapTable* table) {
assert(name != NULL);
assert(session != NULL);
assert(table != NULL);
CComPtr<IDiaEnumDebugStreamData> stream;
if (!FindDebugStream(name, session, &stream))
return false;
assert(stream.p != NULL);
LONG count = 0;
if (FAILED(stream->get_Count(&count))) {
fprintf(stderr, "IDiaEnumDebugStreamData::get_Count failed for stream "
"\"%ws\"\n", name);
return false;
}
// Get the length of the stream in bytes.
DWORD bytes_read = 0;
ULONG count_read = 0;
if (FAILED(stream->Next(count, 0, &bytes_read, NULL, &count_read))) {
fprintf(stderr, "IDiaEnumDebugStreamData::Next failed while reading "
"length of stream \"%ws\"\n", name);
return false;
}
// Ensure it's consistent with the OMAP data type.
DWORD bytes_expected = count * sizeof(OmapTable::value_type);
if (count * sizeof(OmapTable::value_type) != bytes_read) {
fprintf(stderr, "DIA debug stream \"%ws\" has an unexpected length", name);
return false;
}
// Read the table.
table->resize(count);
bytes_read = 0;
count_read = 0;
if (FAILED(stream->Next(count, bytes_expected, &bytes_read,
reinterpret_cast<BYTE*>(&table->at(0)),
&count_read))) {
fprintf(stderr, "IDiaEnumDebugStreamData::Next failed while reading "
"data from stream \"%ws\"\n");
return false;
}
return true;
}
// This determines the original image length by looking through the segment
// table.
bool GetOriginalImageLength(IDiaSession* session, DWORD* image_length) {
assert(session != NULL);
assert(image_length != NULL);
CComPtr<IDiaEnumSegments> enum_segments;
if (!FindTable(session, &enum_segments))
return false;
assert(enum_segments.p != NULL);
DWORD temp_image_length = 0;
CComPtr<IDiaSegment> segment;
ULONG fetched = 0;
while (SUCCEEDED(enum_segments->Next(1, &segment, &fetched)) &&
fetched == 1) {
assert(segment.p != NULL);
DWORD rva = 0;
DWORD length = 0;
DWORD frame = 0;
if (FAILED(segment->get_relativeVirtualAddress(&rva)) ||
FAILED(segment->get_length(&length)) ||
FAILED(segment->get_frame(&frame))) {
fprintf(stderr, "Failed to get basic properties for IDiaSegment\n");
return false;
}
if (frame > 0) {
DWORD segment_end = rva + length;
if (segment_end > temp_image_length)
temp_image_length = segment_end;
}
segment.Release();
}
*image_length = temp_image_length;
return true;
}
// Detects regions of the original image that have been removed in the
// transformed image, and sets the 'removed' property on all mapped ranges
// immediately preceding a gap. The mapped ranges must be sorted by
// 'rva_original'.
void FillInRemovedLengths(Mapping* mapping) {
assert(mapping != NULL);
// Find and fill gaps. We do this with two sweeps. We first sweep forward
// looking for gaps. When we identify a gap we then sweep forward with a
// second scan and set the 'removed' property for any intervals that
// immediately precede the gap.
//
// Gaps are typically between two successive intervals, but not always:
//
// Range 1: ---------------
// Range 2: -------
// Range 3: -------------
// Gap : ******
//
// In the above example the gap is between range 1 and range 3. A forward
// sweep finds the gap, and a second forward sweep identifies that range 1
// immediately precedes the gap and sets its 'removed' property.
size_t fill = 0;
DWORD rva_front = 0;
for (size_t find = 0; find < mapping->size(); ++find) {
#ifndef NDEBUG
// We expect the mapped ranges to be sorted by 'rva_original'.
if (find > 0) {
assert(mapping->at(find - 1).rva_original <=
mapping->at(find).rva_original);
}
#endif
if (rva_front < mapping->at(find).rva_original) {
// We've found a gap. Fill it in by setting the 'removed' property for
// any affected intervals.
DWORD removed = mapping->at(find).rva_original - rva_front;
for (; fill < find; ++fill) {
if (mapping->at(fill).rva_original + mapping->at(fill).length !=
rva_front) {
continue;
}
// This interval ends right where the gap starts. It needs to have its
// 'removed' information filled in.
mapping->at(fill).removed = removed;
}
}
// Advance the front that indicates the covered portion of the image.
rva_front = mapping->at(find).rva_original + mapping->at(find).length;
}
}
// Builds a unified view of the mapping between the original and transformed
// image space by merging OMAPTO and OMAPFROM data.
void BuildMapping(const OmapData& omap_data, Mapping* mapping) {
assert(mapping != NULL);
mapping->clear();
if (omap_data.omap_from.empty() || omap_data.omap_to.empty())
return;
// The names 'omap_to' and 'omap_from' are awfully confusing, so we make
// ourselves more explicit here. This cast is only safe because the underlying
// types have the exact same size.
const OmapToTable& tran2orig =
reinterpret_cast<const OmapToTable&>(omap_data.omap_to);
const OmapFromTable& orig2tran = reinterpret_cast<const OmapFromTable&>(
omap_data.omap_from);
// Handle the range of data at the beginning of the image. This is not usually
// specified by the OMAP data.
if (tran2orig[0].rva_transformed > 0 && orig2tran[0].rva_original > 0) {
DWORD header_transformed = tran2orig[0].rva_transformed;
DWORD header_original = orig2tran[0].rva_original;
DWORD header = Min(header_transformed, header_original);
MappedRange mr = {};
mr.length = header;
mr.injected = header_transformed - header;
mr.removed = header_original - header;
mapping->push_back(mr);
}
// Convert the implied lengths to explicit lengths, and infer which content
// has been injected into the transformed image. Injected content is inferred
// as regions of the transformed address space that does not map back to
// known valid content in the original image.
for (size_t i = 0; i < tran2orig.size(); ++i) {
const OmapTranToOrig& o1 = tran2orig[i];
// This maps to content that is outside the original image, thus it
// describes injected content. We can skip this entry.
if (o1.rva_original >= omap_data.length_original)
continue;
// Calculate the length of the current OMAP entry. This is implicit as the
// distance between successive |rva| values, capped at the end of the
// original image.
DWORD length = 0;
if (i + 1 < tran2orig.size()) {
const OmapTranToOrig& o2 = tran2orig[i + 1];
// We expect the table to be sorted by rva_transformed.
assert(o1.rva_transformed <= o2.rva_transformed);
length = o2.rva_transformed - o1.rva_transformed;
if (o1.rva_original + length > omap_data.length_original) {
length = omap_data.length_original - o1.rva_original;
}
} else {
length = omap_data.length_original - o1.rva_original;
}
// Zero-length entries don't describe anything and can be ignored.
if (length == 0)
continue;
// Any gaps in the transformed address-space are due to injected content.
if (!mapping->empty()) {
MappedRange& prev_mr = mapping->back();
prev_mr.injected += o1.rva_transformed -
(prev_mr.rva_transformed + prev_mr.length);
}
MappedRange mr = {};
mr.rva_original = o1.rva_original;
mr.rva_transformed = o1.rva_transformed;
mr.length = length;
mapping->push_back(mr);
}
// Sort based on the original image addresses.
std::sort(mapping->begin(), mapping->end(), MappedRangeOriginalLess);
// Fill in the 'removed' lengths by looking for gaps in the coverage of the
// original address space.
FillInRemovedLengths(mapping);
return;
}
void BuildEndpointIndexMap(ImageMap* image_map) {
assert(image_map != NULL);
if (image_map->mapping.size() == 0)
return;
const Mapping& mapping = image_map->mapping;
EndpointIndexMap& eim = image_map->endpoint_index_map;
// Get the unique set of interval endpoints.
std::set<DWORD> endpoints;
for (size_t i = 0; i < mapping.size(); ++i) {
endpoints.insert(mapping[i].rva_original);
endpoints.insert(mapping[i].rva_original +
mapping[i].length +
mapping[i].removed);
}
// Use the endpoints to initialize the secondary search structure for the
// mapping.
eim.resize(endpoints.size());
std::set<DWORD>::const_iterator it = endpoints.begin();
for (size_t i = 0; it != endpoints.end(); ++it, ++i) {
eim[i].endpoint = *it;
eim[i].index = mapping.size();
}
// For each endpoint we want the smallest index of any interval containing
// it. We iterate over the intervals and update the indices associated with
// each interval endpoint contained in the current interval. In the general
// case of an arbitrary set of intervals this is O(n^2), but the structure of
// OMAP data makes this O(n).
for (size_t i = 0; i < mapping.size(); ++i) {
EndpointIndex ei1 = { mapping[i].rva_original, 0 };
EndpointIndexMap::iterator it1 = std::lower_bound(
eim.begin(), eim.end(), ei1, EndpointIndexLess);
EndpointIndex ei2 = { mapping[i].rva_original + mapping[i].length +
mapping[i].removed, 0 };
EndpointIndexMap::iterator it2 = std::lower_bound(
eim.begin(), eim.end(), ei2, EndpointIndexLess);
for (; it1 != it2; ++it1)
it1->index = Min(i, it1->index);
}
}
// Clips the given mapped range.
void ClipMappedRangeOriginal(const AddressRange& clip_range,
MappedRange* mapped_range) {
assert(mapped_range != NULL);
// The clipping range is entirely outside of the mapped range.
if (clip_range.end() <= mapped_range->rva_original ||
mapped_range->rva_original + mapped_range->length +
mapped_range->removed <= clip_range.rva) {
mapped_range->length = 0;
mapped_range->injected = 0;
mapped_range->removed = 0;
return;
}
// Clip the left side.
if (mapped_range->rva_original < clip_range.rva) {
DWORD clip_left = clip_range.rva - mapped_range->rva_original;
mapped_range->rva_original += clip_left;
mapped_range->rva_transformed += clip_left;
if (clip_left > mapped_range->length) {
// The left clipping boundary entirely erases the content section of the
// range.
DWORD trim = clip_left - mapped_range->length;
mapped_range->length = 0;
mapped_range->injected -= Min(trim, mapped_range->injected);
// We know that trim <= mapped_range->remove.
mapped_range->removed -= trim;
} else {
// The left clipping boundary removes some, but not all, of the content.
// As such it leaves the removed/injected component intact.
mapped_range->length -= clip_left;
}
}
// Clip the right side.
DWORD end_original = mapped_range->rva_original + mapped_range->length;
if (clip_range.end() < end_original) {
// The right clipping boundary lands in the 'content' section of the range,
// entirely clearing the injected/removed portion.
DWORD clip_right = end_original - clip_range.end();
mapped_range->length -= clip_right;
mapped_range->injected = 0;
mapped_range->removed = 0;
return;
} else {
// The right clipping boundary is outside of the content, but may affect
// the removed/injected portion of the range.
DWORD end_removed = end_original + mapped_range->removed;
if (clip_range.end() < end_removed)
mapped_range->removed = clip_range.end() - end_original;
DWORD end_injected = end_original + mapped_range->injected;
if (clip_range.end() < end_injected)
mapped_range->injected = clip_range.end() - end_original;
}
return;
}
} // namespace
int AddressRange::Compare(const AddressRange& rhs) const {
if (end() <= rhs.rva)
return -1;
if (rhs.end() <= rva)
return 1;
return 0;
}
bool GetOmapDataAndDisableTranslation(IDiaSession* session,
OmapData* omap_data) {
assert(session != NULL);
assert(omap_data != NULL);
CComPtr<IDiaAddressMap> address_map;
if (FAILED(session->QueryInterface(&address_map))) {
fprintf(stderr, "IDiaSession::QueryInterface(IDiaAddressMap) failed\n");
return false;
}
assert(address_map.p != NULL);
BOOL omap_enabled = FALSE;
if (FAILED(address_map->get_addressMapEnabled(&omap_enabled))) {
fprintf(stderr, "IDiaAddressMap::get_addressMapEnabled failed\n");
return false;
}
if (!omap_enabled) {
// We indicate the non-presence of OMAP data by returning empty tables.
omap_data->omap_from.clear();
omap_data->omap_to.clear();
omap_data->length_original = 0;
return true;
}
// OMAP data is present. Disable translation.
if (FAILED(address_map->put_addressMapEnabled(FALSE))) {
fprintf(stderr, "IDiaAddressMap::put_addressMapEnabled failed\n");
return false;
}
// Read the OMAP streams.
if (!FindAndLoadOmapTable(kOmapFromDebugStreamName,
session,
&omap_data->omap_from)) {
return false;
}
if (!FindAndLoadOmapTable(kOmapToDebugStreamName,
session,
&omap_data->omap_to)) {
return false;
}
// Get the lengths of the address spaces.
if (!GetOriginalImageLength(session, &omap_data->length_original))
return false;
return true;
}
void BuildImageMap(const OmapData& omap_data, ImageMap* image_map) {
assert(image_map != NULL);
BuildMapping(omap_data, &image_map->mapping);
BuildEndpointIndexMap(image_map);
}
void MapAddressRange(const ImageMap& image_map,
const AddressRange& original_range,
AddressRangeVector* mapped_ranges) {
assert(mapped_ranges != NULL);
const Mapping& map = image_map.mapping;
// Handle the trivial case of an empty image_map. This means that there is
// no transformation to be applied, and we can simply return the original
// range.
if (map.empty()) {
mapped_ranges->push_back(original_range);
return;
}
// If we get a query of length 0 we need to handle it by using a non-zero
// query length.
AddressRange query_range(original_range);
if (query_range.length == 0)
query_range.length = 1;
// Find the range of intervals that can potentially intersect our query range.
size_t imin = 0;
size_t imax = 0;
{
// The index of the earliest possible range that can affect is us done by
// searching through the secondary indexing structure.
const EndpointIndexMap& eim = image_map.endpoint_index_map;
EndpointIndex q1 = { query_range.rva, 0 };
EndpointIndexMap::const_iterator it1 = std::lower_bound(
eim.begin(), eim.end(), q1, EndpointIndexLess);
if (it1 == eim.end()) {
imin = map.size();
} else {
// Backup to find the interval that contains our query point.
if (it1 != eim.begin() && query_range.rva < it1->endpoint)
--it1;
imin = it1->index;
}
// The first range that can't possibly intersect us is found by searching
// through the image map directly as it is already sorted by interval start
// point.
MappedRange q2 = { query_range.end(), 0 };
Mapping::const_iterator it2 = std::lower_bound(
map.begin(), map.end(), q2, MappedRangeOriginalLess);
imax = it2 - map.begin();
}
// Find all intervals that intersect the query range.
Mapping temp_map;
for (size_t i = imin; i < imax; ++i) {
MappedRange mr = map[i];
ClipMappedRangeOriginal(query_range, &mr);
if (mr.length + mr.injected > 0)
temp_map.push_back(mr);
}
// If there are no intersecting ranges then the query range has been removed
// from the image in question.
if (temp_map.empty())
return;
// Sort based on transformed addresses.
std::sort(temp_map.begin(), temp_map.end(), MappedRangeMappedLess);
// Zero-length queries can't actually be merged. We simply output the set of
// unique RVAs that correspond to the query RVA.
if (original_range.length == 0) {
mapped_ranges->push_back(AddressRange(temp_map[0].rva_transformed, 0));
for (size_t i = 1; i < temp_map.size(); ++i) {
if (temp_map[i].rva_transformed > mapped_ranges->back().rva)
mapped_ranges->push_back(AddressRange(temp_map[i].rva_transformed, 0));
}
return;
}
// Merge any ranges that are consecutive in the mapped image. We merge over
// injected content if it makes ranges contiguous, but we ignore any injected
// content at the tail end of a range. This allows us to detect symbols that
// have been lengthened by injecting content in the middle. However, it
// misses the case where content has been injected at the head or the tail.
// The problem is that it doesn't know whether to attribute it to the
// preceding or following symbol. It is up to the author of the transform to
// output explicit OMAP info in these cases to ensure full coverage of the
// transformed address space.
DWORD rva_begin = temp_map[0].rva_transformed;
DWORD rva_cur_content = rva_begin + temp_map[0].length;
DWORD rva_cur_injected = rva_cur_content + temp_map[0].injected;
for (size_t i = 1; i < temp_map.size(); ++i) {
if (rva_cur_injected < temp_map[i].rva_transformed) {
// This marks the end of a continuous range in the image. Output the
// current range and start a new one.
if (rva_begin < rva_cur_content) {
mapped_ranges->push_back(
AddressRange(rva_begin, rva_cur_content - rva_begin));
}
rva_begin = temp_map[i].rva_transformed;
}
rva_cur_content = temp_map[i].rva_transformed + temp_map[i].length;
rva_cur_injected = rva_cur_content + temp_map[i].injected;
}
// Output the range in progress.
if (rva_begin < rva_cur_content) {
mapped_ranges->push_back(
AddressRange(rva_begin, rva_cur_content - rva_begin));
}
return;
}
} // namespace google_breakpad