/*--------------------------------------------------------------------*/
/*--- Ptrcheck: a pointer-use checker. ---*/
/*--- This file checks stack and global array accesses. ---*/
/*--- sg_main.c ---*/
/*--------------------------------------------------------------------*/
/*
This file is part of Ptrcheck, a Valgrind tool for checking pointer
use in programs.
Copyright (C) 2008-2015 OpenWorks Ltd
info@open-works.co.uk
This program is free software; you can redistribute it and/or
modify it under the terms of the GNU General Public License as
published by the Free Software Foundation; either version 2 of the
License, or (at your option) any later version.
This program is distributed in the hope that it will be useful, but
WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
General Public License for more details.
You should have received a copy of the GNU General Public License
along with this program; if not, write to the Free Software
Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
02111-1307, USA.
The GNU General Public License is contained in the file COPYING.
Neither the names of the U.S. Department of Energy nor the
University of California nor the names of its contributors may be
used to endorse or promote products derived from this software
without prior written permission.
*/
#include "pub_tool_basics.h"
#include "pub_tool_libcbase.h"
#include "pub_tool_libcassert.h"
#include "pub_tool_libcprint.h"
#include "pub_tool_tooliface.h"
#include "pub_tool_wordfm.h"
#include "pub_tool_xarray.h"
#include "pub_tool_threadstate.h"
#include "pub_tool_mallocfree.h"
#include "pub_tool_machine.h"
#include "pub_tool_debuginfo.h"
#include "pub_tool_options.h"
#include "pc_common.h"
#include "sg_main.h" // self
static
void preen_global_Invars ( Addr a, SizeT len ); /*fwds*/
//////////////////////////////////////////////////////////////
// //
// Basic Stuff //
// //
//////////////////////////////////////////////////////////////
static inline Bool is_sane_TId ( ThreadId tid )
{
return tid >= 0 && tid < VG_N_THREADS
&& tid != VG_INVALID_THREADID;
}
static void* sg_malloc ( const HChar* cc, SizeT n ) {
void* p;
tl_assert(n > 0);
p = VG_(malloc)( cc, n );
return p;
}
static void sg_free ( void* p ) {
tl_assert(p);
VG_(free)(p);
}
/* Compare the intervals [a1,a1+n1) and [a2,a2+n2). Return -1 if the
first interval is lower, 1 if the first interval is higher, and 0
if there is any overlap. Redundant paranoia with casting is there
following what looked distinctly like a bug in gcc-4.1.2, in which
some of the comparisons were done signedly instead of
unsignedly. */
inline
static Word cmp_nonempty_intervals ( Addr a1, SizeT n1,
Addr a2, SizeT n2 ) {
UWord a1w = (UWord)a1;
UWord n1w = (UWord)n1;
UWord a2w = (UWord)a2;
UWord n2w = (UWord)n2;
tl_assert(n1w > 0 && n2w > 0);
if (a1w + n1w <= a2w) return -1L;
if (a2w + n2w <= a1w) return 1L;
return 0;
}
/* Return true iff [aSmall,aSmall+nSmall) is entirely contained
within [aBig,aBig+nBig). */
inline
static Bool is_subinterval_of ( Addr aBig, SizeT nBig,
Addr aSmall, SizeT nSmall ) {
tl_assert(nBig > 0 && nSmall > 0);
return aBig <= aSmall && aSmall + nSmall <= aBig + nBig;
}
inline
static Addr Addr__min ( Addr a1, Addr a2 ) {
return a1 < a2 ? a1 : a2;
}
inline
static Addr Addr__max ( Addr a1, Addr a2 ) {
return a1 < a2 ? a2 : a1;
}
//////////////////////////////////////////////////////////////
// //
// StackBlocks Persistent Cache //
// //
//////////////////////////////////////////////////////////////
/* We maintain a set of XArray* of StackBlocks. These are never
freed. When a new StackBlock vector is acquired from
VG_(di_get_local_blocks_at_ip), we compare it to the existing set.
If not present, it is added. If present, the just-acquired one is
freed and the copy used.
This simplifies storage management elsewhere. It allows us to
assume that a pointer to an XArray* of StackBlock is valid forever.
It also means there are no duplicates anywhere, which could be
important from a space point of view for programs that generate a
lot of translations, or where translations are frequently discarded
and re-made.
Note that we normalise the arrays by sorting the elements according
to an arbitrary total order, so as to avoid the situation that two
vectors describe the same set of variables but are not structurally
identical. */
static inline Bool StackBlock__sane ( const StackBlock* fb )
{
if (fb->name[ sizeof(fb->name)-1 ] != 0)
return False;
if (fb->spRel != False && fb->spRel != True)
return False;
if (fb->isVec != False && fb->isVec != True)
return False;
return True;
}
/* Generate an arbitrary total ordering on StackBlocks. */
static Word StackBlock__cmp ( const StackBlock* fb1, const StackBlock* fb2 )
{
Word r;
tl_assert(StackBlock__sane(fb1));
tl_assert(StackBlock__sane(fb2));
/* Hopefully the .base test hits most of the time. For the blocks
associated with any particular instruction, if the .base values
are the same then probably it doesn't make sense for the other
fields to be different. But this is supposed to be a completely
general structural total order, so we have to compare everything
anyway. */
if (fb1->base < fb2->base) return -1;
if (fb1->base > fb2->base) return 1;
/* compare sizes */
if (fb1->szB < fb2->szB) return -1;
if (fb1->szB > fb2->szB) return 1;
/* compare sp/fp flag */
if (fb1->spRel < fb2->spRel) return -1;
if (fb1->spRel > fb2->spRel) return 1;
/* compare is/is-not array-typed flag */
if (fb1->isVec < fb2->isVec) return -1;
if (fb1->isVec > fb2->isVec) return 1;
/* compare the name */
r = (Word)VG_(strcmp)(fb1->name, fb2->name);
return r;
}
/* Returns True if all fields except .szB are the same. szBs may or
may not be the same; they are simply not consulted. */
static Bool StackBlock__all_fields_except_szB_are_equal (
StackBlock* fb1,
StackBlock* fb2
)
{
tl_assert(StackBlock__sane(fb1));
tl_assert(StackBlock__sane(fb2));
return fb1->base == fb2->base
&& fb1->spRel == fb2->spRel
&& fb1->isVec == fb2->isVec
&& 0 == VG_(strcmp)(fb1->name, fb2->name);
}
/* Generate an arbitrary total ordering on vectors of StackBlocks. */
static Word StackBlocks__cmp ( XArray* fb1s, XArray* fb2s )
{
Word i, r, n1, n2;
n1 = VG_(sizeXA)( fb1s );
n2 = VG_(sizeXA)( fb2s );
if (n1 < n2) return -1;
if (n1 > n2) return 1;
for (i = 0; i < n1; i++) {
StackBlock *fb1, *fb2;
fb1 = VG_(indexXA)( fb1s, i );
fb2 = VG_(indexXA)( fb2s, i );
r = StackBlock__cmp( fb1, fb2 );
if (r != 0) return r;
}
tl_assert(i == n1 && i == n2);
return 0;
}
static void pp_StackBlocks ( XArray* sbs )
{
Word i, n = VG_(sizeXA)( sbs );
VG_(message)(Vg_DebugMsg, "<<< STACKBLOCKS\n" );
for (i = 0; i < n; i++) {
StackBlock* sb = (StackBlock*)VG_(indexXA)( sbs, i );
VG_(message)(Vg_DebugMsg,
" StackBlock{ off %ld szB %lu spRel:%c isVec:%c \"%s\" }\n",
sb->base, sb->szB, sb->spRel ? 'Y' : 'N',
sb->isVec ? 'Y' : 'N', &sb->name[0]
);
}
VG_(message)(Vg_DebugMsg, ">>> STACKBLOCKS\n" );
}
/* ---------- The StackBlock vector cache ---------- */
static WordFM* /* XArray* of StackBlock -> nothing */
frameBlocks_set = NULL;
static void init_StackBlocks_set ( void )
{
tl_assert(!frameBlocks_set);
frameBlocks_set
= VG_(newFM)( sg_malloc, "di.sg_main.iSBs.1", sg_free,
(Word(*)(UWord,UWord))StackBlocks__cmp );
tl_assert(frameBlocks_set);
}
/* Find the given StackBlock-vector in our collection thereof. If
found, deallocate the supplied one, and return the address of the
copy. If not found, add the supplied one to our collection and
return its address. */
static XArray* /* of StackBlock */
StackBlocks__find_and_dealloc__or_add
( XArray* /* of StackBlock */ orig )
{
UWord key, val;
/* First, normalise, as per comments above. */
VG_(setCmpFnXA)( orig, (XACmpFn_t)StackBlock__cmp );
VG_(sortXA)( orig );
/* Now get rid of any exact duplicates. */
nuke_dups:
{ Word r, w, nEQ, n = VG_(sizeXA)( orig );
if (n >= 2) {
w = 0;
nEQ = 0;
for (r = 0; r < n; r++) {
if (r+1 < n) {
StackBlock* pR0 = VG_(indexXA)( orig, r+0 );
StackBlock* pR1 = VG_(indexXA)( orig, r+1 );
Word c = StackBlock__cmp(pR0,pR1);
tl_assert(c == -1 || c == 0);
if (c == 0) { nEQ++; continue; }
}
if (w != r) {
StackBlock* pW = VG_(indexXA)( orig, w );
StackBlock* pR = VG_(indexXA)( orig, r );
*pW = *pR;
}
w++;
}
tl_assert(r == n);
tl_assert(w + nEQ == n);
if (w < n) {
VG_(dropTailXA)( orig, n-w );
}
if (0) VG_(printf)("delta %ld\n", n-w);
}
}
/* Deal with the following strangeness, where two otherwise
identical blocks are claimed to have different sizes. In which
case we use the larger size. */
/* StackBlock{ off 16 szB 66 spRel:Y isVec:Y "sz" }
StackBlock{ off 16 szB 130 spRel:Y isVec:Y "sz" }
StackBlock{ off 208 szB 16 spRel:Y isVec:Y "ar" }
*/
{ Word i, n = VG_(sizeXA)( orig );
if (n >= 2) {
for (i = 0; i < n-1; i++) {
StackBlock* sb0 = VG_(indexXA)( orig, i+0 );
StackBlock* sb1 = VG_(indexXA)( orig, i+1 );
if (StackBlock__all_fields_except_szB_are_equal(sb0, sb1)) {
/* They can't be identical because the previous tidying
pass would have removed the duplicates. And they
can't be > because the earlier sorting pass would
have ordered otherwise-identical descriptors
according to < on .szB fields. Hence: */
tl_assert(sb0->szB < sb1->szB);
sb0->szB = sb1->szB;
/* This makes the blocks identical, at the size of the
larger one. Rather than go to all the hassle of
sliding the rest down, simply go back to the
remove-duplicates stage. The assertion guarantees
that we eventually make progress, since the rm-dups
stage will get rid of one of the blocks. This is
expected to happen only exceedingly rarely. */
tl_assert(StackBlock__cmp(sb0,sb1) == 0);
goto nuke_dups;
}
}
}
}
/* If there are any blocks which overlap and have the same
fpRel-ness, junk the whole descriptor; it's obviously bogus.
Icc11 certainly generates bogus info from time to time.
This check is pretty weak; really we ought to have a stronger
sanity check. */
{ Word i, n = VG_(sizeXA)( orig );
static Int moans = 3;
for (i = 0; i < n-1; i++) {
StackBlock* sb1 = (StackBlock*)VG_(indexXA)( orig, i );
StackBlock* sb2 = (StackBlock*)VG_(indexXA)( orig, i+1 );
if (sb1->spRel == sb2->spRel
&& (sb1->base >= sb2->base
|| sb1->base + sb1->szB > sb2->base)) {
if (moans > 0 && !VG_(clo_xml)) {
moans--;
VG_(message)(Vg_UserMsg, "Warning: bogus DWARF3 info: "
"overlapping stack blocks\n");
if (VG_(clo_verbosity) >= 2)
pp_StackBlocks(orig);
if (moans == 0)
VG_(message)(Vg_UserMsg, "Further instances of this "
"message will not be shown\n" );
}
VG_(dropTailXA)( orig, VG_(sizeXA)( orig ));
break;
}
}
}
/* Now, do we have it already? */
if (VG_(lookupFM)( frameBlocks_set, &key, &val, (UWord)orig )) {
/* yes */
XArray* res;
tl_assert(val == 0);
tl_assert(key != (UWord)orig);
VG_(deleteXA)(orig);
res = (XArray*)key;
return res;
} else {
/* no */
VG_(addToFM)( frameBlocks_set, (UWord)orig, 0 );
return orig;
}
}
/* Top level function for getting the StackBlock vector for a given
instruction. It is guaranteed that the returned pointer will be
valid for the entire rest of the run, and also that the addresses
of the individual elements of the array will not change. */
static XArray* /* of StackBlock */ get_StackBlocks_for_IP ( Addr ip )
{
XArray* blocks = VG_(di_get_stack_blocks_at_ip)( ip, True/*arrays only*/ );
tl_assert(blocks);
return StackBlocks__find_and_dealloc__or_add( blocks );
}
//////////////////////////////////////////////////////////////
// //
// GlobalBlocks Persistent Cache //
// //
//////////////////////////////////////////////////////////////
/* Generate an arbitrary total ordering on GlobalBlocks. */
static Word GlobalBlock__cmp ( GlobalBlock* gb1, GlobalBlock* gb2 )
{
Word r;
/* compare addrs */
if (gb1->addr < gb2->addr) return -1;
if (gb1->addr > gb2->addr) return 1;
/* compare sizes */
if (gb1->szB < gb2->szB) return -1;
if (gb1->szB > gb2->szB) return 1;
/* compare is/is-not array-typed flag */
if (gb1->isVec < gb2->isVec) return -1;
if (gb1->isVec > gb2->isVec) return 1;
/* compare the name */
r = (Word)VG_(strcmp)(gb1->name, gb2->name);
if (r != 0) return r;
/* compare the soname */
r = (Word)VG_(strcmp)(gb1->soname, gb2->soname);
return r;
}
static WordFM* /* GlobalBlock* -> nothing */
globalBlock_set = NULL;
static void init_GlobalBlock_set ( void )
{
tl_assert(!globalBlock_set);
globalBlock_set
= VG_(newFM)( sg_malloc, "di.sg_main.iGBs.1", sg_free,
(Word(*)(UWord,UWord))GlobalBlock__cmp );
tl_assert(globalBlock_set);
}
/* Top level function for making GlobalBlocks persistent. Call here
with a non-persistent version, and the returned one is guaranteed
to be valid for the entire rest of the run. The supplied one is
copied, not stored, so can be freed after the call. */
static GlobalBlock* get_persistent_GlobalBlock ( GlobalBlock* orig )
{
UWord key, val;
/* Now, do we have it already? */
if (VG_(lookupFM)( globalBlock_set, &key, &val, (UWord)orig )) {
/* yes, return the copy */
GlobalBlock* res;
tl_assert(val == 0);
res = (GlobalBlock*)key;
tl_assert(res != orig);
return res;
} else {
/* no. clone it, store the clone and return the clone's
address. */
GlobalBlock* clone = sg_malloc( "di.sg_main.gpGB.1",
sizeof(GlobalBlock) );
tl_assert(clone);
*clone = *orig;
VG_(addToFM)( globalBlock_set, (UWord)clone, 0 );
return clone;
}
}
//////////////////////////////////////////////////////////////
// //
// Interval tree of StackTreeBlock //
// //
//////////////////////////////////////////////////////////////
/* A node in a stack interval tree. Zero length intervals (.szB == 0)
are not allowed.
A stack interval tree is a (WordFM StackTreeNode* void). There is
one stack interval tree for each thread.
*/
typedef
struct {
Addr addr;
SizeT szB; /* copied from .descr->szB */
StackBlock* descr; /* it's an instance of this block */
UWord depth; /* depth of stack at time block was pushed */
}
StackTreeNode;
static void pp_StackTree ( WordFM* sitree, const HChar* who )
{
UWord keyW, valW;
VG_(printf)("<<< BEGIN pp_StackTree %s\n", who );
VG_(initIterFM)( sitree );
while (VG_(nextIterFM)( sitree, &keyW, &valW )) {
StackTreeNode* nd = (StackTreeNode*)keyW;
VG_(printf)(" [%#lx,+%lu) descr=%p %s %lu\n", nd->addr, nd->szB,
nd->descr, nd->descr->name, nd->descr->szB);
}
VG_(printf)(">>> END pp_StackTree %s\n", who );
}
/* Interval comparison function for StackTreeNode */
static Word cmp_intervals_StackTreeNode ( StackTreeNode* sn1,
StackTreeNode* sn2 )
{
return cmp_nonempty_intervals(sn1->addr, sn1->szB,
sn2->addr, sn2->szB);
}
/* Find the node holding 'a', if any. */
static StackTreeNode* find_StackTreeNode ( WordFM* sitree, Addr a )
{
UWord keyW, valW;
StackTreeNode key;
tl_assert(sitree);
key.addr = a;
key.szB = 1;
if (VG_(lookupFM)( sitree, &keyW, &valW, (UWord)&key )) {
StackTreeNode* res = (StackTreeNode*)keyW;
tl_assert(valW == 0);
tl_assert(res != &key);
return res;
} else {
return NULL;
}
}
/* Note that the supplied XArray of FrameBlock must have been
made persistent already. */
__attribute__((noinline))
static void add_blocks_to_StackTree (
/*MOD*/WordFM* sitree,
XArray* /* FrameBlock */ descrs,
XArray* /* Addr */ bases,
UWord depth
)
{
Bool debug = (Bool)0;
Word i, nDescrs, nBases;
nDescrs = VG_(sizeXA)( descrs ),
nBases = VG_(sizeXA)( bases );
tl_assert(nDescrs == nBases);
if (nDescrs == 0) return;
tl_assert(sitree);
if (debug) {
VG_(printf)("\ndepth = %lu\n", depth);
pp_StackTree( sitree, "add_blocks_to_StackTree-pre" );
pp_StackBlocks(descrs);
}
for (i = 0; i < nDescrs; i++) {
Bool already_present;
StackTreeNode* nyu;
Addr addr = *(Addr*)VG_(indexXA)( bases, i );
StackBlock* descr = (StackBlock*)VG_(indexXA)( descrs, i );
tl_assert(descr->szB > 0);
nyu = sg_malloc( "di.sg_main.abtST.1", sizeof(StackTreeNode) );
nyu->addr = addr;
nyu->szB = descr->szB;
nyu->descr = descr;
nyu->depth = depth;
if (debug) VG_(printf)("ADD %#lx %lu\n", addr, descr->szB);
already_present = VG_(addToFM)( sitree, (UWord)nyu, 0 );
/* The interval can't already be there; else we have
overlapping stack blocks. */
tl_assert(!already_present);
if (debug) {
pp_StackTree( sitree, "add_blocks_to_StackTree-step" );
}
}
if (debug) {
pp_StackTree( sitree, "add_blocks_to_StackTree-post" );
VG_(printf)("\n");
}
}
static void del_blocks_from_StackTree ( /*MOD*/WordFM* sitree,
XArray* /* Addr */ bases )
{
UWord oldK, oldV;
Word i, nBases = VG_(sizeXA)( bases );
for (i = 0; i < nBases; i++) {
Bool b;
Addr addr = *(Addr*)VG_(indexXA)( bases, i );
StackTreeNode* nd = find_StackTreeNode(sitree, addr);
/* The interval must be there; we added it earlier when
the associated frame was created. */
tl_assert(nd);
b = VG_(delFromFM)( sitree, &oldK, &oldV, (UWord)nd );
/* we just found the block! */
tl_assert(b);
tl_assert(oldV == 0);
tl_assert(nd == (StackTreeNode*)oldK);
sg_free(nd);
}
}
static void delete_StackTree__kFin ( UWord keyW ) {
StackTreeNode* nd = (StackTreeNode*)keyW;
tl_assert(nd);
sg_free(nd);
}
static void delete_StackTree__vFin ( UWord valW ) {
tl_assert(valW == 0);
}
static void delete_StackTree ( WordFM* sitree )
{
VG_(deleteFM)( sitree,
delete_StackTree__kFin, delete_StackTree__vFin );
}
static WordFM* new_StackTree ( void ) {
return VG_(newFM)( sg_malloc, "di.sg_main.nST.1", sg_free,
(Word(*)(UWord,UWord))cmp_intervals_StackTreeNode );
}
//////////////////////////////////////////////////////////////
// //
// Interval tree of GlobalTreeBlock //
// //
//////////////////////////////////////////////////////////////
/* A node in a global interval tree. Zero length intervals
(.szB == 0) are not allowed.
A global interval tree is a (WordFM GlobalTreeNode* void). There
is one global interval tree for the entire process.
*/
typedef
struct {
Addr addr; /* copied from .descr->addr */
SizeT szB; /* copied from .descr->szB */
GlobalBlock* descr; /* it's this block */
}
GlobalTreeNode;
static void GlobalTreeNode__pp ( GlobalTreeNode* nd ) {
tl_assert(nd->descr);
VG_(printf)("GTNode [%#lx,+%lu) %s",
nd->addr, nd->szB, nd->descr->name);
}
static void GlobalTree__pp ( WordFM* /* of (GlobalTreeNode,void) */ gitree,
const HChar* who )
{
UWord keyW, valW;
GlobalTreeNode* nd;
VG_(printf)("<<< GlobalBlockTree (%s)\n", who);
VG_(initIterFM)( gitree );
while (VG_(nextIterFM)( gitree, &keyW, &valW )) {
tl_assert(valW == 0);
nd = (GlobalTreeNode*)keyW;
VG_(printf)(" ");
GlobalTreeNode__pp(nd);
VG_(printf)("\n");
}
VG_(doneIterFM)( gitree );
VG_(printf)(">>>\n");
}
/* Interval comparison function for GlobalTreeNode */
static Word cmp_intervals_GlobalTreeNode ( GlobalTreeNode* gn1,
GlobalTreeNode* gn2 )
{
return cmp_nonempty_intervals( gn1->addr, gn1->szB,
gn2->addr, gn2->szB );
}
/* Find the node holding 'a', if any. */
static GlobalTreeNode* find_GlobalTreeNode ( WordFM* gitree, Addr a )
{
UWord keyW, valW;
GlobalTreeNode key;
key.addr = a;
key.szB = 1;
if (VG_(lookupFM)( gitree, &keyW, &valW, (UWord)&key )) {
GlobalTreeNode* res = (GlobalTreeNode*)keyW;
tl_assert(valW == 0);
tl_assert(res != &key);
return res;
} else {
return NULL;
}
}
/* Note that the supplied GlobalBlock must have been made persistent
already. */
static void add_block_to_GlobalTree (
/*MOD*/WordFM* gitree,
GlobalBlock* descr
)
{
Bool already_present;
GlobalTreeNode *nyu, *nd;
UWord keyW, valW;
static Int moans = 3;
tl_assert(descr->szB > 0);
nyu = sg_malloc( "di.sg_main.abtG.1", sizeof(GlobalTreeNode) );
nyu->addr = descr->addr;
nyu->szB = descr->szB;
nyu->descr = descr;
/* Basically it's an error to add a global block to the tree that
is already in the tree. However, detect and ignore attempts to
insert exact duplicates; they do appear for some reason
(possible a bug in m_debuginfo?) */
already_present = VG_(lookupFM)( gitree, &keyW, &valW, (UWord)nyu );
if (already_present) {
tl_assert(valW == 0);
nd = (GlobalTreeNode*)keyW;
tl_assert(nd);
tl_assert(nd != nyu);
tl_assert(nd->descr);
tl_assert(nyu->descr);
if (nd->addr == nyu->addr && nd->szB == nyu->szB
/* && 0 == VG_(strcmp)(nd->descr->name, nyu->descr->name) */
/* Although it seems reasonable to demand that duplicate
blocks have identical names, that is too strict. For
example, reading debuginfo from glibc produces two
otherwise identical blocks with names "tzname" and
"__tzname". A constraint of the form "must be identical,
or one must be a substring of the other" would fix that.
However, such trickery is scuppered by the fact that we
truncate all variable names to 15 characters to make
storage management simpler, hence giving pairs like
"__EI___pthread_" (truncated) vs "__pthread_keys". So
it's simplest just to skip the name comparison
completely. */
&& 0 == VG_(strcmp)(nd->descr->soname, nyu->descr->soname)) {
/* exact duplicate; ignore it */
sg_free(nyu);
return;
}
/* else fall through; the assertion below will catch it */
}
already_present = VG_(addToFM)( gitree, (UWord)nyu, 0 );
/* The interval can't already be there; else we have
overlapping global blocks. */
/* Unfortunately (25 Jan 09) at least icc11 has been seen to
generate overlapping block descriptions in the Dwarf3; clearly
bogus. */
if (already_present && moans > 0 && !VG_(clo_xml)) {
moans--;
VG_(message)(Vg_UserMsg, "Warning: bogus DWARF3 info: "
"overlapping global blocks\n");
if (VG_(clo_verbosity) >= 2) {
GlobalTree__pp( gitree,
"add_block_to_GlobalTree: non-exact duplicate" );
VG_(printf)("Overlapping block: ");
GlobalTreeNode__pp(nyu);
VG_(printf)("\n");
}
if (moans == 0)
VG_(message)(Vg_UserMsg, "Further instances of this "
"message will not be shown\n" );
}
/* tl_assert(!already_present); */
}
static Bool del_GlobalTree_range ( /*MOD*/WordFM* gitree,
Addr a, SizeT szB )
{
/* One easy way to do this: look up [a,a+szB) in the tree. That
will either succeed, producing a block which intersects that
range, in which case we delete it and repeat; or it will fail,
in which case there are no blocks intersecting the range, and we
can bring the process to a halt. */
UWord keyW, valW, oldK, oldV;
GlobalTreeNode key, *nd;
Bool b, anyFound;
tl_assert(szB > 0);
anyFound = False;
key.addr = a;
key.szB = szB;
while (VG_(lookupFM)( gitree, &keyW, &valW, (UWord)&key )) {
anyFound = True;
nd = (GlobalTreeNode*)keyW;
tl_assert(valW == 0);
tl_assert(nd != &key);
tl_assert(cmp_nonempty_intervals(a, szB, nd->addr, nd->szB) == 0);
b = VG_(delFromFM)( gitree, &oldK, &oldV, (UWord)&key );
tl_assert(b);
tl_assert(oldV == 0);
tl_assert(oldK == keyW); /* check we deleted the node we just found */
}
return anyFound;
}
//////////////////////////////////////////////////////////////
// //
// Invar //
// //
//////////////////////////////////////////////////////////////
/* An invariant, as resulting from watching the destination of a
memory referencing instruction. Initially is Inv_Unset until the
instruction makes a first access. */
typedef
enum {
Inv_Unset=1, /* not established yet */
Inv_Unknown, /* unknown location */
Inv_Stack0, /* array-typed stack block in innermost frame */
Inv_StackN, /* array-typed stack block in non-innermost frame */
Inv_Global, /* array-typed global block */
}
InvarTag;
typedef
struct {
InvarTag tag;
union {
struct {
} Unset;
struct {
} Unknown;
struct {
Addr addr;
SizeT szB;
StackBlock* descr;
} Stack0; /* innermost stack frame */
struct {
/* Pointer to a node in the interval tree for
this thread. */
StackTreeNode* nd;
} StackN; /* non-innermost stack frame */
struct {
/* Pointer to a GlobalBlock in the interval tree of
global blocks. */
GlobalTreeNode* nd;
} Global;
}
Inv;
}
Invar;
/* Partial debugging printing for an Invar. */
static void pp_Invar ( Invar* i )
{
switch (i->tag) {
case Inv_Unset:
VG_(printf)("Unset");
break;
case Inv_Unknown:
VG_(printf)("Unknown");
break;
case Inv_Stack0:
VG_(printf)("Stack0 [%#lx,+%lu)",
i->Inv.Stack0.addr, i->Inv.Stack0.szB);
break;
case Inv_StackN:
VG_(printf)("StackN [%#lx,+%lu)",
i->Inv.StackN.nd->addr, i->Inv.StackN.nd->szB);
break;
case Inv_Global:
VG_(printf)("Global [%#lx,+%lu)",
i->Inv.Global.nd->addr, i->Inv.Global.nd->szB);
break;
default:
tl_assert(0);
}
}
/* Compare two Invars for equality. */
static Bool eq_Invar ( Invar* i1, Invar* i2 )
{
if (i1->tag != i2->tag)
return False;
switch (i1->tag) {
case Inv_Unset:
return True;
case Inv_Unknown:
return True;
case Inv_Stack0:
return i1->Inv.Stack0.addr == i2->Inv.Stack0.addr
&& i1->Inv.Stack0.szB == i2->Inv.Stack0.szB;
case Inv_StackN:
return i1->Inv.StackN.nd == i2->Inv.StackN.nd;
case Inv_Global:
return i1->Inv.Global.nd == i2->Inv.Global.nd;
default:
tl_assert(0);
}
/*NOTREACHED*/
tl_assert(0);
}
/* Generate a piece of text showing 'ea' is relative to 'invar', if
known. If unknown, generate an empty string. 'buf' must be at
least 32 bytes in size. Also return the absolute value of the
delta, if known, or zero if not known.
*/
static void gen_delta_str ( /*OUT*/HChar* buf,
/*OUT*/UWord* absDelta,
Invar* inv, Addr ea )
{
Addr block = 0;
SizeT szB = 0;
buf[0] = 0;
*absDelta = 0;
switch (inv->tag) {
case Inv_Unknown:
case Inv_Unset:
return; /* unknown */
case Inv_Stack0:
block = inv->Inv.Stack0.addr;
szB = inv->Inv.Stack0.szB;
break;
case Inv_StackN:
block = inv->Inv.StackN.nd->addr;
szB = inv->Inv.StackN.nd->szB;
break;
case Inv_Global:
block = inv->Inv.Global.nd->addr;
szB = inv->Inv.Global.nd->szB;
break;
default:
tl_assert(0);
}
tl_assert(szB > 0);
if (ea < block) {
*absDelta = block - ea;
VG_(sprintf)(buf, "%'lu before", *absDelta);
}
else if (ea >= block + szB) {
*absDelta = ea - (block + szB);
VG_(sprintf)(buf, "%'lu after", *absDelta);
}
else {
// Leave *absDelta at zero.
VG_(sprintf)(buf, "%'lu inside", ea - block);
}
}
/* Print selected parts of an Invar, suitable for use in error
messages. */
static void show_Invar( HChar* buf, Word nBuf, Invar* inv, Word depth )
{
const HChar* str;
tl_assert(nBuf >= 128);
buf[0] = 0;
switch (inv->tag) {
case Inv_Unknown:
VG_(sprintf)(buf, "%s", "unknown");
break;
case Inv_Stack0:
str = "array";
VG_(sprintf)(buf, "stack %s \"%s\" of size %'lu in this frame",
str, inv->Inv.Stack0.descr->name,
inv->Inv.Stack0.szB );
break;
case Inv_StackN:
str = "array";
VG_(sprintf)(buf, "stack %s \"%s\" of size %'lu in frame %lu back from here",
str, inv->Inv.StackN.nd->descr->name,
inv->Inv.StackN.nd->descr->szB,
depth - inv->Inv.StackN.nd->depth );
break;
case Inv_Global:
str = "array";
VG_(sprintf)(buf, "global %s \"%s\" of size %'lu in object with soname \"%s\"",
str, inv->Inv.Global.nd->descr->name,
inv->Inv.Global.nd->descr->szB,
inv->Inv.Global.nd->descr->soname );
break;
case Inv_Unset:
VG_(sprintf)(buf, "%s", "Unset!");
break;
default:
tl_assert(0);
}
}
//////////////////////////////////////////////////////////////
// //
// our globals //
// //
//////////////////////////////////////////////////////////////
//////////////////////////////////////////////////////////////
///
#define N_QCACHE 16
/* Powers of two only, else the result will be chaos */
#define QCACHE_ADVANCE_EVERY 16
/* Per-thread query cache. Note that the invar can only be Inv_StackN
(but not Inv_Stack0), Inv_Global or Inv_Unknown. */
typedef
struct {
Addr addr;
SizeT szB;
Invar inv;
}
QCElem;
typedef
struct {
Word nInUse;
QCElem elems[N_QCACHE];
}
QCache;
static void QCache__invalidate ( QCache* qc ) {
tl_assert(qc->nInUse >= 0);
qc->nInUse = 0;
}
static void QCache__pp ( QCache* qc, const HChar* who )
{
Word i;
VG_(printf)("<<< QCache with %ld elements (%s)\n", qc->nInUse, who);
for (i = 0; i < qc->nInUse; i++) {
VG_(printf)(" [%#lx,+%#lx) ", qc->elems[i].addr, qc->elems[i].szB);
pp_Invar(&qc->elems[i].inv);
VG_(printf)("\n");
}
VG_(printf)(">>>\n");
}
static ULong stats__qcache_queries = 0;
static ULong stats__qcache_misses = 0;
static ULong stats__qcache_probes = 0;
///
//////////////////////////////////////////////////////////////
/* Each thread has:
* a shadow stack of StackFrames, which is a double-linked list
* an stack block interval tree
*/
static struct _StackFrame** shadowStacks;
static WordFM** /* StackTreeNode */ siTrees;
static QCache* qcaches;
/* Additionally, there is one global variable interval tree
for the entire process.
*/
static WordFM* /* GlobalTreeNode */ giTree;
static void invalidate_all_QCaches ( void )
{
Word i;
for (i = 0; i < VG_N_THREADS; i++) {
QCache__invalidate( &qcaches[i] );
}
}
static void ourGlobals_init ( void )
{
Word i;
shadowStacks = sg_malloc( "di.sg_main.oGi.2",
VG_N_THREADS * sizeof shadowStacks[0] );
siTrees = sg_malloc( "di.sg_main.oGi.3", VG_N_THREADS * sizeof siTrees[0] );
qcaches = sg_malloc( "di.sg_main.oGi.4", VG_N_THREADS * sizeof qcaches[0] );
for (i = 0; i < VG_N_THREADS; i++) {
shadowStacks[i] = NULL;
siTrees[i] = NULL;
qcaches[i] = (QCache){};
}
invalidate_all_QCaches();
giTree = VG_(newFM)( sg_malloc, "di.sg_main.oGi.1", sg_free,
(Word(*)(UWord,UWord))cmp_intervals_GlobalTreeNode );
}
//////////////////////////////////////////////////////////////
// //
// Handle global variable load/unload events //
// //
//////////////////////////////////////////////////////////////
static void acquire_globals ( ULong di_handle )
{
Word n, i;
XArray* /* of GlobalBlock */ gbs;
if (0) VG_(printf)("ACQUIRE GLOBALS %llu\n", di_handle );
gbs = VG_(di_get_global_blocks_from_dihandle)
(di_handle, True/*arrays only*/);
if (0) VG_(printf)(" GOT %ld globals\n", VG_(sizeXA)( gbs ));
n = VG_(sizeXA)( gbs );
for (i = 0; i < n; i++) {
GlobalBlock* gbp;
GlobalBlock* gb = VG_(indexXA)( gbs, i );
if (0) VG_(printf)(" new Global size %2lu at %#lx: %s %s\n",
gb->szB, gb->addr, gb->soname, gb->name );
tl_assert(gb->szB > 0);
/* Make a persistent copy of each GlobalBlock, and add it
to the tree. */
gbp = get_persistent_GlobalBlock( gb );
add_block_to_GlobalTree( giTree, gbp );
}
VG_(deleteXA)( gbs );
}
/* We only intercept these two because we need to see any di_handles
that might arise from the mappings/allocations. */
void sg_new_mem_mmap( Addr a, SizeT len,
Bool rr, Bool ww, Bool xx, ULong di_handle )
{
if (di_handle > 0)
acquire_globals(di_handle);
}
void sg_new_mem_startup( Addr a, SizeT len,
Bool rr, Bool ww, Bool xx, ULong di_handle )
{
if (di_handle > 0)
acquire_globals(di_handle);
}
void sg_die_mem_munmap ( Addr a, SizeT len )
{
Bool debug = (Bool)0;
Bool overlap = False;
if (debug) VG_(printf)("MUNMAP %#lx %lu\n", a, len );
if (len == 0)
return;
overlap = del_GlobalTree_range(giTree, a, len);
{ /* redundant sanity check */
UWord keyW, valW;
VG_(initIterFM)( giTree );
while (VG_(nextIterFM)( giTree, &keyW, &valW )) {
GlobalTreeNode* nd = (GlobalTreeNode*)keyW;
tl_assert(valW == 0);
tl_assert(nd->szB > 0);
tl_assert(nd->addr + nd->szB <= a
|| a + len <= nd->addr);
}
VG_(doneIterFM)( giTree );
}
if (!overlap)
return;
/* Ok, the range contained some blocks. Therefore we'll need to
visit all the Invars in all the thread shadow stacks, and
convert all Inv_Global entries that intersect [a,a+len) to
Inv_Unknown. */
tl_assert(len > 0);
preen_global_Invars( a, len );
invalidate_all_QCaches();
}
//////////////////////////////////////////////////////////////
// //
// StackFrame //
// //
//////////////////////////////////////////////////////////////
static ULong stats__total_accesses = 0;
static ULong stats__classify_Stack0 = 0;
static ULong stats__classify_StackN = 0;
static ULong stats__classify_Global = 0;
static ULong stats__classify_Unknown = 0;
static ULong stats__Invars_preened = 0;
static ULong stats__Invars_changed = 0;
static ULong stats__t_i_b_empty = 0;
static ULong stats__htab_fast = 0;
static ULong stats__htab_searches = 0;
static ULong stats__htab_probes = 0;
static ULong stats__htab_resizes = 0;
/* A dynamic instance of an instruction */
typedef
struct {
/* IMMUTABLE */
Addr insn_addr; /* NB! zero means 'not in use' */
XArray* blocks; /* XArray* of StackBlock, or NULL if none */
/* MUTABLE */
Invar invar;
}
IInstance;
#define N_HTAB_FIXED 64
typedef
struct _StackFrame {
/* The sp when the frame was created, so we know when to get rid
of it. */
Addr creation_sp;
/* The stack frames for a thread are arranged as a doubly linked
list. Obviously the outermost frame in the stack has .outer
as NULL and the innermost in theory has .inner as NULL.
However, when a function returns, we don't delete the
just-vacated StackFrame. Instead, it is retained in the list
and will be re-used when the next call happens. This is so
as to avoid constantly having to dynamically allocate and
deallocate frames. */
struct _StackFrame* inner;
struct _StackFrame* outer;
Word depth; /* 0 for outermost; increases inwards */
/* Information for each memory referencing instruction, for this
instantiation of the function. The iinstances array is
operated as a simple linear-probe hash table, which is
dynamically expanded as necessary. Once critical thing is
that an IInstance with a .insn_addr of zero is interpreted to
mean that hash table slot is unused. This means we can't
store an IInstance for address zero. */
/* Note that htab initially points to htab_fixed. If htab_fixed
turns out not to be big enough then htab is made to point to
dynamically allocated memory. But it's often the case that
htab_fixed is big enough, so this optimisation saves a huge
number of sg_malloc/sg_free call pairs. */
IInstance* htab;
UWord htab_size; /* size of hash table, MAY ONLY BE A POWER OF 2 */
UWord htab_used; /* number of hash table slots currently in use */
/* If this frame is currently making a call, then the following
are relevant. */
Addr sp_at_call;
Addr fp_at_call;
XArray* /* of Addr */ blocks_added_by_call;
/* See comment just above */
IInstance htab_fixed[N_HTAB_FIXED];
}
StackFrame;
/* Move this somewhere else? */
/* Visit all Invars in the entire system. If 'isHeap' is True, change
all Inv_Heap Invars that intersect [a,a+len) to Inv_Unknown. If
'isHeap' is False, do the same but to the Inv_Global{S,V} Invars
instead. */
__attribute__((noinline))
static void preen_global_Invar ( Invar* inv, Addr a, SizeT len )
{
stats__Invars_preened++;
tl_assert(len > 0);
tl_assert(inv);
switch (inv->tag) {
case Inv_Global:
tl_assert(inv->Inv.Global.nd);
tl_assert(inv->Inv.Global.nd->szB > 0);
if (0) VG_(printf)("preen_Invar Global %#lx %lu\n",
inv->Inv.Global.nd->addr,
inv->Inv.Global.nd->szB);
if (0 == cmp_nonempty_intervals(a, len, inv->Inv.Global.nd->addr,
inv->Inv.Global.nd->szB)) {
inv->tag = Inv_Unknown;
stats__Invars_changed++;
}
break;
case Inv_Stack0:
case Inv_StackN:
case Inv_Unknown:
break;
case Inv_Unset: /* this should never happen */
/* fallthrough */
default:
tl_assert(0);
}
}
__attribute__((noinline))
static void preen_global_Invars ( Addr a, SizeT len )
{
Int i;
UWord u;
StackFrame* frame;
tl_assert(len > 0);
for (i = 0; i < VG_N_THREADS; i++) {
frame = shadowStacks[i];
if (!frame)
continue; /* no frames for this thread */
/* start from the innermost frame */
while (frame->inner)
frame = frame->inner;
tl_assert(frame->outer);
/* work through the frames from innermost to outermost. The
order isn't important; we just need to ensure we visit each
frame once (including those which are not actually active,
more 'inner' than the 'innermost active frame', viz, just
hanging around waiting to be used, when the current innermost
active frame makes more calls. See comments on definition of
struct _StackFrame. */
for (; frame; frame = frame->outer) {
UWord xx = 0; /* sanity check only; count of used htab entries */
if (!frame->htab)
continue; /* frame not in use. See shadowStack_unwind(). */
for (u = 0; u < frame->htab_size; u++) {
IInstance* ii = &frame->htab[u];
if (ii->insn_addr == 0)
continue; /* not in use */
if (0) { pp_Invar(&ii->invar); VG_(printf)(" x\n"); }
preen_global_Invar( &ii->invar, a, len );
xx++;
}
tl_assert(xx == frame->htab_used);
}
}
}
/* XXX this should be >> 2 on ppc32/64 since the bottom two bits
of the ip are guaranteed to be zero */
inline static UWord compute_II_hash ( Addr ip, UWord htab_size ) {
return (ip >> 0) & (htab_size - 1);
}
__attribute__((noinline))
static void initialise_II_hash_table ( StackFrame* sf )
{
UWord i;
sf->htab_size = N_HTAB_FIXED; /* initial hash table size */
sf->htab = &sf->htab_fixed[0];
tl_assert(sf->htab);
sf->htab_used = 0;
for (i = 0; i < sf->htab_size; i++)
sf->htab[i].insn_addr = 0; /* NOT IN USE */
}
__attribute__((noinline))
static void resize_II_hash_table ( StackFrame* sf )
{
UWord i, j, ix, old_size, new_size;
IInstance *old_htab, *new_htab, *old;
tl_assert(sf && sf->htab);
old_size = sf->htab_size;
new_size = 2 * old_size;
old_htab = sf->htab;
new_htab = sg_malloc( "di.sg_main.rIht.1",
new_size * sizeof(IInstance) );
for (i = 0; i < new_size; i++) {
new_htab[i].insn_addr = 0; /* NOT IN USE */
}
for (i = 0; i < old_size; i++) {
old = &old_htab[i];
if (old->insn_addr == 0 /* NOT IN USE */)
continue;
ix = compute_II_hash(old->insn_addr, new_size);
/* find out where to put this, in the new table */
j = new_size;
while (1) {
if (new_htab[ix].insn_addr == 0)
break;
/* This can't ever happen, because it would mean the new
table is full; that isn't allowed -- even the old table is
only allowed to become half full. */
tl_assert(j > 0);
j--;
ix++; if (ix == new_size) ix = 0;
}
/* copy the old entry to this location */
tl_assert(ix < new_size);
tl_assert(new_htab[ix].insn_addr == 0);
new_htab[ix] = *old;
tl_assert(new_htab[ix].insn_addr != 0);
}
/* all entries copied; free old table. */
if (old_htab != &sf->htab_fixed[0])
sg_free(old_htab);
sf->htab = new_htab;
sf->htab_size = new_size;
/* check sf->htab_used is correct. Optional and a bit expensive
but anyway: */
j = 0;
for (i = 0; i < new_size; i++) {
if (new_htab[i].insn_addr != 0) {
j++;
}
}
tl_assert(j == sf->htab_used);
if (0) VG_(printf)("resized tab for SF %p to %lu\n", sf, new_size);
}
__attribute__((noinline))
static IInstance* find_or_create_IInstance_SLOW (
StackFrame* sf,
Addr ip,
XArray* /* StackBlock */ ip_frameblocks
)
{
UWord i, ix;
stats__htab_searches++;
tl_assert(sf);
tl_assert(sf->htab);
/* Make sure the table loading doesn't get too high. */
if (UNLIKELY(2 * sf->htab_used >= 1 * sf->htab_size)) {
stats__htab_resizes++;
resize_II_hash_table(sf);
}
tl_assert(2 * sf->htab_used <= sf->htab_size);
ix = compute_II_hash(ip, sf->htab_size);
i = sf->htab_size;
while (1) {
stats__htab_probes++;
/* Note that because of the way the fast-case handler works,
these two tests are actually redundant in the first iteration
of this loop. (Except they aren't redundant if the code just
above resized the table first. :-) */
if (sf->htab[ix].insn_addr == ip)
return &sf->htab[ix];
if (sf->htab[ix].insn_addr == 0)
break;
/* If i ever gets to zero and we have found neither what we're
looking for nor an empty slot, the table must be full. Which
isn't possible -- we monitor the load factor to ensure it
doesn't get above say 50%; if that ever does happen the table
is resized. */
tl_assert(i > 0);
i--;
ix++;
if (ix == sf->htab_size) ix = 0;
}
/* So now we've found a free slot at ix, and we can use that. */
tl_assert(sf->htab[ix].insn_addr == 0);
/* Add a new record in this slot. */
tl_assert(ip != 0); /* CAN'T REPRESENT THIS */
sf->htab[ix].insn_addr = ip;
sf->htab[ix].blocks = ip_frameblocks;
sf->htab[ix].invar.tag = Inv_Unset;
sf->htab_used++;
return &sf->htab[ix];
}
inline
static IInstance* find_or_create_IInstance (
StackFrame* sf,
Addr ip,
XArray* /* StackBlock */ ip_frameblocks
)
{
UWord ix = compute_II_hash(ip, sf->htab_size);
/* Is it in the first slot we come to? */
if (LIKELY(sf->htab[ix].insn_addr == ip)) {
stats__htab_fast++;
return &sf->htab[ix];
}
/* If the first slot we come to is empty, bag it. */
if (LIKELY(sf->htab[ix].insn_addr == 0)) {
stats__htab_fast++;
tl_assert(ip != 0);
sf->htab[ix].insn_addr = ip;
sf->htab[ix].blocks = ip_frameblocks;
sf->htab[ix].invar.tag = Inv_Unset;
sf->htab_used++;
return &sf->htab[ix];
}
/* Otherwise we hand off to the slow case, which searches other
slots, and optionally resizes the table if necessary. */
return find_or_create_IInstance_SLOW( sf, ip, ip_frameblocks );
}
__attribute__((noinline))
static Addr calculate_StackBlock_EA ( StackBlock* descr,
Addr sp, Addr fp ) {
UWord w1 = (UWord)descr->base;
UWord w2 = (UWord)(descr->spRel ? sp : fp);
UWord ea = w1 + w2;
return ea;
}
/* Given an array of StackBlocks, return an array of Addrs, holding
their effective addresses. Caller deallocates result array. */
__attribute__((noinline))
static XArray* /* Addr */ calculate_StackBlock_EAs (
XArray* /* StackBlock */ blocks,
Addr sp, Addr fp
)
{
XArray* res;
Word i, n = VG_(sizeXA)( blocks );
tl_assert(n > 0);
res = VG_(newXA)( sg_malloc, "di.sg_main.cSBE.1", sg_free, sizeof(Addr) );
for (i = 0; i < n; i++) {
StackBlock* blk = VG_(indexXA)( blocks, i );
Addr ea = calculate_StackBlock_EA( blk, sp, fp );
VG_(addToXA)( res, &ea );
}
return res;
}
/* Try to classify the block into which a memory access falls, and
write the result in 'inv'. This writes all relevant fields of
'inv'. */
__attribute__((noinline))
static void classify_address ( /*OUT*/Invar* inv,
ThreadId tid,
Addr ea, Addr sp, Addr fp,
UWord szB,
XArray* /* of StackBlock */ thisInstrBlocks )
{
tl_assert(szB > 0);
/* First, look in the stack blocks accessible in this instruction's
frame. */
{
Word i, nBlocks = VG_(sizeXA)( thisInstrBlocks );
if (nBlocks == 0) stats__t_i_b_empty++;
for (i = 0; i < nBlocks; i++) {
StackBlock* descr = VG_(indexXA)( thisInstrBlocks, i );
Addr bea = calculate_StackBlock_EA( descr, sp, fp );
if (bea <= ea && ea + szB <= bea + descr->szB) {
/* found it */
inv->tag = Inv_Stack0;
inv->Inv.Stack0.addr = bea;
inv->Inv.Stack0.szB = descr->szB;
inv->Inv.Stack0.descr = descr;
stats__classify_Stack0++;
return;
}
}
}
/* Look in this thread's query cache */
{ Word i;
QCache* cache = &qcaches[tid];
static UWord ctr = 0;
stats__qcache_queries++;
for (i = 0; i < cache->nInUse; i++) {
if (0) /* expensive in a loop like this */
tl_assert(cache->elems[i].addr + cache->elems[i].szB != 0);
stats__qcache_probes++;
if (is_subinterval_of(cache->elems[i].addr,
cache->elems[i].szB, ea, szB)) {
if (i > 0
&& (ctr++ & (QCACHE_ADVANCE_EVERY-1)) == 0) {
QCElem tmp;
tmp = cache->elems[i-1];
cache->elems[i-1] = cache->elems[i];
cache->elems[i] = tmp;
i--;
}
*inv = cache->elems[i].inv;
return;
}
}
stats__qcache_misses++;
}
/* Ok, so it's not a block in the top frame. Perhaps it's a block
in some calling frame? Consult this thread's stack-block
interval tree to find out. */
{ StackTreeNode* nd = find_StackTreeNode( siTrees[tid], ea );
/* We know that [ea,ea+1) is in the block, but we need to
restrict to the case where the whole access falls within
it. */
if (nd && !is_subinterval_of(nd->addr, nd->szB, ea, szB)) {
nd = NULL;
}
if (nd) {
/* found it */
inv->tag = Inv_StackN;
inv->Inv.StackN.nd = nd;
stats__classify_StackN++;
goto out;
}
}
/* Not in a stack block. Try the global pool. */
{ GlobalTreeNode* nd = find_GlobalTreeNode(giTree, ea);
/* We know that [ea,ea+1) is in the block, but we need to
restrict to the case where the whole access falls within
it. */
if (nd && !is_subinterval_of(nd->addr, nd->szB, ea, szB)) {
nd = NULL;
}
if (nd) {
/* found it */
inv->tag = Inv_Global;
inv->Inv.Global.nd = nd;
stats__classify_Global++;
goto out;
}
}
/* No idea - give up. */
inv->tag = Inv_Unknown;
stats__classify_Unknown++;
/* Update the cache */
out:
{ Addr toadd_addr = 0;
SizeT toadd_szB = 0;
QCache* cache = &qcaches[tid];
static UWord ctr = 0;
Bool show = False;
if (0 && 0 == (ctr++ & 0x1FFFFF)) show = True;
if (show) QCache__pp(cache, "before upd");
switch (inv->tag) {
case Inv_Global:
toadd_addr = inv->Inv.Global.nd->addr;
toadd_szB = inv->Inv.Global.nd->szB;
break;
case Inv_StackN:
toadd_addr = inv->Inv.StackN.nd->addr;
toadd_szB = inv->Inv.StackN.nd->szB;
break;
case Inv_Unknown: {
/* This is more complex. We need to figure out the
intersection of the "holes" in the global and stack
interval trees into which [ea,ea+szB) falls. This is
further complicated by the fact that [ea,ea+szB) might
not fall cleanly into a hole; it may instead fall across
the boundary of a stack or global block. In that case
we just ignore it and don't update the cache, since we
have no way to represent this situation precisely. */
StackTreeNode sNegInf, sPosInf, sKey, *sLB, *sUB;
GlobalTreeNode gNegInf, gPosInf, gKey, *gLB, *gUB;
Addr gMin, gMax, sMin, sMax, uMin, uMax;
Bool sOK, gOK;
sNegInf.addr = 0;
sNegInf.szB = 1;
sPosInf.addr = ~(UWord)0;
sPosInf.szB = 1;
gNegInf.addr = 0;
gNegInf.szB = 1;
gPosInf.addr = ~(UWord)0;
gPosInf.szB = 1;
sKey.addr = ea;
sKey.szB = szB;
gKey.addr = ea;
gKey.szB = szB;
if (0) VG_(printf)("Tree sizes %lu %lu\n",
VG_(sizeFM)(siTrees[tid]), VG_(sizeFM)(giTree));
sOK = VG_(findBoundsFM)( siTrees[tid],
(UWord*)&sLB, NULL/*unused*/,
(UWord*)&sUB, NULL/*unused*/,
(UWord)&sNegInf, 0/*unused*/,
(UWord)&sPosInf, 0/*unused*/,
(UWord)&sKey );
gOK = VG_(findBoundsFM)( giTree,
(UWord*)&gLB, NULL/*unused*/,
(UWord*)&gUB, NULL/*unused*/,
(UWord)&gNegInf, 0/*unused*/,
(UWord)&gPosInf, 0/*unused*/,
(UWord)&gKey );
if (!(sOK && gOK)) {
/* If this happens, then [ea,ea+szB) partially overlaps
a heap or stack block. We can't represent that, so
just forget it (should be very rare). However, do
maximum sanity checks first. In such a
partial overlap case, it can't be the case that both
[ea] and [ea+szB-1] overlap the same block, since if
that were indeed the case then it wouldn't be a
partial overlap; rather it would simply fall inside
that block entirely and we shouldn't be inside this
conditional at all. */
if (!sOK) {
StackTreeNode *ndFirst, *ndLast;
ndFirst = find_StackTreeNode( siTrees[tid], ea );
ndLast = find_StackTreeNode( siTrees[tid], ea+szB-1 );
/* if both ends of the range fall inside a block,
they can't be in the same block. */
if (ndFirst && ndLast)
tl_assert(ndFirst != ndLast);
/* for each end of the range, if it is in a block,
the range as a whole can't be entirely within the
block. */
if (ndFirst)
tl_assert(!is_subinterval_of(ndFirst->addr,
ndFirst->szB, ea, szB));
if (ndLast)
tl_assert(!is_subinterval_of(ndLast->addr,
ndLast->szB, ea, szB));
}
if (!gOK) {
GlobalTreeNode *ndFirst, *ndLast;
ndFirst = find_GlobalTreeNode( giTree, ea );
ndLast = find_GlobalTreeNode( giTree, ea+szB-1 );
/* if both ends of the range fall inside a block,
they can't be in the same block. */
if (ndFirst && ndLast)
tl_assert(ndFirst != ndLast);
/* for each end of the range, if it is in a block,
the range as a whole can't be entirely within the
block. */
if (ndFirst)
tl_assert(!is_subinterval_of(ndFirst->addr,
ndFirst->szB, ea, szB));
if (ndLast)
tl_assert(!is_subinterval_of(ndLast->addr,
ndLast->szB, ea, szB));
}
if (0) VG_(printf)("overlapping blocks in cache\n");
return;
}
sMin = sLB == &sNegInf ? 0 : (sLB->addr + sLB->szB);
sMax = sUB == &sPosInf ? ~(UWord)0 : (sUB->addr - 1);
gMin = gLB == &gNegInf ? 0 : (gLB->addr + gLB->szB);
gMax = gUB == &gPosInf ? ~(UWord)0 : (gUB->addr - 1);
if (0) VG_(printf)("sMin %lx sMax %lx gMin %lx gMax %lx\n",
sMin, sMax, gMin, gMax);
/* [sMin,sMax] and [gMin,gMax] must both contain
[ea,ea+szB) (right?) That implies they must overlap at
at least over [ea,ea+szB). */
tl_assert(sMin <= ea && ea+szB-1 <= sMax);
tl_assert(gMin <= ea && ea+szB-1 <= gMax);
/* So now compute their intersection. */
uMin = Addr__max( sMin, gMin );
uMax = Addr__min( sMax, gMax );
if (0) VG_(printf)("uMin %lx uMax %lx\n", uMin, uMax);
tl_assert(uMin <= uMax);
tl_assert(uMin <= ea && ea+szB-1 <= uMax);
/* Finally, we can park [uMin,uMax] in the cache. However,
if uMax is ~0, we can't represent the difference; hence
fudge uMax. */
if (uMin < uMax && uMax == ~(UWord)0)
uMax--;
toadd_addr = uMin;
toadd_szB = uMax - uMin + 1;
break;
}
default:
/* We should only be caching info for the above 3 cases */
tl_assert(0);
} /* switch (inv->tag) */
{ /* and actually add this to the cache, finally */
Word i;
Word ip = cache->nInUse / 2; /* doesn't seem critical */
if (cache->nInUse < N_QCACHE)
cache->nInUse++;
for (i = cache->nInUse-1; i > ip; i--) {
cache->elems[i] = cache->elems[i-1];
}
tl_assert(toadd_szB > 0);
cache->elems[ip].addr = toadd_addr;
cache->elems[ip].szB = toadd_szB;
cache->elems[ip].inv = *inv;
}
if (show) QCache__pp(cache, "after upd");
}
}
/* CALLED FROM GENERATED CODE */
static
VG_REGPARM(3)
void helperc__mem_access ( /* Known only at run time: */
Addr ea, Addr sp, Addr fp,
/* Known at translation time: */
Word sszB, Addr ip, XArray* ip_frameBlocks )
{
UWord szB;
IInstance* iinstance;
Invar* inv;
Invar new_inv;
ThreadId tid = VG_(get_running_tid)();
StackFrame* frame;
HChar bufE[160], bufA[160], bufD[32];
stats__total_accesses++;
tl_assert(is_sane_TId(tid));
frame = shadowStacks[tid];
tl_assert(frame);
/* Find the instance info for this instruction. */
tl_assert(ip_frameBlocks);
iinstance = find_or_create_IInstance( frame, ip, ip_frameBlocks );
tl_assert(iinstance);
tl_assert(iinstance->blocks == ip_frameBlocks);
szB = (sszB < 0) ? (-sszB) : sszB;
tl_assert(szB > 0);
inv = &iinstance->invar;
/* Deal with first uses of instruction instances. */
if (inv->tag == Inv_Unset) {
/* This is the first use of this instance of the instruction, so
we can't make any check; we merely record what we saw, so we
can compare it against what happens for 2nd and subsequent
accesses. */
classify_address( inv,
tid, ea, sp, fp, szB, iinstance->blocks );
tl_assert(inv->tag != Inv_Unset);
return;
}
/* So generate an Invar and see if it's different from what
we had before. */
classify_address( &new_inv,
tid, ea, sp, fp, szB, iinstance->blocks );
tl_assert(new_inv.tag != Inv_Unset);
/* Did we see something different from before? If no, then there's
no error. */
tl_assert(inv->tag != Inv_Unset);
if (LIKELY(eq_Invar(&new_inv, inv)))
return;
VG_(memset)(bufE, 0, sizeof(bufE));
show_Invar( bufE, sizeof(bufE)-1, inv, frame->depth );
VG_(memset)(bufA, 0, sizeof(bufA));
show_Invar( bufA, sizeof(bufA)-1, &new_inv, frame->depth );
VG_(memset)(bufD, 0, sizeof(bufD));
UWord absDelta;
gen_delta_str( bufD, &absDelta, inv, ea );
if (absDelta < 1024)
sg_record_error_SorG( tid, ea, sszB, bufE, bufA, bufD );
/* And now install the new observation as "standard", so as to
make future error messages make more sense. */
*inv = new_inv;
}
////////////////////////////////////////
/* Primary push-a-new-frame routine. Called indirectly from
generated code. */
static UWord stats__max_sitree_size = 0;
static UWord stats__max_gitree_size = 0;
static
void shadowStack_new_frame ( ThreadId tid,
Addr sp_at_call_insn,
Addr sp_post_call_insn,
Addr fp_at_call_insn,
Addr ip_post_call_insn,
XArray* descrs_at_call_insn )
{
StackFrame *callee, *caller;
tl_assert(is_sane_TId(tid));
caller = shadowStacks[tid];
tl_assert(caller);
if (caller->outer) { /* "this is not the outermost frame" */
tl_assert(caller->outer->inner == caller);
tl_assert(caller->outer->depth >= 0);
tl_assert(1 + caller->outer->depth == caller->depth);
} else {
tl_assert(caller->depth == 0);
}
caller->sp_at_call = sp_at_call_insn;
caller->fp_at_call = fp_at_call_insn;
if (descrs_at_call_insn) {
tl_assert( VG_(sizeXA)(descrs_at_call_insn) > 0 );
caller->blocks_added_by_call
= calculate_StackBlock_EAs( descrs_at_call_insn,
sp_at_call_insn, fp_at_call_insn );
if (caller->blocks_added_by_call)
add_blocks_to_StackTree( siTrees[tid],
descrs_at_call_insn,
caller->blocks_added_by_call,
caller->depth /* stack depth at which
these blocks are
considered to exist*/ );
if (1) {
UWord s = VG_(sizeFM)( siTrees[tid] );
UWord g = VG_(sizeFM)( giTree );
Bool sb = s > stats__max_sitree_size;
Bool gb = g > stats__max_gitree_size;
if (sb) stats__max_sitree_size = s;
if (gb) stats__max_gitree_size = g;
if (0 && (sb || gb))
VG_(message)(Vg_DebugMsg,
"exp-sgcheck: new max tree sizes: "
"StackTree %lu, GlobalTree %lu\n",
stats__max_sitree_size, stats__max_gitree_size );
}
} else {
caller->blocks_added_by_call = NULL;
}
/* caller->blocks_added_by_call is used again (and then freed) when
this frame is removed from the stack. */
if (caller->inner) {
callee = caller->inner;
} else {
callee = sg_malloc("di.sg_main.sSnf.1", sizeof(StackFrame));
VG_(memset)(callee, 0, sizeof(StackFrame));
callee->outer = caller;
caller->inner = callee;
callee->depth = 1 + caller->depth;
tl_assert(callee->inner == NULL);
}
/* This sets up .htab, .htab_size and .htab_used */
initialise_II_hash_table( callee );
callee->creation_sp = sp_post_call_insn;
callee->sp_at_call = 0; // not actually required ..
callee->fp_at_call = 0; // .. these 3 initialisations are ..
callee->blocks_added_by_call = NULL; // .. just for cleanness
/* record the new running stack frame */
shadowStacks[tid] = callee;
/* and this thread's query cache is now invalid */
QCache__invalidate( &qcaches[tid] );
if (0)
{ Word d = callee->depth;
const HChar *fnname;
Bool ok;
Addr ip = ip_post_call_insn;
ok = VG_(get_fnname_w_offset)( ip, &fnname );
while (d > 0) {
VG_(printf)(" ");
d--;
}
VG_(printf)("> %s %#lx\n", ok ? fnname : "???", ip);
}
}
/* CALLED FROM GENERATED CODE */
static
VG_REGPARM(3)
void helperc__new_frame ( Addr sp_post_call_insn,
Addr fp_at_call_insn,
Addr ip_post_call_insn,
XArray* blocks_at_call_insn,
Word sp_adjust )
{
ThreadId tid = VG_(get_running_tid)();
Addr sp_at_call_insn = sp_post_call_insn + sp_adjust;
shadowStack_new_frame( tid,
sp_at_call_insn,
sp_post_call_insn,
fp_at_call_insn,
ip_post_call_insn,
blocks_at_call_insn );
}
////////////////////////////////////////
/* Primary remove-frame(s) routine. Called indirectly from
generated code. */
__attribute__((noinline))
static void shadowStack_unwind ( ThreadId tid, Addr sp_now )
{
StackFrame *innermost, *innermostOrig;
tl_assert(is_sane_TId(tid));
innermost = shadowStacks[tid];
tl_assert(innermost);
innermostOrig = innermost;
//VG_(printf)("UNWIND sp_new = %p\n", sp_now);
while (1) {
if (!innermost->outer)
break;
if (innermost->inner)
tl_assert(innermost->inner->outer == innermost);
tl_assert(innermost->outer->inner == innermost);
tl_assert(innermost->blocks_added_by_call == NULL);
if (sp_now <= innermost->creation_sp) break;
//VG_(printf)("UNWIND dump %p\n", innermost->creation_sp);
tl_assert(innermost->htab);
if (innermost->htab != &innermost->htab_fixed[0])
sg_free(innermost->htab);
/* be on the safe side */
innermost->creation_sp = 0;
innermost->htab = NULL;
innermost->htab_size = 0;
innermost->htab_used = 0;
innermost->sp_at_call = 0;
innermost->fp_at_call = 0;
innermost->blocks_added_by_call = NULL;
innermost = innermost->outer;
/* So now we're "back" in the calling frame. Remove from this
thread's stack-interval-tree, the blocks added at the time of
the call. */
if (innermost->outer) { /* not at the outermost frame */
if (innermost->blocks_added_by_call == NULL) {
} else {
del_blocks_from_StackTree( siTrees[tid],
innermost->blocks_added_by_call );
VG_(deleteXA)( innermost->blocks_added_by_call );
innermost->blocks_added_by_call = NULL;
}
}
/* That completes the required tidying of the interval tree
associated with the frame we just removed. */
if (0) {
Word d = innermost->depth;
while (d > 0) {
VG_(printf)(" ");
d--;
}
VG_(printf)("X\n");
}
}
tl_assert(innermost);
if (innermost != innermostOrig) {
shadowStacks[tid] = innermost;
/* this thread's query cache is now invalid */
QCache__invalidate( &qcaches[tid] );
}
}
//////////////////////////////////////////////////////////////
// //
// Instrumentation //
// //
//////////////////////////////////////////////////////////////
/* What does instrumentation need to do?
- at each Call transfer, generate a call to shadowStack_new_frame
do this by manually inspecting the IR
- at each sp change, if the sp change is negative,
call shadowStack_unwind
do this by asking for SP-change analysis
- for each memory referencing instruction,
call helperc__mem_access
*/
/* A complication: sg_ instrumentation and h_ instrumentation need to
be interleaved. Since the latter is a lot more complex than the
former, we split the sg_ instrumentation here into four functions
and let the h_ instrumenter call the four functions as it goes.
Hence the h_ instrumenter drives the sg_ instrumenter.
To make this viable, the sg_ instrumenter carries what running
state it needs in 'struct _SGEnv'. This is exported only
abstractly from this file.
*/
struct _SGEnv {
/* the current insn's IP */
Addr curr_IP;
/* whether the above is actually known */
Bool curr_IP_known;
/* if we find a mem ref, is it the first for this insn? Used for
detecting insns which make more than one memory ref, a situation
we basically can't really handle properly; and so we ignore all
but the first ref. */
Bool firstRef;
/* READONLY */
IRTemp (*newIRTemp_cb)(IRType,void*);
void* newIRTemp_opaque;
};
/* --- Helper fns for instrumentation --- */
static IRTemp gen_Get_SP ( struct _SGEnv* sge,
IRSB* bbOut,
const VexGuestLayout* layout,
Int hWordTy_szB )
{
IRExpr* sp_expr;
IRTemp sp_temp;
IRType sp_type;
/* This in effect forces the host and guest word sizes to be the
same. */
tl_assert(hWordTy_szB == layout->sizeof_SP);
sp_type = layout->sizeof_SP == 8 ? Ity_I64 : Ity_I32;
sp_expr = IRExpr_Get( layout->offset_SP, sp_type );
sp_temp = sge->newIRTemp_cb( sp_type, sge->newIRTemp_opaque );
addStmtToIRSB( bbOut, IRStmt_WrTmp( sp_temp, sp_expr ) );
return sp_temp;
}
static IRTemp gen_Get_FP ( struct _SGEnv* sge,
IRSB* bbOut,
const VexGuestLayout* layout,
Int hWordTy_szB )
{
IRExpr* fp_expr;
IRTemp fp_temp;
IRType fp_type;
/* This in effect forces the host and guest word sizes to be the
same. */
tl_assert(hWordTy_szB == layout->sizeof_SP);
fp_type = layout->sizeof_FP == 8 ? Ity_I64 : Ity_I32;
fp_expr = IRExpr_Get( layout->offset_FP, fp_type );
fp_temp = sge->newIRTemp_cb( fp_type, sge->newIRTemp_opaque );
addStmtToIRSB( bbOut, IRStmt_WrTmp( fp_temp, fp_expr ) );
return fp_temp;
}
static void instrument_mem_access ( struct _SGEnv* sge,
IRSB* bbOut,
IRExpr* addr,
Int szB,
Bool isStore,
Int hWordTy_szB,
Addr curr_IP,
const VexGuestLayout* layout )
{
IRType tyAddr = Ity_INVALID;
XArray* frameBlocks = NULL;
tl_assert(isIRAtom(addr));
tl_assert(hWordTy_szB == 4 || hWordTy_szB == 8);
tyAddr = typeOfIRExpr( bbOut->tyenv, addr );
tl_assert(tyAddr == Ity_I32 || tyAddr == Ity_I64);
#if defined(VGA_x86)
{ UChar* p = (UChar*)curr_IP;
// pop %ebp; RET
if (p[0] == 0xc3 && p[-1] == 0x5d) return;
// pop %ebp; RET $imm16
if (p[0] == 0xc2 && p[-1] == 0x5d) return;
// PUSH %EBP; mov %esp,%ebp
if (p[0] == 0x55 && p[1] == 0x89 && p[2] == 0xe5) return;
}
#endif
/* First off, find or create the StackBlocks for this instruction. */
frameBlocks = get_StackBlocks_for_IP( curr_IP );
tl_assert(frameBlocks);
//if (VG_(sizeXA)( frameBlocks ) == 0)
// frameBlocks = NULL;
/* Generate a call to "helperc__mem_access", passing:
addr current_SP current_FP szB curr_IP frameBlocks
*/
{ IRTemp t_SP = gen_Get_SP( sge, bbOut, layout, hWordTy_szB );
IRTemp t_FP = gen_Get_FP( sge, bbOut, layout, hWordTy_szB );
IRExpr** args
= mkIRExprVec_6( addr,
IRExpr_RdTmp(t_SP),
IRExpr_RdTmp(t_FP),
mkIRExpr_HWord( isStore ? (-szB) : szB ),
mkIRExpr_HWord( curr_IP ),
mkIRExpr_HWord( (HWord)frameBlocks ) );
IRDirty* di
= unsafeIRDirty_0_N( 3/*regparms*/,
"helperc__mem_access",
VG_(fnptr_to_fnentry)( &helperc__mem_access ),
args );
addStmtToIRSB( bbOut, IRStmt_Dirty(di) );
}
}
/* --- Instrumentation main (4 fns) --- */
struct _SGEnv * sg_instrument_init ( IRTemp (*newIRTemp_cb)(IRType,void*),
void* newIRTemp_opaque )
{
struct _SGEnv * env = sg_malloc("di.sg_main.sii.1",
sizeof(struct _SGEnv));
tl_assert(env);
env->curr_IP = 0;
env->curr_IP_known = False;
env->firstRef = True;
env->newIRTemp_cb = newIRTemp_cb;
env->newIRTemp_opaque = newIRTemp_opaque;
return env;
}
void sg_instrument_fini ( struct _SGEnv * env )
{
sg_free(env);
}
/* Add instrumentation for 'st' to 'sbOut', and possibly modify 'env'
as required. This must be called before 'st' itself is added to
'sbOut'. */
void sg_instrument_IRStmt ( /*MOD*/struct _SGEnv * env,
/*MOD*/IRSB* sbOut,
IRStmt* st,
const VexGuestLayout* layout,
IRType gWordTy, IRType hWordTy )
{
if (!sg_clo_enable_sg_checks)
return;
tl_assert(st);
tl_assert(isFlatIRStmt(st));
switch (st->tag) {
case Ist_NoOp:
case Ist_AbiHint:
case Ist_Put:
case Ist_PutI:
case Ist_MBE:
/* None of these can contain any memory references. */
break;
case Ist_Exit:
tl_assert(st->Ist.Exit.jk != Ijk_Call);
/* else we must deal with a conditional call */
break;
case Ist_IMark:
env->curr_IP_known = True;
env->curr_IP = st->Ist.IMark.addr;
env->firstRef = True;
break;
case Ist_Store:
tl_assert(env->curr_IP_known);
if (env->firstRef) {
instrument_mem_access(
env, sbOut,
st->Ist.Store.addr,
sizeofIRType(typeOfIRExpr(sbOut->tyenv, st->Ist.Store.data)),
True/*isStore*/,
sizeofIRType(hWordTy),
env->curr_IP, layout
);
env->firstRef = False;
}
break;
case Ist_WrTmp: {
IRExpr* data = st->Ist.WrTmp.data;
if (data->tag == Iex_Load) {
tl_assert(env->curr_IP_known);
if (env->firstRef) {
instrument_mem_access(
env, sbOut,
data->Iex.Load.addr,
sizeofIRType(data->Iex.Load.ty),
False/*!isStore*/,
sizeofIRType(hWordTy),
env->curr_IP, layout
);
env->firstRef = False;
}
}
break;
}
case Ist_Dirty: {
Int dataSize;
IRDirty* d = st->Ist.Dirty.details;
if (d->mFx != Ifx_None) {
/* This dirty helper accesses memory. Collect the
details. */
tl_assert(env->curr_IP_known);
if (env->firstRef) {
tl_assert(d->mAddr != NULL);
tl_assert(d->mSize != 0);
dataSize = d->mSize;
if (d->mFx == Ifx_Read || d->mFx == Ifx_Modify) {
instrument_mem_access(
env, sbOut, d->mAddr, dataSize, False/*!isStore*/,
sizeofIRType(hWordTy), env->curr_IP, layout
);
}
if (d->mFx == Ifx_Write || d->mFx == Ifx_Modify) {
instrument_mem_access(
env, sbOut, d->mAddr, dataSize, True/*isStore*/,
sizeofIRType(hWordTy), env->curr_IP, layout
);
}
env->firstRef = False;
}
} else {
tl_assert(d->mAddr == NULL);
tl_assert(d->mSize == 0);
}
break;
}
case Ist_CAS: {
/* We treat it as a read and a write of the location. I
think that is the same behaviour as it was before IRCAS
was introduced, since prior to that point, the Vex front
ends would translate a lock-prefixed instruction into a
(normal) read followed by a (normal) write. */
if (env->firstRef) {
Int dataSize;
IRCAS* cas = st->Ist.CAS.details;
tl_assert(cas->addr != NULL);
tl_assert(cas->dataLo != NULL);
dataSize = sizeofIRType(typeOfIRExpr(sbOut->tyenv, cas->dataLo));
if (cas->dataHi != NULL)
dataSize *= 2; /* since it's a doubleword-CAS */
instrument_mem_access(
env, sbOut, cas->addr, dataSize, False/*!isStore*/,
sizeofIRType(hWordTy), env->curr_IP, layout
);
instrument_mem_access(
env, sbOut, cas->addr, dataSize, True/*isStore*/,
sizeofIRType(hWordTy), env->curr_IP, layout
);
env->firstRef = False;
}
break;
}
default:
tl_assert(0);
} /* switch (st->tag) */
}
/* Add instrumentation for the final jump of an IRSB 'sbOut', and
possibly modify 'env' as required. This must be the last
instrumentation statement in the block. */
void sg_instrument_final_jump ( /*MOD*/struct _SGEnv * env,
/*MOD*/IRSB* sbOut,
IRExpr* next,
IRJumpKind jumpkind,
const VexGuestLayout* layout,
IRType gWordTy, IRType hWordTy )
{
if (!sg_clo_enable_sg_checks)
return;
if (jumpkind == Ijk_Call) {
// Assumes x86 or amd64
IRTemp sp_post_call_insn, fp_post_call_insn;
XArray* frameBlocks;
IRExpr** args;
IRDirty* di;
sp_post_call_insn
= gen_Get_SP( env, sbOut, layout, sizeofIRType(hWordTy) );
fp_post_call_insn
= gen_Get_FP( env, sbOut, layout, sizeofIRType(hWordTy) );
tl_assert(env->curr_IP_known);
frameBlocks = get_StackBlocks_for_IP( env->curr_IP );
tl_assert(frameBlocks);
if (VG_(sizeXA)(frameBlocks) == 0)
frameBlocks = NULL;
args
= mkIRExprVec_5(
IRExpr_RdTmp(sp_post_call_insn),
IRExpr_RdTmp(fp_post_call_insn),
/* assume the call doesn't change FP */
next,
mkIRExpr_HWord( (HWord)frameBlocks ),
mkIRExpr_HWord( sizeofIRType(gWordTy) )
);
di = unsafeIRDirty_0_N(
3/*regparms*/,
"helperc__new_frame",
VG_(fnptr_to_fnentry)( &helperc__new_frame ),
args );
addStmtToIRSB( sbOut, IRStmt_Dirty(di) );
}
}
//////////////////////////////////////////////////////////////
// //
// end Instrumentation //
// //
//////////////////////////////////////////////////////////////
//////////////////////////////////////////////////////////////
// //
// misc //
// //
//////////////////////////////////////////////////////////////
/* Make a new empty stack frame that is suitable for being the
outermost frame in a stack. It has a creation_sp of effectively
infinity, so it can never be removed. */
static StackFrame* new_root_StackFrame ( void )
{
StackFrame* sframe = sg_malloc("di.sg_main.nrS.1", sizeof(StackFrame));
VG_(memset)( sframe, 0, sizeof(*sframe) );
sframe->creation_sp = ~0UL;
/* This sets up .htab, .htab_size and .htab_used */
initialise_II_hash_table( sframe );
/* ->depth, ->outer, ->inner are 0, NULL, NULL */
return sframe;
}
/* Primary routine for setting up the shadow stack for a new thread.
Note that this is used to create not only child thread stacks, but
the root thread's stack too. We create a new stack with
.creation_sp set to infinity, so that the outermost frame can never
be removed (by shadowStack_unwind). The core calls this function
as soon as a thread is created. We cannot yet get its SP value,
since that may not yet be set. */
static void shadowStack_thread_create ( ThreadId parent, ThreadId child )
{
tl_assert(is_sane_TId(child));
if (parent == VG_INVALID_THREADID) {
/* creating the main thread's stack */
} else {
tl_assert(is_sane_TId(parent));
tl_assert(parent != child);
tl_assert(shadowStacks[parent] != NULL);
tl_assert(siTrees[parent] != NULL);
}
/* Create the child's stack. Bear in mind we may be re-using
it. */
if (shadowStacks[child] == NULL) {
/* First use of this stack. Just allocate an initial frame. */
tl_assert(siTrees[child] == NULL);
} else {
StackFrame *frame, *frame2;
/* re-using a stack. */
/* get rid of the interval tree */
tl_assert(siTrees[child] != NULL);
delete_StackTree( siTrees[child] );
siTrees[child] = NULL;
/* Throw away all existing frames. */
frame = shadowStacks[child];
while (frame->outer)
frame = frame->outer;
tl_assert(frame->depth == 0);
while (frame) {
frame2 = frame->inner;
if (frame2) tl_assert(1 + frame->depth == frame2->depth);
sg_free(frame);
frame = frame2;
}
shadowStacks[child] = NULL;
}
tl_assert(shadowStacks[child] == NULL);
tl_assert(siTrees[child] == NULL);
/* Set up the initial stack frame. */
shadowStacks[child] = new_root_StackFrame();
/* and set up the child's stack block interval tree. */
siTrees[child] = new_StackTree();
}
/* Once a thread is ready to go, the core calls here. We take the
opportunity to push a second frame on its stack, with the
presumably valid SP value that is going to be used for the thread's
startup. Hence we should always wind up with a valid outermost
frame for the thread. */
static void shadowStack_set_initial_SP ( ThreadId tid )
{
StackFrame* sf;
tl_assert(is_sane_TId(tid));
sf = shadowStacks[tid];
tl_assert(sf != NULL);
tl_assert(sf->outer == NULL);
tl_assert(sf->inner == NULL);
tl_assert(sf->creation_sp == ~0UL);
shadowStack_new_frame( tid, 0, VG_(get_SP)(tid),
0, VG_(get_IP)(tid), NULL );
}
//////////////////////////////////////////////////////////////
// //
// main-ish //
// //
//////////////////////////////////////////////////////////////
/* CALLED indirectly FROM GENERATED CODE. Calls here are created by
sp-change analysis, as requested in pc_pre_clo_int(). */
void sg_die_mem_stack ( Addr old_SP, SizeT len ) {
ThreadId tid = VG_(get_running_tid)();
shadowStack_unwind( tid, old_SP+len );
}
void sg_pre_clo_init ( void ) {
ourGlobals_init();
init_StackBlocks_set();
init_GlobalBlock_set();
}
void sg_post_clo_init ( void ) {
}
void sg_pre_thread_ll_create ( ThreadId parent, ThreadId child ) {
shadowStack_thread_create(parent, child);
}
void sg_pre_thread_first_insn ( ThreadId tid ) {
shadowStack_set_initial_SP(tid);
}
void sg_fini(Int exitcode)
{
if (VG_(clo_stats)) {
VG_(message)(Vg_DebugMsg,
" sg_: %'llu total accesses, of which:\n", stats__total_accesses);
VG_(message)(Vg_DebugMsg,
" sg_: stack0: %'12llu classify\n",
stats__classify_Stack0);
VG_(message)(Vg_DebugMsg,
" sg_: stackN: %'12llu classify\n",
stats__classify_StackN);
VG_(message)(Vg_DebugMsg,
" sg_: global: %'12llu classify\n",
stats__classify_Global);
VG_(message)(Vg_DebugMsg,
" sg_: unknown: %'12llu classify\n",
stats__classify_Unknown);
VG_(message)(Vg_DebugMsg,
" sg_: %'llu Invars preened, of which %'llu changed\n",
stats__Invars_preened, stats__Invars_changed);
VG_(message)(Vg_DebugMsg,
" sg_: t_i_b_MT: %'12llu\n", stats__t_i_b_empty);
VG_(message)(Vg_DebugMsg,
" sg_: qcache: %'llu searches, %'llu probes, %'llu misses\n",
stats__qcache_queries, stats__qcache_probes, stats__qcache_misses);
VG_(message)(Vg_DebugMsg,
" sg_: htab-fast: %'llu hits\n",
stats__htab_fast);
VG_(message)(Vg_DebugMsg,
" sg_: htab-slow: %'llu searches, %'llu probes, %'llu resizes\n",
stats__htab_searches, stats__htab_probes, stats__htab_resizes);
}
}
/*--------------------------------------------------------------------*/
/*--- end sg_main.c ---*/
/*--------------------------------------------------------------------*/