/*
* Copyright (C) 2010 The Android Open Source Project
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
/*
* Verifier basic block functions.
*/
#include "Dalvik.h"
#include "analysis/VfyBasicBlock.h"
#include "analysis/CodeVerify.h"
#include "analysis/VerifySubs.h"
#include "libdex/DexCatch.h"
#include "libdex/InstrUtils.h"
/*
 * Copy the addresses of the catch handlers attached to "pTry" into
 * "addrBuf", storing at most "addrBufSize" entries.
 *
 * The return value is the total number of handlers visited, which can be
 * larger than "addrBufSize".  When that happens the trailing handlers were
 * not stored, and the caller should call again with a buffer that holds at
 * least the returned number of entries.
 */
static u4 extractCatchHandlers(const DexCode* pCode, const DexTry* pTry,
    u4* addrBuf, size_t addrBufSize)
{
    DexCatchIterator catchIter;
    DexCatchHandler* cur;
    unsigned int seen = 0;

    dexCatchIteratorInit(&catchIter, pCode, pTry->handlerOff);

    while ((cur = dexCatchIteratorNext(&catchIter)) != NULL) {
        /* keep counting past the end of the buffer so the caller can resize */
        if (seen < addrBufSize)
            addrBuf[seen] = cur->address;
        seen++;
    }

    return seen;
}
/*
 * Returns "true" if the code unit "insn" equals one of the data-chunk
 * signatures (packed-switch data, sparse-switch data, or array data),
 * i.e. it marks a block of data rather than an executable instruction.
 */
static bool isDataChunk(u2 insn)
{
    if (insn == kPackedSwitchSignature)
        return true;
    if (insn == kSparseSwitchSignature)
        return true;
    return insn == kArrayDataSignature;
}
/*
 * Allocate and initialize a basic block structure.
 *
 * The block is zero-filled, then given an empty predecessor set and a
 * live-register bit vector sized from vdata->insnRegCount.  "idx" is
 * accepted for the caller's convenience but is not used here; storing the
 * block into vdata->basicBlocks is left to the caller.
 *
 * Returns the new block, or NULL if any allocation fails (in which case
 * everything allocated so far has been released).
 */
static VfyBasicBlock* allocVfyBasicBlock(VerifierData* vdata, u4 idx)
{
    VfyBasicBlock* bb = (VfyBasicBlock*) calloc(1, sizeof(VfyBasicBlock));
    if (bb == NULL)
        return NULL;

    /*
     * TODO: there is no good default size here.  Most addresses have a
     * single predecessor, a fair number have 10+, and a few have 100+
     * (e.g. the synthetic "finally" in a large synchronized method).  A
     * small base allocation (perhaps two) whose first overflow jumps
     * dramatically (to 32 or so) would probably serve better.
     */
    bb->predecessors = dvmPointerSetAlloc(32);
    if (bb->predecessors != NULL) {
        bb->firstAddr = (u4) -1;    // DEBUG

        bb->liveRegs = dvmAllocBitVector(vdata->insnRegCount, false);
        if (bb->liveRegs != NULL)
            return bb;

        /* bit vector failed; undo the predecessor set */
        dvmPointerSetFree(bb->predecessors);
    }

    free(bb);
    return NULL;
}
/*
 * Record "curBlock" as a predecessor of the block that starts at
 * "targetIdx".
 *
 * If no block structure exists yet for "targetIdx" (e.g. the target of a
 * forward branch), one is allocated and stored in vdata->basicBlocks.  Its
 * remaining fields are filled in later, when the block builder reaches
 * that address.
 *
 * Returns "false" only if the target block could not be allocated.
 */
static bool addToPredecessor(VerifierData* vdata, VfyBasicBlock* curBlock,
    u4 targetIdx)
{
    assert(targetIdx < vdata->insnsSize);

    VfyBasicBlock** slot = &vdata->basicBlocks[targetIdx];
    VfyBasicBlock* target = *slot;

    if (target == NULL) {
        target = allocVfyBasicBlock(vdata, targetIdx);
        if (target == NULL)
            return false;
        *slot = target;
    }

    if (!dvmPointerSetAddEntry(target->predecessors, curBlock)) {
        /*
         * The entry was already present.  This is seen for packed-switch
         * instructions that list the same target more than once, and for
         * a (pointless) conditional branch to the next instruction.
         */
        LOGV("ODD: point set for targ=0x%04x (%p) already had block "
             "fir=0x%04x (%p)",
            targetIdx, target, curBlock->firstAddr, curBlock);
    }

    return true;
}
/*
 * Add ourselves to the predecessor list in all blocks we might transfer
 * control to.
 *
 * There are four ways to proceed to a new instruction:
 *  (1) continue to the following instruction
 *  (2) [un]conditionally branch to a specific location
 *  (3) conditionally branch through a "switch" statement
 *  (4) throw an exception
 *
 * Returning from the method (via a return statement or an uncaught
 * exception) are not interesting for liveness analysis.
 *
 * Arguments:
 *  curBlock     the block whose last instruction is at "curIdx"
 *  curIdx       address (in code units) of the block-ending instruction
 *  opFlags      opcode flags for that instruction
 *  nextIdx      address of the instruction that follows it
 *  handlerList  "numHandlers" catch handler addresses; every one of them
 *               is registered as a successor
 *
 * Returns "false" if a target block could not be allocated.
 */
static bool setPredecessors(VerifierData* vdata, VfyBasicBlock* curBlock,
    u4 curIdx, OpcodeFlags opFlags, u4 nextIdx, u4* handlerList,
    size_t numHandlers)
{
    const InsnFlags* insnFlags = vdata->insnFlags;
    const Method* meth = vdata->method;

    /* (4) exception handlers */
    unsigned int handlerIdx;
    for (handlerIdx = 0; handlerIdx < numHandlers; handlerIdx++) {
        if (!addToPredecessor(vdata, curBlock, handlerList[handlerIdx]))
            return false;
    }

    /* (1) fall through to the next instruction */
    if ((opFlags & kInstrCanContinue) != 0) {
        if (!addToPredecessor(vdata, curBlock, nextIdx))
            return false;
    }

    /* (2) branch target */
    if ((opFlags & kInstrCanBranch) != 0) {
        bool unused, gotBranch;
        s4 branchOffset, absOffset;

        gotBranch = dvmGetBranchOffset(meth, insnFlags, curIdx,
                &branchOffset, &unused);
        assert(gotBranch);
        absOffset = curIdx + branchOffset;
        assert(absOffset >= 0 && (u4) absOffset < vdata->insnsSize);

        if (!addToPredecessor(vdata, curBlock, absOffset))
            return false;
    }

    /* (3) switch targets */
    if ((opFlags & kInstrCanSwitch) != 0) {
        const u2* curInsn = &meth->insns[curIdx];
        const u2* dataPtr;

        /*
         * These values have already been verified, so we can trust them.
         *
         * The two 16-bit halves are combined in unsigned arithmetic and
         * only then converted to a signed offset; shifting a signed value
         * into the sign bit would be undefined behavior.
         */
        s4 offsetToData = (s4) ((u4) curInsn[1] | ((u4) curInsn[2] << 16));
        dataPtr = curInsn + offsetToData;

        /*
         * dataPtr points to the start of the switch data.  The first
         * item is the NOP+magic, the second is the number of entries in
         * the switch table.
         */
        u2 switchCount = dataPtr[1];

        /*
         * Skip past the ident field, size field, and the first_key field
         * (for packed) or the key list (for sparse).
         */
        if (dexOpcodeFromCodeUnit(meth->insns[curIdx]) == OP_PACKED_SWITCH) {
            dataPtr += 4;
        } else {
            assert(dexOpcodeFromCodeUnit(meth->insns[curIdx]) ==
                    OP_SPARSE_SWITCH);
            dataPtr += 2 + 2 * switchCount;
        }

        /* each target is a 32-bit relative offset stored as two code units */
        u4 switchIdx;
        for (switchIdx = 0; switchIdx < switchCount; switchIdx++) {
            s4 offset, absOffset;

            offset = (s4) ((u4) dataPtr[switchIdx*2] |
                     ((u4) dataPtr[switchIdx*2 + 1] << 16));
            absOffset = curIdx + offset;
            assert(absOffset >= 0 && (u4) absOffset < vdata->insnsSize);

            if (!addToPredecessor(vdata, curBlock, absOffset))
                return false;
        }
    }

    /* disabled diagnostic: flip to "true" to log unusually large pred sets */
    if (false) {
        if (dvmPointerSetGetCount(curBlock->predecessors) > 256) {
            LOGI("Lots of preds at 0x%04x in %s.%s:%s", curIdx,
                meth->clazz->descriptor, meth->name, meth->shorty);
        }
    }

    return true;
}
/*
 * Log one line per basic block: the block's first and last address,
 * followed by the first address and pointer of each predecessor (or a
 * note that there are none).
 *
 * Each line is assembled in a fixed 256-byte buffer; predecessor entries
 * that do not fit are dropped.
 */
static void dumpBasicBlocks(const VerifierData* vdata)
{
    char line[256];
    unsigned int addr;

    LOGI("Basic blocks for %s.%s:%s", vdata->method->clazz->descriptor,
        vdata->method->name, vdata->method->shorty);

    for (addr = 0; addr < vdata->insnsSize; addr++) {
        VfyBasicBlock* bb = vdata->basicBlocks[addr];
        if (bb != NULL) {
            assert(bb->firstAddr == addr);

            int used = snprintf(line, sizeof(line), " %04x-%04x ",
                    bb->firstAddr, bb->lastAddr);

            PointerSet* predSet = bb->predecessors;
            size_t predCount = dvmPointerSetGetCount(predSet);

            if (predCount == 0) {
                used += snprintf(line + used, sizeof(line) - used,
                        "(no preds)");
            } else {
                used += snprintf(line + used, sizeof(line) - used,
                        "preds:");

                /* stop appending as soon as the buffer has been filled */
                unsigned int p;
                for (p = 0; p < predCount && used < (int) sizeof(line); p++) {
                    const VfyBasicBlock* pred = (const VfyBasicBlock*)
                        dvmPointerSetGetEntry(predSet, p);
                    used += snprintf(line + used, sizeof(line) - used,
                            "%04x(%p),", pred->firstAddr, pred);
                }
            }

            /*
             * Force a '!' into the next-to-last byte and terminate the
             * buffer.  The '!' only shows up in the output when the text
             * actually extends that far.
             */
            line[sizeof(line)-2] = '!';
            line[sizeof(line)-1] = '\0';

            LOGI("%s", line);
        }
    }

    usleep(100 * 1000);      /* ugh...let logcat catch up */
}
/*
 * Generate a list of basic blocks and related information.
 *
 * Walks the method's instructions once, deciding for each instruction
 * whether it ends the current basic block, and registering the block as a
 * predecessor of every block it can transfer control to.
 *
 * On success, returns "true" with vdata->basicBlocks initialized.
 *
 * On failure, returns "false".  vdata->basicBlocks may then be partially
 * populated; this function does not release it (presumably the caller
 * does so through dvmFreeVfyBasicBlocks -- confirm against the caller).
 */
bool dvmComputeVfyBasicBlocks(VerifierData* vdata)
{
    const InsnFlags* insnFlags = vdata->insnFlags;
    const Method* meth = vdata->method;
    const u4 insnsSize = vdata->insnsSize;
    const DexCode* pCode = dvmGetMethodCode(meth);
    const DexTry* pTries = NULL;
    const size_t kHandlerStackAllocSize = 16;   /* max seen so far is 7 */
    u4 handlerAddrs[kHandlerStackAllocSize];
    u4* handlerListAlloc = NULL;
    u4* handlerList = NULL;
    size_t numHandlers = 0;
    u4 idx, blockStartAddr;
    bool result = false;

    bool verbose = false; //dvmWantVerboseVerification(meth);
    if (verbose) {
        LOGI("Basic blocks for %s.%s:%s",
            meth->clazz->descriptor, meth->name, meth->shorty);
    }

    /*
     * Allocate a data structure that allows us to map from an address to
     * the corresponding basic block.  Initially all pointers are NULL.
     * They are populated on demand as we proceed (either when we reach a
     * new BB, or when we need to add an item to the predecessor list in
     * a not-yet-reached BB).
     *
     * Only the first instruction in the block points to the BB structure;
     * the rest remain NULL.
     */
    vdata->basicBlocks =
        (VfyBasicBlock**) calloc(insnsSize, sizeof(VfyBasicBlock*));
    if (vdata->basicBlocks == NULL)
        return false;

    /*
     * The "tries" list is a series of non-overlapping regions with a list
     * of "catch" handlers.  Rather than do the "find a matching try block"
     * computation at each step, we just walk the "try" list in parallel.
     *
     * Not all methods have "try" blocks.  If this one does, we init tryEnd
     * to zero, so that the (exclusive bound) range check trips immediately.
     */
    u4 tryIndex = 0, tryStart = 0, tryEnd = 0;
    if (pCode->triesSize != 0) {
        pTries = dexGetTries(pCode);
    }

    u4 debugBBIndex = 0;

    /*
     * The address associated with a basic block is the start address.
     */
    blockStartAddr = 0;

    for (idx = 0; idx < insnsSize; ) {
        /*
         * Make sure we're pointing at the right "try" block.  It should
         * not be possible to "jump over" a block, so if we're no longer
         * in the correct one we can just advance to the next.
         */
        if (pTries != NULL && idx >= tryEnd) {
            if (tryIndex == pCode->triesSize) {
                /* no more try blocks in this method */
                pTries = NULL;
                numHandlers = 0;
            } else {
                /*
                 * Extract the set of handlers.  We want to avoid doing
                 * this for each block, so we copy them to local storage.
                 * If it doesn't fit in the small stack area, we'll use
                 * the heap instead.
                 *
                 * It's rare to encounter a method with more than half a
                 * dozen possible handlers.
                 */
                tryStart = pTries[tryIndex].startAddr;
                tryEnd = tryStart + pTries[tryIndex].insnCount;

                /* drop any heap list left over from the previous try */
                free(handlerListAlloc);
                handlerListAlloc = NULL;

                numHandlers = extractCatchHandlers(pCode, &pTries[tryIndex],
                    handlerAddrs, kHandlerStackAllocSize);
                assert(numHandlers > 0);    // TODO make sure this is verified

                if (numHandlers <= kHandlerStackAllocSize) {
                    handlerList = handlerAddrs;
                } else {
                    LOGD("overflow, numHandlers=%zu", numHandlers);
                    handlerListAlloc = (u4*) malloc(sizeof(u4) * numHandlers);
                    if (handlerListAlloc == NULL)
                        goto bail;
                    extractCatchHandlers(pCode, &pTries[tryIndex],
                        handlerListAlloc, numHandlers);
                    handlerList = handlerListAlloc;
                }

                LOGV("+++ start=%x end=%x numHan=%zu",
                    tryStart, tryEnd, numHandlers);

                tryIndex++;
            }
        }

        /*
         * Check the current instruction, and possibly aspects of the
         * next instruction, to see if this instruction ends the current
         * basic block.
         *
         * Instructions that can throw only end the block if there is the
         * possibility of a local handler catching the exception.
         */
        Opcode opcode = dexOpcodeFromCodeUnit(meth->insns[idx]);
        OpcodeFlags opFlags = dexGetFlagsFromOpcode(opcode);
        size_t nextIdx = idx + dexGetWidthFromInstruction(&meth->insns[idx]);
        bool endBB = false;
        bool ignoreInstr = false;

        if ((opFlags & kInstrCanContinue) == 0) {
            /* does not continue */
            endBB = true;
        } else if ((opFlags & (kInstrCanBranch | kInstrCanSwitch)) != 0) {
            /* conditionally branches elsewhere */
            endBB = true;
        } else if ((opFlags & kInstrCanThrow) != 0 &&
                dvmInsnIsInTry(insnFlags, idx))
        {
            /* throws an exception that might be caught locally */
            endBB = true;
        } else if (isDataChunk(meth->insns[idx])) {
            /*
             * If this is a data chunk (e.g. switch data) we want to skip
             * over it entirely.  Set endBB so we don't carry this along as
             * the start of a block, and ignoreInstr so we don't try to
             * open a basic block for this instruction.
             */
            endBB = ignoreInstr = true;
        } else if (dvmInsnIsBranchTarget(insnFlags, nextIdx)) {
            /*
             * We also need to end it if the next instruction is a branch
             * target.  Note we've tagged exception catch blocks as such.
             *
             * If we're this far along in the "else" chain, we know that
             * this isn't a data-chunk NOP, and control can continue to
             * the next instruction, so we're okay examining "nextIdx".
             */
            assert(nextIdx < insnsSize);
            endBB = true;
        } else if (opcode == OP_NOP && isDataChunk(meth->insns[nextIdx])) {
            /*
             * Handle an odd special case: if this is NOP padding before a
             * data chunk, also treat it as "ignore".  Otherwise it'll look
             * like a block that starts and doesn't end.
             */
            endBB = ignoreInstr = true;
        } else {
            /* check: return ops should be caught by absence of can-continue */
            assert((opFlags & kInstrCanReturn) == 0);
        }

        if (verbose) {
            char btc = dvmInsnIsBranchTarget(insnFlags, idx) ? '>' : ' ';
            char tryc =
                (pTries != NULL && idx >= tryStart && idx < tryEnd) ? 't' : ' ';
            bool startBB = (idx == blockStartAddr);
            const char* startEnd;

            if (ignoreInstr)
                startEnd = "IGNORE";
            else if (startBB && endBB)
                startEnd = "START/END";
            else if (startBB)
                startEnd = "START";
            else if (endBB)
                startEnd = "END";
            else
                startEnd = "-";

            LOGI("%04x: %c%c%s #%u", idx, tryc, btc, startEnd, debugBBIndex);

            if (pTries != NULL && idx == tryStart) {
                assert(numHandlers > 0);
                LOGI(" EXC block: [%04x, %04x) %zu:(%04x...)",
                    tryStart, tryEnd, numHandlers, handlerList[0]);
            }
        }

        if (idx != blockStartAddr) {
            /* should not be a basic block struct associated with this addr */
            assert(vdata->basicBlocks[idx] == NULL);
        }

        if (endBB) {
            if (!ignoreInstr) {
                /*
                 * Create a new BB if one doesn't already exist.
                 */
                VfyBasicBlock* curBlock = vdata->basicBlocks[blockStartAddr];
                if (curBlock == NULL) {
                    curBlock = allocVfyBasicBlock(vdata, blockStartAddr);
                    if (curBlock == NULL) {
                        /* go through "bail" so a heap handler list is freed */
                        goto bail;
                    }
                    vdata->basicBlocks[blockStartAddr] = curBlock;
                }

                curBlock->firstAddr = blockStartAddr;
                curBlock->lastAddr = idx;

                if (!setPredecessors(vdata, curBlock, idx, opFlags, nextIdx,
                        handlerList, numHandlers))
                {
                    goto bail;
                }
            }

            blockStartAddr = nextIdx;
            debugBBIndex++;
        }

        idx = nextIdx;
    }

    assert(idx == insnsSize);

    result = true;

    if (verbose)
        dumpBasicBlocks(vdata);

bail:
    free(handlerListAlloc);
    return result;
}
/*
 * Free the storage used by basic blocks.
 *
 * Releases every block reachable from vdata->basicBlocks, including each
 * block's predecessor set and live-register bit vector, then releases the
 * address-to-block table itself.  vdata->basicBlocks is set to NULL
 * afterward, so calling this again (or on a VerifierData whose table was
 * never allocated) is a no-op.
 */
void dvmFreeVfyBasicBlocks(VerifierData* vdata)
{
    unsigned int idx;

    if (vdata->basicBlocks == NULL)
        return;

    for (idx = 0; idx < vdata->insnsSize; idx++) {
        VfyBasicBlock* block = vdata->basicBlocks[idx];
        if (block == NULL)
            continue;

        dvmPointerSetFree(block->predecessors);
        /* allocated by dvmAllocBitVector() when the block was created */
        dvmFreeBitVector(block->liveRegs);
        free(block);
    }

    /* the table was calloc()ed by dvmComputeVfyBasicBlocks() */
    free(vdata->basicBlocks);
    vdata->basicBlocks = NULL;
}