/*
 * Copyright (C) 2008 The Android Open Source Project
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *      http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

/*
 * Dalvik classfile verification.  This file contains the verifier entry
 * points and the static constraint checks.
 */
#include "Dalvik.h"
#include "analysis/CodeVerify.h"


/* fwd */
static bool verifyMethod(Method* meth);
static bool verifyInstructions(VerifierData* vdata);


/*
 * Initialize some things we need for verification.
 *
 * Creates the instruction width, format and flags tables and stores them
 * in gDvm.
 *
 * Returns "false" (after logging an error) if any of the three tables
 * could not be created.  Tables that were created are left in gDvm in
 * that case; dvmVerificationShutdown() releases them.
 */
bool dvmVerificationStartup(void)
{
    gDvm.instrWidth = dexCreateInstrWidthTable();
    gDvm.instrFormat = dexCreateInstrFormatTable();
    gDvm.instrFlags = dexCreateInstrFlagsTable();

    if (gDvm.instrWidth == NULL || gDvm.instrFormat == NULL ||
        gDvm.instrFlags == NULL)
    {
        LOGE("Unable to create instruction tables\n");
        return false;
    }

    return true;
}

/*
 * Free up some things we needed for verification.
 *
 * Each pointer is reset to NULL after it is freed so that gDvm never holds
 * a dangling table pointer and a repeated shutdown call is harmless
 * (free(NULL) is a no-op).
 */
void dvmVerificationShutdown(void)
{
    free(gDvm.instrWidth);
    gDvm.instrWidth = NULL;

    free(gDvm.instrFormat);
    gDvm.instrFormat = NULL;

    free(gDvm.instrFlags);
    gDvm.instrFlags = NULL;
}

/*
 * Verify a class.
 *
 * By the time we get here, the value of gDvm.classVerifyMode should already
 * have been factored in.  If you want to call into the verifier even
 * though verification is disabled, that's your business.
 *
 * Returns "true" on success.
*/ bool dvmVerifyClass(ClassObject* clazz) { int i; if (dvmIsClassVerified(clazz)) { LOGD("Ignoring duplicate verify attempt on %s\n", clazz->descriptor); return true; } for (i = 0; i < clazz->directMethodCount; i++) { if (!verifyMethod(&clazz->directMethods[i])) { LOG_VFY("Verifier rejected class %s\n", clazz->descriptor); return false; } } for (i = 0; i < clazz->virtualMethodCount; i++) { if (!verifyMethod(&clazz->virtualMethods[i])) { LOG_VFY("Verifier rejected class %s\n", clazz->descriptor); return false; } } return true; } /* * Perform verification on a single method. * * We do this in three passes: * (1) Walk through all code units, determining instruction locations, * widths, and other characteristics. * (2) Walk through all code units, performing static checks on * operands. * (3) Iterate through the method, checking type safety and looking * for code flow problems. * * Some checks may be bypassed depending on the verification mode. We can't * turn this stuff off completely if we want to do "exact" GC. * * TODO: cite source? * Confirmed here: * - code array must not be empty * - (N/A) code_length must be less than 65536 * Confirmed by dvmComputeCodeWidths(): * - opcode of first instruction begins at index 0 * - only documented instructions may appear * - each instruction follows the last * - last byte of last instruction is at (code_length-1) */ static bool verifyMethod(Method* meth) { bool result = false; int newInstanceCount; /* * Verifier state blob. Various values will be cached here so we * can avoid expensive lookups and pass fewer arguments around. */ VerifierData vdata; #if 1 // ndef NDEBUG memset(&vdata, 0x99, sizeof(vdata)); #endif vdata.method = meth; vdata.insnsSize = dvmGetMethodInsnsSize(meth); vdata.insnRegCount = meth->registersSize; vdata.insnFlags = NULL; vdata.uninitMap = NULL; /* * If there aren't any instructions, make sure that's expected, then * exit successfully. 
Note: for native methods, meth->insns gets set * to a native function pointer on first call, so don't use that as * an indicator. */ if (vdata.insnsSize == 0) { if (!dvmIsNativeMethod(meth) && !dvmIsAbstractMethod(meth)) { LOG_VFY_METH(meth, "VFY: zero-length code in concrete non-native method\n"); goto bail; } goto success; } /* * Sanity-check the register counts. ins + locals = registers, so make * sure that ins <= registers. */ if (meth->insSize > meth->registersSize) { LOG_VFY_METH(meth, "VFY: bad register counts (ins=%d regs=%d)\n", meth->insSize, meth->registersSize); goto bail; } /* * Allocate and populate an array to hold instruction data. * * TODO: Consider keeping a reusable pre-allocated array sitting * around for smaller methods. */ vdata.insnFlags = (InsnFlags*) calloc(dvmGetMethodInsnsSize(meth), sizeof(InsnFlags)); if (vdata.insnFlags == NULL) goto bail; /* * Compute the width of each instruction and store the result in insnFlags. * Count up the #of occurrences of new-instance instructions while we're * at it. */ if (!dvmComputeCodeWidths(meth, vdata.insnFlags, &newInstanceCount)) goto bail; /* * Allocate a map to hold the classes of uninitialized instances. */ vdata.uninitMap = dvmCreateUninitInstanceMap(meth, vdata.insnFlags, newInstanceCount); if (vdata.uninitMap == NULL) goto bail; /* * Set the "in try" flags for all instructions guarded by a "try" block. */ if (!dvmSetTryFlags(meth, vdata.insnFlags)) goto bail; /* * Perform static instruction verification. */ if (!verifyInstructions(&vdata)) goto bail; /* * Do code-flow analysis. Do this after verifying the branch targets * so we don't need to worry about it here. * * If there are no registers, we don't need to do much in the way of * analysis, but we still need to verify that nothing actually tries * to use a register. 
*/ if (!dvmVerifyCodeFlow(&vdata)) { //LOGD("+++ %s failed code flow\n", meth->name); goto bail; } success: result = true; bail: dvmFreeUninitInstanceMap(vdata.uninitMap); free(vdata.insnFlags); return result; } /* * Verify an array data table. "curOffset" is the offset of the fill-array-data * instruction. */ static bool checkArrayData(const Method* meth, int curOffset) { const int insnCount = dvmGetMethodInsnsSize(meth); const u2* insns = meth->insns + curOffset; const u2* arrayData; int valueCount, valueWidth, tableSize; int offsetToArrayData; assert(curOffset >= 0 && curOffset < insnCount); /* make sure the start of the array data table is in range */ offsetToArrayData = insns[1] | (((s4)insns[2]) << 16); if (curOffset + offsetToArrayData < 0 || curOffset + offsetToArrayData + 2 >= insnCount) { LOG_VFY_METH(meth, "VFY: invalid array data start: at %d, data offset %d, count %d\n", curOffset, offsetToArrayData, insnCount); return false; } /* offset to array data table is a relative branch-style offset */ arrayData = insns + offsetToArrayData; /* make sure the table is 32-bit aligned */ if ((((u4) arrayData) & 0x03) != 0) { LOG_VFY_METH(meth, "VFY: unaligned array data table: at %d, data offset %d\n", curOffset, offsetToArrayData); return false; } valueWidth = arrayData[1]; valueCount = *(u4*)(&arrayData[2]); tableSize = 4 + (valueWidth * valueCount + 1) / 2; /* make sure the end of the switch is in range */ if (curOffset + offsetToArrayData + tableSize > insnCount) { LOG_VFY_METH(meth, "VFY: invalid array data end: at %d, data offset %d, end %d, " "count %d\n", curOffset, offsetToArrayData, curOffset + offsetToArrayData + tableSize, insnCount); return false; } return true; } /* * Decode the current instruction. */ static void decodeInstruction(const Method* meth, int insnIdx, DecodedInstruction* pDecInsn) { dexDecodeInstruction(gDvm.instrFormat, meth->insns + insnIdx, pDecInsn); } /* * Perform static checks on a "new-instance" instruction. 
Specifically, * make sure the class reference isn't for an array class. * * We don't need the actual class, just a pointer to the class name. */ static bool checkNewInstance(const Method* meth, int insnIdx) { DvmDex* pDvmDex = meth->clazz->pDvmDex; DecodedInstruction decInsn; const char* classDescriptor; u4 idx; decodeInstruction(meth, insnIdx, &decInsn); idx = decInsn.vB; // 2nd item if (idx >= pDvmDex->pHeader->typeIdsSize) { LOG_VFY_METH(meth, "VFY: bad type index %d (max %d)\n", idx, pDvmDex->pHeader->typeIdsSize); return false; } classDescriptor = dexStringByTypeIdx(pDvmDex->pDexFile, idx); if (classDescriptor[0] != 'L') { LOG_VFY_METH(meth, "VFY: can't call new-instance on type '%s'\n", classDescriptor); return false; } return true; } /* * Perform static checks on a "new-array" instruction. Specifically, make * sure they aren't creating an array of arrays that causes the number of * dimensions to exceed 255. */ static bool checkNewArray(const Method* meth, int insnIdx) { DvmDex* pDvmDex = meth->clazz->pDvmDex; DecodedInstruction decInsn; const char* classDescriptor; u4 idx; decodeInstruction(meth, insnIdx, &decInsn); idx = decInsn.vC; // 3rd item if (idx >= pDvmDex->pHeader->typeIdsSize) { LOG_VFY_METH(meth, "VFY: bad type index %d (max %d)\n", idx, pDvmDex->pHeader->typeIdsSize); return false; } classDescriptor = dexStringByTypeIdx(pDvmDex->pDexFile, idx); int bracketCount = 0; const char* cp = classDescriptor; while (*cp++ == '[') bracketCount++; if (bracketCount == 0) { /* The given class must be an array type. */ LOG_VFY_METH(meth, "VFY: can't new-array class '%s' (not an array)\n", classDescriptor); return false; } else if (bracketCount > 255) { /* It is illegal to create an array of more than 255 dimensions. */ LOG_VFY_METH(meth, "VFY: can't new-array class '%s' (exceeds limit)\n", classDescriptor); return false; } return true; } /* * Perform static checks on an instruction that takes a class constant. 
* Ensure that the class index is in the valid range. */ static bool checkTypeIndex(const Method* meth, int insnIdx, bool useB) { DvmDex* pDvmDex = meth->clazz->pDvmDex; DecodedInstruction decInsn; u4 idx; decodeInstruction(meth, insnIdx, &decInsn); if (useB) idx = decInsn.vB; else idx = decInsn.vC; if (idx >= pDvmDex->pHeader->typeIdsSize) { LOG_VFY_METH(meth, "VFY: bad type index %d (max %d)\n", idx, pDvmDex->pHeader->typeIdsSize); return false; } return true; } /* * Perform static checks on a field get or set instruction. All we do * here is ensure that the field index is in the valid range. */ static bool checkFieldIndex(const Method* meth, int insnIdx, bool useB) { DvmDex* pDvmDex = meth->clazz->pDvmDex; DecodedInstruction decInsn; u4 idx; decodeInstruction(meth, insnIdx, &decInsn); if (useB) idx = decInsn.vB; else idx = decInsn.vC; if (idx >= pDvmDex->pHeader->fieldIdsSize) { LOG_VFY_METH(meth, "VFY: bad field index %d (max %d) at offset 0x%04x\n", idx, pDvmDex->pHeader->fieldIdsSize, insnIdx); return false; } return true; } /* * Perform static checks on a method invocation instruction. All we do * here is ensure that the method index is in the valid range. */ static bool checkMethodIndex(const Method* meth, int insnIdx) { DvmDex* pDvmDex = meth->clazz->pDvmDex; DecodedInstruction decInsn; decodeInstruction(meth, insnIdx, &decInsn); if (decInsn.vB >= pDvmDex->pHeader->methodIdsSize) { LOG_VFY_METH(meth, "VFY: bad method index %d (max %d)\n", decInsn.vB, pDvmDex->pHeader->methodIdsSize); return false; } return true; } /* * Perform static checks on a string constant instruction. All we do * here is ensure that the string index is in the valid range. 
*/
static bool checkStringIndex(const Method* meth, int insnIdx)
{
    DvmDex* pDvmDex = meth->clazz->pDvmDex;
    DecodedInstruction decInsn;

    decodeInstruction(meth, insnIdx, &decInsn);

    /* the string index is carried in operand vB */
    if (decInsn.vB >= pDvmDex->pHeader->stringIdsSize) {
        LOG_VFY_METH(meth, "VFY: bad string index %d (max %d)\n",
            decInsn.vB, pDvmDex->pHeader->stringIdsSize);
        return false;
    }

    return true;
}

/*
 * Perform static verification on instructions.
 *
 * As a side effect, this sets the "branch target" flags in InsnFlags.
 *
 * "(CF)" items are handled during code-flow analysis.
 *
 * v3 4.10.1
 * - target of each jump and branch instruction must be valid
 * - targets of switch statements must be valid
 * - (CF) operands referencing constant pool entries must be valid
 * - (CF) operands of getfield, putfield, getstatic, putstatic must be valid
 * - (new) verify operands of "quick" field ops
 * - (CF) operands of method invocation instructions must be valid
 * - (new) verify operands of "quick" method invoke ops
 * - (CF) only invoke-direct can call a method starting with '<'
 * - (CF) <clinit> must never be called explicitly
 * - (CF) operands of instanceof, checkcast, new (and variants) must be valid
 * - new-array[-type] limited to 255 dimensions
 * - can't use "new" on an array class
 * - (?) limit dimensions in multi-array creation
 * - (CF) local variable load/store register values must be in valid range
 *
 * v3 4.11.1.2
 * - branches must be within the bounds of the code array
 * - targets of all control-flow instructions are the start of an instruction
 * - (CF) register accesses fall within range of allocated registers
 * - (N/A) access to constant pool must be of appropriate type
 * - (CF) code does not end in the middle of an instruction
 * - (CF) execution cannot fall off the end of the code
 * - (earlier) for each exception handler, the "try" area must begin and
 *   end at the start of an instruction (end can be at the end of the code)
 * - (earlier) for each exception handler, the handler must start at a valid
 *   instruction
 *
 * TODO: move some of the "CF" items in here for better performance (the
 * code-flow analysis sometimes has to process the same instruction several
 * times).
 *
 * Returns "true" if every instruction passed its static checks and the
 * instruction stream ends exactly at the end of the code area.
 */
static bool verifyInstructions(VerifierData* vdata)
{
    /*
     * These types of instructions can be GC points.  To support precise
     * GC, all such instructions must export the PC in the interpreter,
     * or the GC won't be able to identify the current PC for the thread.
     */
    static const int gcMask = kInstrCanBranch | kInstrCanSwitch |
        kInstrCanThrow | kInstrCanReturn;

    const Method* meth = vdata->method;
    InsnFlags* insnFlags = vdata->insnFlags;
    const size_t insnCount = vdata->insnsSize;
    const u2* insns = meth->insns;
    int i;

    /* the start of the method is a "branch target" */
    dvmInsnSetBranchTarget(insnFlags, 0, true);

    /* "i" and "insns" advance together, one whole instruction at a time */
    for (i = 0; i < (int) insnCount; /**/) {
        int width = dvmInsnGetWidth(insnFlags, i);
        OpCode opcode = *insns & 0xff;      /* opcode is the low byte */
        InstructionFlags opFlags = dexGetInstrFlags(gDvm.instrFlags, opcode);

        if ((opFlags & gcMask) != 0) {
            /*
             * This instruction is probably a GC point.  Branch instructions
             * only qualify if they go backward, so we need to check the
             * offset.
             */
            int offset = -1;
            bool unused;
            if (dvmGetBranchTarget(meth, insnFlags, i, &offset, &unused)) {
                /* backward branch, or branch to self */
                if (offset <= 0) {
                    dvmInsnSetGcPoint(insnFlags, i, true);
                }
            } else {
                /*
                 * dvmGetBranchTarget() returned false (presumably this is
                 * not a branch instruction), so the direction test does
                 * not apply.
                 */
                dvmInsnSetGcPoint(insnFlags, i, true);
            }
        }

        switch (opcode) {
        case OP_NOP:
            /* plain no-op or switch table data; nothing to do here */
            break;

        case OP_CONST_STRING:
        case OP_CONST_STRING_JUMBO:
            if (!checkStringIndex(meth, i))
                return false;
            break;

        case OP_CONST_CLASS:
        case OP_CHECK_CAST:
            /* type index is in vB */
            if (!checkTypeIndex(meth, i, true))
                return false;
            break;
        case OP_INSTANCE_OF:
            /* type index is in vC */
            if (!checkTypeIndex(meth, i, false))
                return false;
            break;

        case OP_PACKED_SWITCH:
        case OP_SPARSE_SWITCH:
            /* verify the associated table */
            if (!dvmCheckSwitchTargets(meth, insnFlags, i))
                return false;
            break;

        case OP_FILL_ARRAY_DATA:
            /* verify the associated table */
            if (!checkArrayData(meth, i))
                return false;
            break;

        case OP_GOTO:
        case OP_GOTO_16:
        case OP_IF_EQ:
        case OP_IF_NE:
        case OP_IF_LT:
        case OP_IF_GE:
        case OP_IF_GT:
        case OP_IF_LE:
        case OP_IF_EQZ:
        case OP_IF_NEZ:
        case OP_IF_LTZ:
        case OP_IF_GEZ:
        case OP_IF_GTZ:
        case OP_IF_LEZ:
            /* check the destination */
            if (!dvmCheckBranchTarget(meth, insnFlags, i, false))
                return false;
            break;
        case OP_GOTO_32:
            /* check the destination; self-branch is okay */
            if (!dvmCheckBranchTarget(meth, insnFlags, i, true))
                return false;
            break;

        case OP_NEW_INSTANCE:
            if (!checkNewInstance(meth, i))
                return false;
            break;

        case OP_NEW_ARRAY:
            if (!checkNewArray(meth, i))
                return false;
            break;

        case OP_FILLED_NEW_ARRAY:
        case OP_FILLED_NEW_ARRAY_RANGE:
            /* type index is in vB for both forms */
            if (!checkTypeIndex(meth, i, true))
                return false;
            break;

        case OP_IGET:
        case OP_IGET_WIDE:
        case OP_IGET_OBJECT:
        case OP_IGET_BOOLEAN:
        case OP_IGET_BYTE:
        case OP_IGET_CHAR:
        case OP_IGET_SHORT:
        case OP_IPUT:
        case OP_IPUT_WIDE:
        case OP_IPUT_OBJECT:
        case OP_IPUT_BOOLEAN:
        case OP_IPUT_BYTE:
        case OP_IPUT_CHAR:
        case OP_IPUT_SHORT:
            /* check the field index (in vC) */
            if (!checkFieldIndex(meth, i, false))
                return false;
            break;
        case OP_SGET:
        case OP_SGET_WIDE:
        case OP_SGET_OBJECT:
        case OP_SGET_BOOLEAN:
        case OP_SGET_BYTE:
        case OP_SGET_CHAR:
        case OP_SGET_SHORT:
        case OP_SPUT:
        case OP_SPUT_WIDE:
        case OP_SPUT_OBJECT:
        case OP_SPUT_BOOLEAN:
        case OP_SPUT_BYTE:
        case OP_SPUT_CHAR:
        case OP_SPUT_SHORT:
            /* check the field index (in vB) */
            if (!checkFieldIndex(meth, i, true))
                return false;
            break;

        case OP_INVOKE_VIRTUAL:
        case OP_INVOKE_SUPER:
        case OP_INVOKE_DIRECT:
        case OP_INVOKE_STATIC:
        case OP_INVOKE_INTERFACE:
        case OP_INVOKE_VIRTUAL_RANGE:
        case OP_INVOKE_SUPER_RANGE:
        case OP_INVOKE_DIRECT_RANGE:
        case OP_INVOKE_STATIC_RANGE:
        case OP_INVOKE_INTERFACE_RANGE:
            /* check the method index */
            if (!checkMethodIndex(meth, i))
                return false;
            break;

        case OP_EXECUTE_INLINE:
        case OP_INVOKE_DIRECT_EMPTY:
        case OP_IGET_QUICK:
        case OP_IGET_WIDE_QUICK:
        case OP_IGET_OBJECT_QUICK:
        case OP_IPUT_QUICK:
        case OP_IPUT_WIDE_QUICK:
        case OP_IPUT_OBJECT_QUICK:
        case OP_INVOKE_VIRTUAL_QUICK:
        case OP_INVOKE_VIRTUAL_QUICK_RANGE:
        case OP_INVOKE_SUPER_QUICK:
        case OP_INVOKE_SUPER_QUICK_RANGE:
            /* optimized opcodes are rejected outright */
            LOG_VFY("VFY: not expecting optimized instructions\n");
            return false;

        default:
            /* nothing to do */
            break;
        }

        assert(width > 0);
        i += width;
        insns += width;
    }

    /* make sure the last instruction ends at the end of the insn area */
    if (i != (int) insnCount) {
        /* insnCount is a size_t; cast it so it matches the %d conversion */
        LOG_VFY_METH(meth,
            "VFY: code did not end when expected (end at %d, count %d)\n",
            i, (int) insnCount);
        return false;
    }

    return true;
}