pefile.py - Android社区 - https://www.androidos.net.cn/

# -*- coding: Latin-1 -*-
"""pefile, Portable Executable reader module

All the PE file basic structures are available with their default names
as attributes of the instance returned.

Processed elements such as the import table are made available with lowercase
names, to differentiate them from the upper case basic structure names.

pefile has been tested against the limits of valid PE headers, that is, malware.
Lots of packed malware attempt to abuse the format way beyond its standard use.
To the best of my knowledge most of the abuses are handled gracefully.

For detailed copyright information see the file COPYING in
the root of the distribution archive.
"""

__author__ = 'Ero Carrera'
__version__ = '1.2.9.1'
__contact__ = 'ero@dkbza.org'

import os
import struct
import time
import math
import re
import exceptions
import string
import array

sha1, sha256, sha512, md5 = None, None, None, None

try:
    import hashlib
    sha1 = hashlib.sha1
    sha256 = hashlib.sha256
    sha512 = hashlib.sha512
    md5 = hashlib.md5
except ImportError:    
    try:
        import sha
        sha1 = sha.new
    except ImportError:
        pass
    try:
        import md5
        md5 = md5.new
    except ImportError:
        pass

fast_load = False

IMAGE_DOS_SIGNATURE             = 0x5A4D
IMAGE_OS2_SIGNATURE             = 0x454E
IMAGE_OS2_SIGNATURE_LE          = 0x454C
IMAGE_VXD_SIGNATURE             = 0x454C
IMAGE_NT_SIGNATURE              = 0x00004550
IMAGE_NUMBEROF_DIRECTORY_ENTRIES= 16
IMAGE_ORDINAL_FLAG              = 0x80000000L
IMAGE_ORDINAL_FLAG64            = 0x8000000000000000L
OPTIONAL_HEADER_MAGIC_PE        = 0x10b
OPTIONAL_HEADER_MAGIC_PE_PLUS   = 0x20b

directory_entry_types = [
    ('IMAGE_DIRECTORY_ENTRY_EXPORT',        0),
    ('IMAGE_DIRECTORY_ENTRY_IMPORT',        1),
    ('IMAGE_DIRECTORY_ENTRY_RESOURCE',      2),
    ('IMAGE_DIRECTORY_ENTRY_EXCEPTION',     3),
    ('IMAGE_DIRECTORY_ENTRY_SECURITY',      4),
    ('IMAGE_DIRECTORY_ENTRY_BASERELOC',     5),
    ('IMAGE_DIRECTORY_ENTRY_DEBUG',         6),
    ('IMAGE_DIRECTORY_ENTRY_COPYRIGHT',     7),
    ('IMAGE_DIRECTORY_ENTRY_GLOBALPTR',     8),
    ('IMAGE_DIRECTORY_ENTRY_TLS',           9),
    ('IMAGE_DIRECTORY_ENTRY_LOAD_CONFIG',   10),
    ('IMAGE_DIRECTORY_ENTRY_BOUND_IMPORT',  11),
    ('IMAGE_DIRECTORY_ENTRY_IAT',           12),
    ('IMAGE_DIRECTORY_ENTRY_DELAY_IMPORT',  13),
    ('IMAGE_DIRECTORY_ENTRY_COM_DESCRIPTOR',14),
    ('IMAGE_DIRECTORY_ENTRY_RESERVED',      15) ]

DIRECTORY_ENTRY = dict([(e[1], e[0]) for e in directory_entry_types]+directory_entry_types)

image_characteristics = [
    ('IMAGE_FILE_RELOCS_STRIPPED',          0x0001),
    ('IMAGE_FILE_EXECUTABLE_IMAGE',         0x0002),
    ('IMAGE_FILE_LINE_NUMS_STRIPPED',       0x0004),
    ('IMAGE_FILE_LOCAL_SYMS_STRIPPED',      0x0008),
    ('IMAGE_FILE_AGGRESIVE_WS_TRIM',        0x0010),
    ('IMAGE_FILE_LARGE_ADDRESS_AWARE',      0x0020),
    ('IMAGE_FILE_16BIT_MACHINE',            0x0040),
    ('IMAGE_FILE_BYTES_REVERSED_LO',        0x0080),
    ('IMAGE_FILE_32BIT_MACHINE',            0x0100),
    ('IMAGE_FILE_DEBUG_STRIPPED',           0x0200),
    ('IMAGE_FILE_REMOVABLE_RUN_FROM_SWAP',  0x0400),
    ('IMAGE_FILE_NET_RUN_FROM_SWAP',        0x0800),
    ('IMAGE_FILE_SYSTEM',                   0x1000),
    ('IMAGE_FILE_DLL',                      0x2000),
    ('IMAGE_FILE_UP_SYSTEM_ONLY',           0x4000),
    ('IMAGE_FILE_BYTES_REVERSED_HI',        0x8000) ]

IMAGE_CHARACTERISTICS = dict([(e[1], e[0]) for e in
    image_characteristics]+image_characteristics)

section_characteristics = [
    ('IMAGE_SCN_CNT_CODE',                  0x00000020),
    ('IMAGE_SCN_CNT_INITIALIZED_DATA',      0x00000040),
    ('IMAGE_SCN_CNT_UNINITIALIZED_DATA',    0x00000080),
    ('IMAGE_SCN_LNK_OTHER',                 0x00000100),
    ('IMAGE_SCN_LNK_INFO',                  0x00000200),
    ('IMAGE_SCN_LNK_REMOVE',                0x00000800),
    ('IMAGE_SCN_LNK_COMDAT',                0x00001000),
    ('IMAGE_SCN_MEM_FARDATA',               0x00008000),
    ('IMAGE_SCN_MEM_PURGEABLE',             0x00020000),
    ('IMAGE_SCN_MEM_16BIT',                 0x00020000),
    ('IMAGE_SCN_MEM_LOCKED',                0x00040000),
    ('IMAGE_SCN_MEM_PRELOAD',               0x00080000),
    ('IMAGE_SCN_ALIGN_1BYTES',              0x00100000),
    ('IMAGE_SCN_ALIGN_2BYTES',              0x00200000),
    ('IMAGE_SCN_ALIGN_4BYTES',              0x00300000),
    ('IMAGE_SCN_ALIGN_8BYTES',              0x00400000),
    ('IMAGE_SCN_ALIGN_16BYTES',             0x00500000),
    ('IMAGE_SCN_ALIGN_32BYTES',             0x00600000),
    ('IMAGE_SCN_ALIGN_64BYTES',             0x00700000),
    ('IMAGE_SCN_ALIGN_128BYTES',            0x00800000),
    ('IMAGE_SCN_ALIGN_256BYTES',            0x00900000),
    ('IMAGE_SCN_ALIGN_512BYTES',            0x00A00000),
    ('IMAGE_SCN_ALIGN_1024BYTES',           0x00B00000),
    ('IMAGE_SCN_ALIGN_2048BYTES',           0x00C00000),
    ('IMAGE_SCN_ALIGN_4096BYTES',           0x00D00000),
    ('IMAGE_SCN_ALIGN_8192BYTES',           0x00E00000),
    ('IMAGE_SCN_ALIGN_MASK',                0x00F00000),
    ('IMAGE_SCN_LNK_NRELOC_OVFL',           0x01000000),
    ('IMAGE_SCN_MEM_DISCARDABLE',           0x02000000),
    ('IMAGE_SCN_MEM_NOT_CACHED',            0x04000000),
    ('IMAGE_SCN_MEM_NOT_PAGED',             0x08000000),
    ('IMAGE_SCN_MEM_SHARED',                0x10000000),
    ('IMAGE_SCN_MEM_EXECUTE',               0x20000000),
    ('IMAGE_SCN_MEM_READ',                  0x40000000),
    ('IMAGE_SCN_MEM_WRITE',                 0x80000000L) ]
 
SECTION_CHARACTERISTICS = dict([(e[1], e[0]) for e in
    section_characteristics]+section_characteristics)

debug_types = [
    ('IMAGE_DEBUG_TYPE_UNKNOWN',        0),
    ('IMAGE_DEBUG_TYPE_COFF',           1),
    ('IMAGE_DEBUG_TYPE_CODEVIEW',       2),
    ('IMAGE_DEBUG_TYPE_FPO',            3),
    ('IMAGE_DEBUG_TYPE_MISC',           4),
    ('IMAGE_DEBUG_TYPE_EXCEPTION',      5),
    ('IMAGE_DEBUG_TYPE_FIXUP',          6),
    ('IMAGE_DEBUG_TYPE_OMAP_TO_SRC',    7),
    ('IMAGE_DEBUG_TYPE_OMAP_FROM_SRC',  8),
    ('IMAGE_DEBUG_TYPE_BORLAND',        9),
    ('IMAGE_DEBUG_TYPE_RESERVED10',     10) ]

DEBUG_TYPE = dict([(e[1], e[0]) for e in debug_types]+debug_types)

subsystem_types = [
    ('IMAGE_SUBSYSTEM_UNKNOWN',     0),
    ('IMAGE_SUBSYSTEM_NATIVE',      1),
    ('IMAGE_SUBSYSTEM_WINDOWS_GUI', 2),
    ('IMAGE_SUBSYSTEM_WINDOWS_CUI', 3),
    ('IMAGE_SUBSYSTEM_OS2_CUI',     5),
    ('IMAGE_SUBSYSTEM_POSIX_CUI',   7),
    ('IMAGE_SUBSYSTEM_WINDOWS_CE_GUI',  9),
    ('IMAGE_SUBSYSTEM_EFI_APPLICATION', 10),
    ('IMAGE_SUBSYSTEM_EFI_BOOT_SERVICE_DRIVER', 11),
    ('IMAGE_SUBSYSTEM_EFI_RUNTIME_DRIVER',      12),
    ('IMAGE_SUBSYSTEM_EFI_ROM',     13),
    ('IMAGE_SUBSYSTEM_XBOX',        14)]

SUBSYSTEM_TYPE = dict([(e[1], e[0]) for e in subsystem_types]+subsystem_types)

machine_types = [
    ('IMAGE_FILE_MACHINE_UNKNOWN',  0),
    ('IMAGE_FILE_MACHINE_AM33',     0x1d3),
    ('IMAGE_FILE_MACHINE_AMD64',    0x8664),
    ('IMAGE_FILE_MACHINE_ARM',      0x1c0),
    ('IMAGE_FILE_MACHINE_EBC',      0xebc),
    ('IMAGE_FILE_MACHINE_I386',     0x14c),
    ('IMAGE_FILE_MACHINE_IA64',     0x200),
    ('IMAGE_FILE_MACHINE_MR32',     0x9041),
    ('IMAGE_FILE_MACHINE_MIPS16',   0x266),
    ('IMAGE_FILE_MACHINE_MIPSFPU',  0x366),
    ('IMAGE_FILE_MACHINE_MIPSFPU16',0x466),
    ('IMAGE_FILE_MACHINE_POWERPC',  0x1f0),
    ('IMAGE_FILE_MACHINE_POWERPCFP',0x1f1),
    ('IMAGE_FILE_MACHINE_R4000',    0x166),
    ('IMAGE_FILE_MACHINE_SH3',      0x1a2),
    ('IMAGE_FILE_MACHINE_SH3DSP',   0x1a3),
    ('IMAGE_FILE_MACHINE_SH4',      0x1a6),
    ('IMAGE_FILE_MACHINE_SH5',      0x1a8),
    ('IMAGE_FILE_MACHINE_THUMB',    0x1c2),
    ('IMAGE_FILE_MACHINE_WCEMIPSV2',0x169),
 ]

MACHINE_TYPE = dict([(e[1], e[0]) for e in machine_types]+machine_types)

relocation_types = [
    ('IMAGE_REL_BASED_ABSOLUTE',        0),
    ('IMAGE_REL_BASED_HIGH',            1),
    ('IMAGE_REL_BASED_LOW',             2),
    ('IMAGE_REL_BASED_HIGHLOW',         3),
    ('IMAGE_REL_BASED_HIGHADJ',         4),
    ('IMAGE_REL_BASED_MIPS_JMPADDR',    5),
    ('IMAGE_REL_BASED_SECTION',         6),
    ('IMAGE_REL_BASED_REL',             7),
    ('IMAGE_REL_BASED_MIPS_JMPADDR16',  9),
    ('IMAGE_REL_BASED_IA64_IMM64',      9),
    ('IMAGE_REL_BASED_DIR64',           10),
    ('IMAGE_REL_BASED_HIGH3ADJ',        11) ]

RELOCATION_TYPE = dict([(e[1], e[0]) for e in relocation_types]+relocation_types)

dll_characteristics = [
    ('IMAGE_DLL_CHARACTERISTICS_RESERVED_0x0001', 0x0001),
    ('IMAGE_DLL_CHARACTERISTICS_RESERVED_0x0002', 0x0002),
    ('IMAGE_DLL_CHARACTERISTICS_RESERVED_0x0004', 0x0004),
    ('IMAGE_DLL_CHARACTERISTICS_RESERVED_0x0008', 0x0008),
    ('IMAGE_DLL_CHARACTERISTICS_DYNAMIC_BASE',      0x0040),
    ('IMAGE_DLL_CHARACTERISTICS_FORCE_INTEGRITY',   0x0080),
    ('IMAGE_DLL_CHARACTERISTICS_NX_COMPAT',         0x0100),
    ('IMAGE_DLL_CHARACTERISTICS_NO_ISOLATION',      0x0200),
    ('IMAGE_DLL_CHARACTERISTICS_NO_SEH',    0x0400),
    ('IMAGE_DLL_CHARACTERISTICS_NO_BIND',   0x0800),
    ('IMAGE_DLL_CHARACTERISTICS_RESERVED_0x1000', 0x1000),
    ('IMAGE_DLL_CHARACTERISTICS_WDM_DRIVER',    0x2000),
    ('IMAGE_DLL_CHARACTERISTICS_TERMINAL_SERVER_AWARE', 0x8000) ]

DLL_CHARACTERISTICS = dict([(e[1], e[0]) for e in dll_characteristics]+dll_characteristics)

# Resource types
resource_type = [
    ('RT_CURSOR',          1),
    ('RT_BITMAP',          2),
    ('RT_ICON',            3),
    ('RT_MENU',            4),
    ('RT_DIALOG',          5),
    ('RT_STRING',          6),
    ('RT_FONTDIR',         7),
    ('RT_FONT',            8),
    ('RT_ACCELERATOR',     9),
    ('RT_RCDATA',          10),
    ('RT_MESSAGETABLE',    11),
    ('RT_GROUP_CURSOR',    12),
    ('RT_GROUP_ICON',      14),
    ('RT_VERSION',         16),
    ('RT_DLGINCLUDE',      17),
    ('RT_PLUGPLAY',        19),
    ('RT_VXD',             20),
    ('RT_ANICURSOR',       21),
    ('RT_ANIICON',         22),
    ('RT_HTML',            23),
    ('RT_MANIFEST',        24) ]

RESOURCE_TYPE = dict([(e[1], e[0]) for e in resource_type]+resource_type)

# Language definitions
lang = [
 ('LANG_NEUTRAL',       0x00),
 ('LANG_INVARIANT',     0x7f),
 ('LANG_AFRIKAANS',     0x36),
 ('LANG_ALBANIAN',      0x1c),
 ('LANG_ARABIC',        0x01),
 ('LANG_ARMENIAN',      0x2b),
 ('LANG_ASSAMESE',      0x4d),
 ('LANG_AZERI',         0x2c),
 ('LANG_BASQUE',        0x2d),
 ('LANG_BELARUSIAN',    0x23),
 ('LANG_BENGALI',       0x45),
 ('LANG_BULGARIAN',     0x02),
 ('LANG_CATALAN',       0x03),
 ('LANG_CHINESE',       0x04),
 ('LANG_CROATIAN',      0x1a),
 ('LANG_CZECH',         0x05),
 ('LANG_DANISH',        0x06),
 ('LANG_DIVEHI',        0x65),
 ('LANG_DUTCH',         0x13),
 ('LANG_ENGLISH',       0x09),
 ('LANG_ESTONIAN',      0x25),
 ('LANG_FAEROESE',      0x38),
 ('LANG_FARSI',         0x29),
 ('LANG_FINNISH',       0x0b),
 ('LANG_FRENCH',        0x0c),
 ('LANG_GALICIAN',      0x56),
 ('LANG_GEORGIAN',      0x37),
 ('LANG_GERMAN',        0x07),
 ('LANG_GREEK',         0x08),
 ('LANG_GUJARATI',      0x47),
 ('LANG_HEBREW',        0x0d),
 ('LANG_HINDI',         0x39),
 ('LANG_HUNGARIAN',     0x0e),
 ('LANG_ICELANDIC',     0x0f),
 ('LANG_INDONESIAN',    0x21),
 ('LANG_ITALIAN',       0x10),
 ('LANG_JAPANESE',      0x11),
 ('LANG_KANNADA',       0x4b),
 ('LANG_KASHMIRI',      0x60),
 ('LANG_KAZAK',         0x3f),
 ('LANG_KONKANI',       0x57),
 ('LANG_KOREAN',        0x12),
 ('LANG_KYRGYZ',        0x40),
 ('LANG_LATVIAN',       0x26),
 ('LANG_LITHUANIAN',    0x27),
 ('LANG_MACEDONIAN',    0x2f),
 ('LANG_MALAY',         0x3e),
 ('LANG_MALAYALAM',     0x4c),
 ('LANG_MANIPURI',      0x58),
 ('LANG_MARATHI',       0x4e),
 ('LANG_MONGOLIAN',     0x50),
 ('LANG_NEPALI',        0x61),
 ('LANG_NORWEGIAN',     0x14),
 ('LANG_ORIYA',         0x48),
 ('LANG_POLISH',        0x15),
 ('LANG_PORTUGUESE',    0x16),
 ('LANG_PUNJABI',       0x46),
 ('LANG_ROMANIAN',      0x18),
 ('LANG_RUSSIAN',       0x19),
 ('LANG_SANSKRIT',      0x4f),
 ('LANG_SERBIAN',       0x1a),
 ('LANG_SINDHI',        0x59),
 ('LANG_SLOVAK',        0x1b),
 ('LANG_SLOVENIAN',     0x24),
 ('LANG_SPANISH',       0x0a),
 ('LANG_SWAHILI',       0x41),
 ('LANG_SWEDISH',       0x1d),
 ('LANG_SYRIAC',        0x5a),
 ('LANG_TAMIL',         0x49),
 ('LANG_TATAR',         0x44),
 ('LANG_TELUGU',        0x4a),
 ('LANG_THAI',          0x1e),
 ('LANG_TURKISH',       0x1f),
 ('LANG_UKRAINIAN',     0x22),
 ('LANG_URDU',          0x20),
 ('LANG_UZBEK',         0x43),
 ('LANG_VIETNAMESE',    0x2a),
 ('LANG_GAELIC',        0x3c),
 ('LANG_MALTESE',       0x3a),
 ('LANG_MAORI',         0x28),
 ('LANG_RHAETO_ROMANCE',0x17),
 ('LANG_SAAMI',         0x3b),
 ('LANG_SORBIAN',       0x2e),
 ('LANG_SUTU',          0x30),
 ('LANG_TSONGA',        0x31),
 ('LANG_TSWANA',        0x32),
 ('LANG_VENDA',         0x33),
 ('LANG_XHOSA',         0x34),
 ('LANG_ZULU',          0x35),
 ('LANG_ESPERANTO',     0x8f),
 ('LANG_WALON',         0x90),
 ('LANG_CORNISH',       0x91),
 ('LANG_WELSH',         0x92),
 ('LANG_BRETON',        0x93) ]

LANG = dict(lang+[(e[1], e[0]) for e in lang])

# Sublanguage definitions
sublang =  [
 ('SUBLANG_NEUTRAL',                        0x00),
 ('SUBLANG_DEFAULT',                        0x01),
 ('SUBLANG_SYS_DEFAULT',                    0x02),
 ('SUBLANG_ARABIC_SAUDI_ARABIA',            0x01),
 ('SUBLANG_ARABIC_IRAQ',                    0x02),
 ('SUBLANG_ARABIC_EGYPT',                   0x03),
 ('SUBLANG_ARABIC_LIBYA',                   0x04),
 ('SUBLANG_ARABIC_ALGERIA',                 0x05),
 ('SUBLANG_ARABIC_MOROCCO',                 0x06),
 ('SUBLANG_ARABIC_TUNISIA',                 0x07),
 ('SUBLANG_ARABIC_OMAN',                    0x08),
 ('SUBLANG_ARABIC_YEMEN',                   0x09),
 ('SUBLANG_ARABIC_SYRIA',                   0x0a),
 ('SUBLANG_ARABIC_JORDAN',                  0x0b),
 ('SUBLANG_ARABIC_LEBANON',                 0x0c),
 ('SUBLANG_ARABIC_KUWAIT',                  0x0d),
 ('SUBLANG_ARABIC_UAE',                     0x0e),
 ('SUBLANG_ARABIC_BAHRAIN',                 0x0f),
 ('SUBLANG_ARABIC_QATAR',                   0x10),
 ('SUBLANG_AZERI_LATIN',                    0x01),
 ('SUBLANG_AZERI_CYRILLIC',                 0x02),
 ('SUBLANG_CHINESE_TRADITIONAL',            0x01),
 ('SUBLANG_CHINESE_SIMPLIFIED',             0x02),
 ('SUBLANG_CHINESE_HONGKONG',               0x03),
 ('SUBLANG_CHINESE_SINGAPORE',              0x04),
 ('SUBLANG_CHINESE_MACAU',                  0x05),
 ('SUBLANG_DUTCH',                          0x01),
 ('SUBLANG_DUTCH_BELGIAN',                  0x02),
 ('SUBLANG_ENGLISH_US',                     0x01),
 ('SUBLANG_ENGLISH_UK',                     0x02),
 ('SUBLANG_ENGLISH_AUS',                    0x03),
 ('SUBLANG_ENGLISH_CAN',                    0x04),
 ('SUBLANG_ENGLISH_NZ',                     0x05),
 ('SUBLANG_ENGLISH_EIRE',                   0x06),
 ('SUBLANG_ENGLISH_SOUTH_AFRICA',           0x07),
 ('SUBLANG_ENGLISH_JAMAICA',                0x08),
 ('SUBLANG_ENGLISH_CARIBBEAN',              0x09),
 ('SUBLANG_ENGLISH_BELIZE',                 0x0a),
 ('SUBLANG_ENGLISH_TRINIDAD',               0x0b),
 ('SUBLANG_ENGLISH_ZIMBABWE',               0x0c),
 ('SUBLANG_ENGLISH_PHILIPPINES',            0x0d),
 ('SUBLANG_FRENCH',                         0x01),
 ('SUBLANG_FRENCH_BELGIAN',                 0x02),
 ('SUBLANG_FRENCH_CANADIAN',                0x03),
 ('SUBLANG_FRENCH_SWISS',                   0x04),
 ('SUBLANG_FRENCH_LUXEMBOURG',              0x05),
 ('SUBLANG_FRENCH_MONACO',                  0x06),
 ('SUBLANG_GERMAN',                         0x01),
 ('SUBLANG_GERMAN_SWISS',                   0x02),
 ('SUBLANG_GERMAN_AUSTRIAN',                0x03),
 ('SUBLANG_GERMAN_LUXEMBOURG',              0x04),
 ('SUBLANG_GERMAN_LIECHTENSTEIN',           0x05),
 ('SUBLANG_ITALIAN',                        0x01),
 ('SUBLANG_ITALIAN_SWISS',                  0x02),
 ('SUBLANG_KASHMIRI_SASIA',                 0x02),
 ('SUBLANG_KASHMIRI_INDIA',                 0x02),
 ('SUBLANG_KOREAN',                         0x01),
 ('SUBLANG_LITHUANIAN',                     0x01),
 ('SUBLANG_MALAY_MALAYSIA',                 0x01),
 ('SUBLANG_MALAY_BRUNEI_DARUSSALAM',        0x02),
 ('SUBLANG_NEPALI_INDIA',                   0x02),
 ('SUBLANG_NORWEGIAN_BOKMAL',               0x01),
 ('SUBLANG_NORWEGIAN_NYNORSK',              0x02),
 ('SUBLANG_PORTUGUESE',                     0x02),
 ('SUBLANG_PORTUGUESE_BRAZILIAN',           0x01),
 ('SUBLANG_SERBIAN_LATIN',                  0x02),
 ('SUBLANG_SERBIAN_CYRILLIC',               0x03),
 ('SUBLANG_SPANISH',                        0x01),
 ('SUBLANG_SPANISH_MEXICAN',                0x02),
 ('SUBLANG_SPANISH_MODERN',                 0x03),
 ('SUBLANG_SPANISH_GUATEMALA',              0x04),
 ('SUBLANG_SPANISH_COSTA_RICA',             0x05),
 ('SUBLANG_SPANISH_PANAMA',                 0x06),
 ('SUBLANG_SPANISH_DOMINICAN_REPUBLIC',     0x07),
 ('SUBLANG_SPANISH_VENEZUELA',              0x08),
 ('SUBLANG_SPANISH_COLOMBIA',               0x09),
 ('SUBLANG_SPANISH_PERU',                   0x0a),
 ('SUBLANG_SPANISH_ARGENTINA',              0x0b),
 ('SUBLANG_SPANISH_ECUADOR',                0x0c),
 ('SUBLANG_SPANISH_CHILE',                  0x0d),
 ('SUBLANG_SPANISH_URUGUAY',                0x0e),
 ('SUBLANG_SPANISH_PARAGUAY',               0x0f),
 ('SUBLANG_SPANISH_BOLIVIA',                0x10),
 ('SUBLANG_SPANISH_EL_SALVADOR',            0x11),
 ('SUBLANG_SPANISH_HONDURAS',               0x12),
 ('SUBLANG_SPANISH_NICARAGUA',              0x13),
 ('SUBLANG_SPANISH_PUERTO_RICO',            0x14),
 ('SUBLANG_SWEDISH',                        0x01),
 ('SUBLANG_SWEDISH_FINLAND',                0x02),
 ('SUBLANG_URDU_PAKISTAN',                  0x01),
 ('SUBLANG_URDU_INDIA',                     0x02),
 ('SUBLANG_UZBEK_LATIN',                    0x01),
 ('SUBLANG_UZBEK_CYRILLIC',                 0x02),
 ('SUBLANG_DUTCH_SURINAM',                  0x03),
 ('SUBLANG_ROMANIAN',                       0x01),
 ('SUBLANG_ROMANIAN_MOLDAVIA',              0x02),
 ('SUBLANG_RUSSIAN',                        0x01),
 ('SUBLANG_RUSSIAN_MOLDAVIA',               0x02),
 ('SUBLANG_CROATIAN',                       0x01),
 ('SUBLANG_LITHUANIAN_CLASSIC',             0x02),
 ('SUBLANG_GAELIC',                         0x01),
 ('SUBLANG_GAELIC_SCOTTISH',                0x02),
 ('SUBLANG_GAELIC_MANX',                    0x03) ]

SUBLANG = dict(sublang+[(e[1], e[0]) for e in sublang])

class UnicodeStringWrapperPostProcessor:
    """This class attemps to help the process of identifying strings
    that might be plain Unicode or Pascal. A list of strings will be
    wrapped on it with the hope the overlappings will help make the 
    decission about their type."""
    
    def __init__(self, pe, rva_ptr):
        self.pe = pe
        self.rva_ptr = rva_ptr
        self.string = None
        
        
    def get_rva(self):
        """Get the RVA of the string."""
        
        return self.rva_ptr
        
        
    def __str__(self):
        """Return the escaped ASCII representation of the string."""
    
        def convert_char(char):
            if char in string.printable:
                return char
            else:
                return r'\x%02x' % ord(char)
                
        if self.string:
            return ''.join([convert_char(c) for c in self.string])
            
        return ''
        
    
    def invalidate(self):
        """Make this instance None, to express it's no known string type."""
        
        self = None
    
        
    def render_pascal_16(self):
    
        self.string = self.pe.get_string_u_at_rva(
            self.rva_ptr+2, 
            max_length=self.__get_pascal_16_length())

def ask_pascal_16(self, next_rva_ptr):
        """The next RVA is taken to be the one immediately following this one.
        
        Such RVA could indicate the natural end of the string and will be checked
        with the possible length contained in the first word.
        """
        
        length = self.__get_pascal_16_length()
        
        if length == (next_rva_ptr - (self.rva_ptr+2)) / 2:
            self.length = length
            return True
            
        return False
        
        
    def __get_pascal_16_length(self):
    
        return self.__get_word_value_at_rva(self.rva_ptr)
        
    
    def __get_word_value_at_rva(self, rva):

try:
            data = self.pe.get_data(self.rva_ptr, 2)
        except PEFormatError, e:
            return False

if len(data)<2:
            return False

return struct.unpack('<H', data)[0]

#def render_pascal_8(self):
    #    """"""
        
        
    def ask_unicode_16(self, next_rva_ptr):
        """The next RVA is taken to be the one immediately following this one.
        
        Such RVA could indicate the natural end of the string and will be checked
        to see if there's a Unicode NULL character there.
        """
        
        if self.__get_word_value_at_rva(next_rva_ptr-2) == 0:
            self.length = next_rva_ptr - self.rva_ptr
            return True
            
        return False

def render_unicode_16(self):
        """"""

self.string = self.pe.get_string_u_at_rva(self.rva_ptr)

class PEFormatError(Exception):
    """Generic PE format error exception."""
    
    def __init__(self, value):
        self.value = value

def __str__(self):
        return repr(self.value)

class Dump:
    """Convenience class for dumping the PE information."""
    
    def __init__(self):
        self.text = ''
    
        
    def add_lines(self, txt, indent=0):
        """Adds a list of lines.
        
        The list can be indented with the optional argument 'indent'.
        """
        for line in txt:
            self.add_line(line, indent)
        
            
    def add_line(self, txt, indent=0):
        """Adds a line.
        
        The line can be indented with the optional argument 'indent'.
        """
        
        self.add(txt+'\n', indent)
    
        
    def add(self, txt, indent=0):
        """Adds some text, no newline will be appended.
        
        The text can be indented with the optional argument 'indent'.
        """
        
        if isinstance(txt, unicode):
            s = []
            for c in txt:
                try:
                    s.append(str(c))
                except UnicodeEncodeError, e:
                    s.append(repr(c))
                        
            txt = ''.join(s)
        
        self.text += ' '*indent+txt
    
        
    def add_header(self, txt):
        """Adds a header element."""
        
        self.add_line('-'*10+txt+'-'*10+'\n')
        
        
    def add_newline(self):
        """Adds a newline."""
        
        self.text += '\n'
        
        
    def get_text(self):
        """Get the text in its current state."""
    
        return self.text

class Structure:
    """Prepare structure object to extract members from data.
    
    Format is a list containing definitions for the elements
    of the structure.
    """
    
    
    def __init__(self, format, name=None, file_offset=None):
        # Format is forced little endian, for big endian non Intel platforms
        self.__format__ = '<'
        self.__keys__ = []
#        self.values = {}
        self.__format_length__ = 0
        self.__set_format__(format[1])
        self._all_zeroes = False
        self.__unpacked_data_elms__ = None
        self.__file_offset__ = file_offset
        if name:
            self.name = name
        else:
            self.name = format[0]
                
            
    def __get_format__(self):
        return self.__format__
        
        
    def get_file_offset(self):
        return self.__file_offset__

def set_file_offset(self, offset):
        self.__file_offset__ = offset
        
    def all_zeroes(self):
        """Returns true is the unpacked data is all zeroes."""
        
        return self._all_zeroes

def __set_format__(self, format):
    
        for elm in format:
            if ',' in elm:
                elm_type, elm_name = elm.split(',', 1)
                self.__format__ += elm_type
                
                elm_names = elm_name.split(',')
                names = []
                for elm_name in elm_names:
                    if elm_name in self.__keys__:
                        search_list = [x[:len(elm_name)] for x in self.__keys__]
                        occ_count = search_list.count(elm_name)
                        elm_name = elm_name+'_'+str(occ_count)
                    names.append(elm_name)
                # Some PE header structures have unions on them, so a certain
                # value might have different names, so each key has a list of
                # all the possible members referring to the data.
                self.__keys__.append(names)
                    
        self.__format_length__ = struct.calcsize(self.__format__)
        
        
    def sizeof(self):
        """Return size of the structure."""
    
        return self.__format_length__
        
        
    def __unpack__(self, data):
    
        if len(data)>self.__format_length__:
            data = data[:self.__format_length__]
            
        # OC Patch:
        # Some malware have incorrect header lengths.
        # Fail gracefully if this occurs
        # Buggy malware: a29b0118af8b7408444df81701ad5a7f
        #
        elif len(data)<self.__format_length__:
            raise PEFormatError('Data length less than expected header length.')

if data.count(chr(0)) == len(data):
            self._all_zeroes = True
            
        self.__unpacked_data_elms__ = struct.unpack(self.__format__, data)
        for i in xrange(len(self.__unpacked_data_elms__)):
            for key in self.__keys__[i]:
#                self.values[key] = self.__unpacked_data_elms__[i]
                setattr(self, key, self.__unpacked_data_elms__[i])

def __pack__(self):
    
        new_values = []
        
        for i in xrange(len(self.__unpacked_data_elms__)):
        
            for key in self.__keys__[i]:
                new_val = getattr(self, key)
                old_val = self.__unpacked_data_elms__[i]
                
                # In the case of Unions, when the first changed value
                # is picked the loop is exited
                if new_val != old_val:
                    break
                
            new_values.append(new_val)
            
        return struct.pack(self.__format__, *new_values)
        
                
    def __str__(self):
        return '\n'.join( self.dump() )

def __repr__(self):
        return '<Structure: %s>' % (' '.join( [' '.join(s.split()) for s in self.dump()] ))
        
        
    def dump(self, indentation=0):
        """Returns a string representation of the structure."""
    
        dump = []
        
        dump.append('[%s]' % self.name)

# Refer to the __set_format__ method for an explanation
        # of the following construct.
        for keys in self.__keys__:
            for key in keys:

val = getattr(self, key)
                if isinstance(val, int) or isinstance(val, long):
                    val_str = '0x%-8X' % (val)
                    if key == 'TimeDateStamp' or key == 'dwTimeStamp':
                        try:
                            val_str += ' [%s UTC]' % time.asctime(time.gmtime(val))
                        except exceptions.ValueError, e:
                            val_str += ' [INVALID TIME]'
                else:
                    val_str = ''.join(filter(lambda c:c != '\0', str(val)))

dump.append('%-30s %s' % (key+':', val_str))

return dump

class SectionStructure(Structure):
    """Convenience section handling class."""

def get_data(self, start, length=None):
        """Get data chunk from a section.
        
        Allows to query data from the section by passing the
        addresses where the PE file would be loaded by default.
        It is then possible to retrieve code and data by its real
        addresses as it would be if loaded.
        """

offset = start - self.VirtualAddress

if length:
            end = offset+length
        else:
            end = len(self.data)
            
        return self.data[offset:end]

def get_rva_from_offset(self, offset):
        return offset - self.PointerToRawData + self.VirtualAddress

def get_offset_from_rva(self, rva):
        return (rva - self.VirtualAddress) + self.PointerToRawData

def contains_offset(self, offset):
        """Check whether the section contains the file offset provided."""

if not self.PointerToRawData:
           # bss and other sections containing only uninitialized data must have 0
           # and do not take space in the file
           return False
        return self.PointerToRawData <= offset < self.VirtualAddress + self.SizeOfRawData

def contains_rva(self, rva):
        """Check whether the section contains the address provided."""

# PECOFF documentation v8 says:
        # The total size of the section when loaded into memory.
        # If this value is greater than SizeOfRawData, the section is zero-padded.
        # This field is valid only for executable images and should be set to zero
        # for object files.

if len(self.data) < self.SizeOfRawData:
            size = self.Misc_VirtualSize
        else:
            size = max(self.SizeOfRawData, self.Misc_VirtualSize)
            
        return self.VirtualAddress <= rva < self.VirtualAddress + size

def contains(self, rva):
        #print "DEPRECATION WARNING: you should use contains_rva() instead of contains()"
        return self.contains_rva(rva)

def set_data(self, data):
        """Set the data belonging to the section."""
        
        self.data = data
        
        
    def get_entropy(self):
        """Calculate and return the entropy for the section."""
        
        return self.entropy_H( self.data )

def get_hash_sha1(self):
        """Get the SHA-1 hex-digest of the section's data."""
        
        if sha1 is not None:
            return sha1( self.data ).hexdigest()

def get_hash_sha256(self):
        """Get the SHA-256 hex-digest of the section's data."""
        
        if sha256 is not None:
            return sha256( self.data ).hexdigest()

def get_hash_sha512(self):
        """Get the SHA-512 hex-digest of the section's data."""
        
        if sha512 is not None:
            return sha512( self.data ).hexdigest()

def get_hash_md5(self):
        """Get the MD5 hex-digest of the section's data."""
        
        if md5 is not None:
            return md5( self.data ).hexdigest()

def entropy_H(self, data):
        """Calculate the entropy of a chunk of data."""

if len(data) == 0:
            return 0.0
    
        occurences = array.array('L', [0]*256)
    
        for x in data:
            occurences[ord(x)] += 1
    
        entropy = 0
        for x in occurences:
            if x:
                p_x = float(x) / len(data)
                entropy -= p_x*math.log(p_x, 2)
    
        return entropy

class DataContainer:
    """Generic data container."""
	
    def __init__(self, **args):
        for key, value in args.items():
            setattr(self, key, value)

class ImportDescData(DataContainer):
    """Holds import descriptor information.
    
    dll:        name of the imported DLL
    imports:    list of imported symbols (ImportData instances)
    struct:     IMAGE_IMPORT_DESCRIPTOR sctruture
    """

class ImportData(DataContainer):
    """Holds imported symbol's information.
    
    ordinal:    Ordinal of the symbol
    name:       Name of the symbol
    bound:      If the symbol is bound, this contains
                the address.
    """
    
class ExportDirData(DataContainer):
    """Holds export directory information.
                    
    struct:     IMAGE_EXPORT_DIRECTORY structure
    symbols:    list of exported symbols (ExportData instances)
"""
    
class ExportData(DataContainer):
    """Holds exported symbols' information.
    
    ordinal:    ordinal of the symbol
    address:    address of the symbol
    name:       name of the symbol (None if the symbol is
                exported by ordinal only)
    forwarder:  if the symbol is forwarded it will
                contain the name of the target symbol,
                None otherwise.
    """

class ResourceDirData(DataContainer):
    """Holds resource directory information.
    
    struct:     IMAGE_RESOURCE_DIRECTORY structure
    entries:    list of entries (ResourceDirEntryData instances)
    """
    
class ResourceDirEntryData(DataContainer):
    """Holds resource directory entry data.
    
    struct:     IMAGE_RESOURCE_DIRECTORY_ENTRY structure
    name:       If the resource is identified by name this
                attribute will contain the name string. None
                otherwise. If identified by id, the id is
                availabe at 'struct.Id'
    id:         the id, also in struct.Id
    directory:  If this entry has a lower level directory
                this attribute will point to the
                ResourceDirData instance representing it.
    data:       If this entry has no futher lower directories
                and points to the actual resource data, this
                attribute will reference the corresponding
                ResourceDataEntryData instance.
    (Either of the 'directory' or 'data' attribute will exist,
    but not both.)
    """

class ResourceDataEntryData(DataContainer):
    """Holds resource data entry information.
    
    struct:     IMAGE_RESOURCE_DATA_ENTRY structure
    lang:       Primary language ID
    sublang:    Sublanguage ID
    """

class DebugData(DataContainer):
    """Holds debug information.
    
    struct:     IMAGE_DEBUG_DIRECTORY structure
    """

class BaseRelocationData(DataContainer):
    """Holds base relocation information.
    
    struct:     IMAGE_BASE_RELOCATION structure
    entries:    list of relocation data (RelocationData instances)
    """
    
class RelocationData(DataContainer):
    """Holds relocation information.
    
    type:       Type of relocation
                The type string is can be obtained by
                RELOCATION_TYPE[type]
    rva:        RVA of the relocation
    """

class TlsData(DataContainer):
    """Holds TLS information.
    
    struct:     IMAGE_TLS_DIRECTORY structure
    """

class BoundImportDescData(DataContainer):
    """Holds bound import descriptor data.
    
    This directory entry will provide with information on the
    DLLs this PE files has been bound to (if bound at all).
    The structure will contain the name and timestamp of the
    DLL at the time of binding so that the loader can know
    whether it differs from the one currently present in the
    system and must, therefore, re-bind the PE's imports.
    
    struct:     IMAGE_BOUND_IMPORT_DESCRIPTOR structure
    name:       DLL name
    entries:    list of entries (BoundImportRefData instances)
                the entries will exist if this DLL has forwarded
                symbols. If so, the destination DLL will have an
                entry in this list.
    """

class BoundImportRefData(DataContainer):
    """Holds bound import forwader reference data.
    
    Contains the same information as the bound descriptor but
    for forwarded DLLs, if any.
    
    struct:     IMAGE_BOUND_FORWARDER_REF structure
    name:       dll name
    """

class PE:
    """A Portable Executable representation.
    
    This class provides access to most of the information in a PE file.
    
    It expects to be supplied the name of the file to load or PE data
    to process and an optional argument 'fast_load' (False by default)
    which controls whether to load all the directories information,
    which can be quite time consuming.
    
    pe = pefile.PE('module.dll')
    pe = pefile.PE(name='module.dll')
    
    would load 'module.dll' and process it. If the data would be already
    available in a buffer the same could be achieved with:
    
    pe = pefile.PE(data=module_dll_data)
    
    The "fast_load" can be set to a default by setting its value in the
    module itself by means,for instance, of a "pefile.fast_load = True".
    That will make all the subsequent instances not to load the
    whole PE structure. The "full_load" method can be used to parse
    the missing data at a later stage.
    
    Basic headers information will be available in the attributes:
    
    DOS_HEADER
    NT_HEADERS
    FILE_HEADER
    OPTIONAL_HEADER
    
    All of them will contain among their attrbitues the members of the
    corresponding structures as defined in WINNT.H
    
    The raw data corresponding to the header (from the beginning of the
    file up to the start of the first section) will be avaiable in the
    instance's attribute 'header' as a string.
    
    The sections will be available as a list in the 'sections' attribute.
    Each entry will contain as attributes all the structure's members.
    
    Directory entries will be available as attributes (if they exist):
    (no other entries are processed at this point)
    
    DIRECTORY_ENTRY_IMPORT (list of ImportDescData instances)
    DIRECTORY_ENTRY_EXPORT (ExportDirData instance)
    DIRECTORY_ENTRY_RESOURCE (ResourceDirData instance)
    DIRECTORY_ENTRY_DEBUG (list of DebugData instances)
    DIRECTORY_ENTRY_BASERELOC (list of BaseRelocationData instances)
    DIRECTORY_ENTRY_TLS 
    DIRECTORY_ENTRY_BOUND_IMPORT (list of BoundImportData instances)
    
    The following dictionary attributes provide ways of mapping different
    constants. They will accept the numeric value and return the string
    representation and the opposite, feed in the string and get the
    numeric constant:
    
    DIRECTORY_ENTRY
    IMAGE_CHARACTERISTICS
    SECTION_CHARACTERISTICS
    DEBUG_TYPE
    SUBSYSTEM_TYPE
    MACHINE_TYPE
    RELOCATION_TYPE
    RESOURCE_TYPE
    LANG
    SUBLANG
    """

#
    # Format specifications for PE structures.
    #
    
    __IMAGE_DOS_HEADER_format__ = ('IMAGE_DOS_HEADER',
        ('H,e_magic', 'H,e_cblp', 'H,e_cp',
        'H,e_crlc', 'H,e_cparhdr', 'H,e_minalloc',
        'H,e_maxalloc', 'H,e_ss', 'H,e_sp', 'H,e_csum',
        'H,e_ip', 'H,e_cs', 'H,e_lfarlc', 'H,e_ovno', '8s,e_res',
        'H,e_oemid', 'H,e_oeminfo', '20s,e_res2',
        'L,e_lfanew'))
        
    __IMAGE_FILE_HEADER_format__ = ('IMAGE_FILE_HEADER',
        ('H,Machine', 'H,NumberOfSections',
        'L,TimeDateStamp', 'L,PointerToSymbolTable',
        'L,NumberOfSymbols', 'H,SizeOfOptionalHeader',
        'H,Characteristics'))
        
    __IMAGE_DATA_DIRECTORY_format__ = ('IMAGE_DATA_DIRECTORY',
        ('L,VirtualAddress', 'L,Size'))
    
    
    __IMAGE_OPTIONAL_HEADER_format__ = ('IMAGE_OPTIONAL_HEADER',
        ('H,Magic', 'B,MajorLinkerVersion',
        'B,MinorLinkerVersion', 'L,SizeOfCode',
        'L,SizeOfInitializedData', 'L,SizeOfUninitializedData',
        'L,AddressOfEntryPoint', 'L,BaseOfCode', 'L,BaseOfData',
        'L,ImageBase', 'L,SectionAlignment', 'L,FileAlignment',
        'H,MajorOperatingSystemVersion', 'H,MinorOperatingSystemVersion',
        'H,MajorImageVersion', 'H,MinorImageVersion',
        'H,MajorSubsystemVersion', 'H,MinorSubsystemVersion',
        'L,Reserved1', 'L,SizeOfImage', 'L,SizeOfHeaders',
        'L,CheckSum', 'H,Subsystem', 'H,DllCharacteristics',
        'L,SizeOfStackReserve', 'L,SizeOfStackCommit',
        'L,SizeOfHeapReserve', 'L,SizeOfHeapCommit',
        'L,LoaderFlags', 'L,NumberOfRvaAndSizes' ))

__IMAGE_OPTIONAL_HEADER64_format__ = ('IMAGE_OPTIONAL_HEADER64',
        ('H,Magic', 'B,MajorLinkerVersion',
        'B,MinorLinkerVersion', 'L,SizeOfCode',
        'L,SizeOfInitializedData', 'L,SizeOfUninitializedData',
        'L,AddressOfEntryPoint', 'L,BaseOfCode',
        'Q,ImageBase', 'L,SectionAlignment', 'L,FileAlignment',
        'H,MajorOperatingSystemVersion', 'H,MinorOperatingSystemVersion',
        'H,MajorImageVersion', 'H,MinorImageVersion',
        'H,MajorSubsystemVersion', 'H,MinorSubsystemVersion',
        'L,Reserved1', 'L,SizeOfImage', 'L,SizeOfHeaders',
        'L,CheckSum', 'H,Subsystem', 'H,DllCharacteristics',
        'Q,SizeOfStackReserve', 'Q,SizeOfStackCommit',
        'Q,SizeOfHeapReserve', 'Q,SizeOfHeapCommit',
        'L,LoaderFlags', 'L,NumberOfRvaAndSizes' ))

__IMAGE_NT_HEADERS_format__ = ('IMAGE_NT_HEADERS', ('L,Signature',))
        
    __IMAGE_SECTION_HEADER_format__ = ('IMAGE_SECTION_HEADER',
        ('8s,Name', 'L,Misc,Misc_PhysicalAddress,Misc_VirtualSize',
        'L,VirtualAddress', 'L,SizeOfRawData', 'L,PointerToRawData',
        'L,PointerToRelocations', 'L,PointerToLinenumbers',
        'H,NumberOfRelocations', 'H,NumberOfLinenumbers',
        'L,Characteristics'))

__IMAGE_DELAY_IMPORT_DESCRIPTOR_format__ = ('IMAGE_DELAY_IMPORT_DESCRIPTOR',
        ('L,grAttrs', 'L,szName', 'L,phmod', 'L,pIAT', 'L,pINT',
        'L,pBoundIAT', 'L,pUnloadIAT', 'L,dwTimeStamp'))

__IMAGE_IMPORT_DESCRIPTOR_format__ =  ('IMAGE_IMPORT_DESCRIPTOR',
        ('L,OriginalFirstThunk,Characteristics',
        'L,TimeDateStamp', 'L,ForwarderChain', 'L,Name', 'L,FirstThunk'))

__IMAGE_EXPORT_DIRECTORY_format__ =  ('IMAGE_EXPORT_DIRECTORY',
        ('L,Characteristics',
        'L,TimeDateStamp', 'H,MajorVersion', 'H,MinorVersion', 'L,Name',
        'L,Base', 'L,NumberOfFunctions', 'L,NumberOfNames',
        'L,AddressOfFunctions', 'L,AddressOfNames', 'L,AddressOfNameOrdinals'))

__IMAGE_RESOURCE_DIRECTORY_format__ = ('IMAGE_RESOURCE_DIRECTORY',
        ('L,Characteristics',
        'L,TimeDateStamp', 'H,MajorVersion', 'H,MinorVersion',
        'H,NumberOfNamedEntries', 'H,NumberOfIdEntries'))

__IMAGE_RESOURCE_DIRECTORY_ENTRY_format__ = ('IMAGE_RESOURCE_DIRECTORY_ENTRY',
        ('L,Name',
        'L,OffsetToData'))
            
    __IMAGE_RESOURCE_DATA_ENTRY_format__ = ('IMAGE_RESOURCE_DATA_ENTRY',
        ('L,OffsetToData', 'L,Size', 'L,CodePage', 'L,Reserved'))
    
    __VS_VERSIONINFO_format__ = ( 'VS_VERSIONINFO',
        ('H,Length', 'H,ValueLength', 'H,Type' ))
    
    __VS_FIXEDFILEINFO_format__ = ( 'VS_FIXEDFILEINFO',
        ('L,Signature', 'L,StrucVersion', 'L,FileVersionMS', 'L,FileVersionLS',
         'L,ProductVersionMS', 'L,ProductVersionLS', 'L,FileFlagsMask', 'L,FileFlags',
         'L,FileOS', 'L,FileType', 'L,FileSubtype', 'L,FileDateMS', 'L,FileDateLS'))
    
    __StringFileInfo_format__ = ( 'StringFileInfo',
        ('H,Length', 'H,ValueLength', 'H,Type' ))
    
    __StringTable_format__ = ( 'StringTable',
        ('H,Length', 'H,ValueLength', 'H,Type' ))
    
    __String_format__ = ( 'String',
        ('H,Length', 'H,ValueLength', 'H,Type' ))
    
    __Var_format__ = ( 'Var', ('H,Length', 'H,ValueLength', 'H,Type' ))

__IMAGE_THUNK_DATA_format__ = ('IMAGE_THUNK_DATA',
        ('L,ForwarderString,Function,Ordinal,AddressOfData',))

__IMAGE_THUNK_DATA64_format__ = ('IMAGE_THUNK_DATA',
        ('Q,ForwarderString,Function,Ordinal,AddressOfData',))

__IMAGE_DEBUG_DIRECTORY_format__ = ('IMAGE_DEBUG_DIRECTORY',
        ('L,Characteristics', 'L,TimeDateStamp', 'H,MajorVersion',
        'H,MinorVersion', 'L,Type', 'L,SizeOfData', 'L,AddressOfRawData',
        'L,PointerToRawData'))
    
    __IMAGE_BASE_RELOCATION_format__ = ('IMAGE_BASE_RELOCATION',
        ('L,VirtualAddress', 'L,SizeOfBlock') )

__IMAGE_TLS_DIRECTORY_format__ = ('IMAGE_TLS_DIRECTORY',
        ('L,StartAddressOfRawData', 'L,EndAddressOfRawData',
        'L,AddressOfIndex', 'L,AddressOfCallBacks',
        'L,SizeOfZeroFill', 'L,Characteristics' ) )

__IMAGE_TLS_DIRECTORY64_format__ = ('IMAGE_TLS_DIRECTORY',
        ('Q,StartAddressOfRawData', 'Q,EndAddressOfRawData',
        'Q,AddressOfIndex', 'Q,AddressOfCallBacks',
        'L,SizeOfZeroFill', 'L,Characteristics' ) )

__IMAGE_BOUND_IMPORT_DESCRIPTOR_format__ = ('IMAGE_BOUND_IMPORT_DESCRIPTOR',
        ('L,TimeDateStamp', 'H,OffsetModuleName', 'H,NumberOfModuleForwarderRefs'))

__IMAGE_BOUND_FORWARDER_REF_format__ = ('IMAGE_BOUND_FORWARDER_REF',
        ('L,TimeDateStamp', 'H,OffsetModuleName', 'H,Reserved') )

def __init__(self, name=None, data=None, fast_load=None):
    
        self.sections = []
        
        self.__warnings = []
        
        self.PE_TYPE = None
        
        if  not name and not data:
            return
            
        # This list will keep track of all the structures created.
        # That will allow for an easy iteration through the list
        # in order to save the modifications made
        self.__structures__ = []

if not fast_load:
            fast_load = globals()['fast_load']
        self.__parse__(name, data, fast_load)
                    
        
    
    def __unpack_data__(self, format, data, file_offset):
        """Apply structure format to raw data.
        
        Returns and unpacked structure object if successful, None otherwise.
        """
    
        structure = Structure(format, file_offset=file_offset)
        #if len(data) < structure.sizeof():
        #    return None
    
        try:
            structure.__unpack__(data)
        except PEFormatError, err:
            self.__warnings.append(
                'Corrupt header "%s" at file offset %d. Exception: %s' % (
                    format[0], file_offset, str(err))  )
            return None

self.__structures__.append(structure)
    
        return structure

def __parse__(self, fname, data, fast_load):
        """Parse a Portable Executable file.
        
        Loads a PE file, parsing all its structures and making them available
        through the instance's attributes.
        """
        
        if fname:
            fd = file(fname, 'rb')
            self.__data__ = fd.read()
            fd.close()
        elif data:
            self.__data__ = data

self.DOS_HEADER = self.__unpack_data__(
            self.__IMAGE_DOS_HEADER_format__,
            self.__data__, file_offset=0)
            
        if not self.DOS_HEADER or self.DOS_HEADER.e_magic != IMAGE_DOS_SIGNATURE:
            raise PEFormatError('DOS Header magic not found.')

# OC Patch:
        # Check for sane value in e_lfanew
        #                
        if self.DOS_HEADER.e_lfanew > len(self.__data__):
            raise PEFormatError('Invalid e_lfanew value, probably not a PE file')

nt_headers_offset = self.DOS_HEADER.e_lfanew

self.NT_HEADERS = self.__unpack_data__(
            self.__IMAGE_NT_HEADERS_format__,
            self.__data__[nt_headers_offset:],
            file_offset = nt_headers_offset)

# We better check the signature right here, before the file screws
        # around with sections:
        # OC Patch:
        # Some malware will cause the Signature value to not exist at all
        if not self.NT_HEADERS or not self.NT_HEADERS.Signature:
            raise PEFormatError('NT Headers not found.')

if self.NT_HEADERS.Signature != IMAGE_NT_SIGNATURE:
            raise PEFormatError('Invalid NT Headers signature.')
                
        self.FILE_HEADER = self.__unpack_data__(
            self.__IMAGE_FILE_HEADER_format__,
            self.__data__[nt_headers_offset+4:],
            file_offset = nt_headers_offset+4)
        image_flags = self.retrieve_flags(IMAGE_CHARACTERISTICS, 'IMAGE_FILE_')
        
        if not self.FILE_HEADER:
            raise PEFormatError('File Header missing')

# Set the image's flags according the the Characteristics member
        self.set_flags(self.FILE_HEADER, self.FILE_HEADER.Characteristics, image_flags)
        
        optional_header_offset =    \
            nt_headers_offset+4+self.FILE_HEADER.sizeof()

# Note: location of sections can be controlled from PE header:
        sections_offset = optional_header_offset + self.FILE_HEADER.SizeOfOptionalHeader

self.OPTIONAL_HEADER = self.__unpack_data__(
            self.__IMAGE_OPTIONAL_HEADER_format__,
            self.__data__[optional_header_offset:],
            file_offset = optional_header_offset)

# According to solardesigner's findings for his
        # Tiny PE project, the optional header does not
        # need fields beyond "Subsystem" in order to be
        # loadable by the Windows loader (given that zeroes
        # are acceptable values and the header is loaded
        # in a zeroed memory page)
        # If trying to parse a full Optional Header fails
        # we try to parse it again with some 0 padding
        #
        MINIMUM_VALID_OPTIONAL_HEADER_RAW_SIZE = 69
        
        if ( self.OPTIONAL_HEADER is None and 
            len(self.__data__[optional_header_offset:])
                >= MINIMUM_VALID_OPTIONAL_HEADER_RAW_SIZE ):
        
            # Add enough zeroes to make up for the unused fields
            #
            padding_length = 128
            
            # Create padding
            #
            padded_data = self.__data__[optional_header_offset:] + (
                '\0' * padding_length)
            
            self.OPTIONAL_HEADER = self.__unpack_data__(
                self.__IMAGE_OPTIONAL_HEADER_format__,
                padded_data,
                file_offset = optional_header_offset)
         
            
        # Check the Magic in the OPTIONAL_HEADER and set the PE file
        # type accordingly
        #
        if self.OPTIONAL_HEADER is not None:
        
            if self.OPTIONAL_HEADER.Magic == OPTIONAL_HEADER_MAGIC_PE:
            
                self.PE_TYPE = OPTIONAL_HEADER_MAGIC_PE
                
            elif self.OPTIONAL_HEADER.Magic == OPTIONAL_HEADER_MAGIC_PE_PLUS:
    
                self.PE_TYPE = OPTIONAL_HEADER_MAGIC_PE_PLUS
            
                self.OPTIONAL_HEADER = self.__unpack_data__(
                    self.__IMAGE_OPTIONAL_HEADER64_format__,
                    self.__data__[optional_header_offset:],
                    file_offset = optional_header_offset)

# Again, as explained above, we try to parse
                # a reduced form of the Optional Header which
                # is still valid despite not including all
                # structure members
                #
                MINIMUM_VALID_OPTIONAL_HEADER_RAW_SIZE = 69+4

if ( self.OPTIONAL_HEADER is None and 
                    len(self.__data__[optional_header_offset:])
                        >= MINIMUM_VALID_OPTIONAL_HEADER_RAW_SIZE ):
                
                    padding_length = 128
                    padded_data = self.__data__[optional_header_offset:] + (
                        '\0' * padding_length)
                    self.OPTIONAL_HEADER = self.__unpack_data__(
                        self.__IMAGE_OPTIONAL_HEADER64_format__,
                        padded_data,
                        file_offset = optional_header_offset)
        
        
        if not self.FILE_HEADER:
            raise PEFormatError('File Header missing')

# OC Patch:
        # Die gracefully if there is no OPTIONAL_HEADER field
        # 975440f5ad5e2e4a92c4d9a5f22f75c1
        if self.PE_TYPE is None or self.OPTIONAL_HEADER is None:
            raise PEFormatError("No Optional Header found, invalid PE32 or PE32+ file")
            
        dll_characteristics_flags = self.retrieve_flags(DLL_CHARACTERISTICS, 'IMAGE_DLL_CHARACTERISTICS_')

# Set the Dll Characteristics flags according the the DllCharacteristics member
        self.set_flags(
            self.OPTIONAL_HEADER,
            self.OPTIONAL_HEADER.DllCharacteristics,
            dll_characteristics_flags)

self.OPTIONAL_HEADER.DATA_DIRECTORY = []
        #offset = (optional_header_offset + self.FILE_HEADER.SizeOfOptionalHeader)
        offset = (optional_header_offset + self.OPTIONAL_HEADER.sizeof())
            
        
        self.NT_HEADERS.FILE_HEADER = self.FILE_HEADER
        self.NT_HEADERS.OPTIONAL_HEADER = self.OPTIONAL_HEADER

# The NumberOfRvaAndSizes is sanitized to stay within 
        # reasonable limits so can be casted to an int
        #
        if self.OPTIONAL_HEADER.NumberOfRvaAndSizes > 0x10:
            self.__warnings.append(
                'Suspicious NumberOfRvaAndSizes in the Optional Header. ' +
                'Normal values are never larger than 0x10, the value is: 0x%x' %
                self.OPTIONAL_HEADER.NumberOfRvaAndSizes )
                
        for i in xrange(int(0x7fffffffL & self.OPTIONAL_HEADER.NumberOfRvaAndSizes)):

if len(self.__data__[offset:]) == 0:
                break
                        
            if len(self.__data__[offset:]) < 8:
                data = self.__data__[offset:]+'\0'*8
            else:
                data = self.__data__[offset:]

dir_entry = self.__unpack_data__(
                self.__IMAGE_DATA_DIRECTORY_format__,
                data,
                file_offset = offset)
                
            if dir_entry is None:
                break

# Would fail if missing an entry
            # 1d4937b2fa4d84ad1bce0309857e70ca offending sample
            try:
                dir_entry.name = DIRECTORY_ENTRY[i]
            except (KeyError, AttributeError):
                break

offset += dir_entry.sizeof()
            
            self.OPTIONAL_HEADER.DATA_DIRECTORY.append(dir_entry)

# If the offset goes outside the optional header,
            # the loop is broken, regardless of how many directories
            # NumberOfRvaAndSizes says there are
            #
            # We assume a normally sized optional header, hence that we do
            # a sizeof() instead of reading SizeOfOptionalHeader.
            # Then we add a default number of drectories times their size,
            # if we go beyond that, we assume the number of directories
            # is wrong and stop processing
            if offset >= (optional_header_offset + 
                self.OPTIONAL_HEADER.sizeof() + 8*16) :
                
                break
                
                        
        offset = self.parse_sections(sections_offset)
        
        # OC Patch:
        # There could be a problem if there are no raw data sections
        # greater than 0
        # fc91013eb72529da005110a3403541b6 example
        # Should this throw an exception in the minimum header offset
        # can't be found?
        #
        rawDataPointers = [
            s.PointerToRawData for s in self.sections if s.PointerToRawData>0]
            
        if len(rawDataPointers) > 0:
            lowest_section_offset = min(rawDataPointers)
        else:
            lowest_section_offset = None

if not lowest_section_offset or lowest_section_offset<offset:
            self.header = self.__data__[:offset]
        else:
            self.header = self.__data__[:lowest_section_offset]

# Check whether the entry point lies within a section
        #
        if self.get_section_by_rva(self.OPTIONAL_HEADER.AddressOfEntryPoint) is not None:
        
            # Check whether the entry point lies within the file
            #
            ep_offset = self.get_offset_from_rva(self.OPTIONAL_HEADER.AddressOfEntryPoint)
            if ep_offset > len(self.__data__):
            
                self.__warnings.append(
                    'Possibly corrupt file. AddressOfEntryPoint lies outside the file. ' +
                    'AddressOfEntryPoint: 0x%x' %
                    self.OPTIONAL_HEADER.AddressOfEntryPoint )
            
        else:

self.__warnings.append(
                'AddressOfEntryPoint lies outside the sections\' boundaries. ' +
                'AddressOfEntryPoint: 0x%x' %
                self.OPTIONAL_HEADER.AddressOfEntryPoint )
                
        
        if not fast_load:
            self.parse_data_directories()

def get_warnings(self):
        """Return the list of warnings.
        
        Non-critical problems found when parsing the PE file are
        appended to a list of warnings. This method returns the
        full list.
        """
    
        return self.__warnings
        
        
    def show_warnings(self):
        """Print the list of warnings.
        
        Non-critical problems found when parsing the PE file are
        appended to a list of warnings. This method prints the
        full list to standard output.
        """
    
        for warning in self.__warnings:
            print '>', warning

def full_load(self):
        """Process the data directories.
        
        This mathod will load the data directories which might not have
        been loaded if the "fast_load" option was used.
        """
        
        self.parse_data_directories()
        
        
    def write(self, filename=None):
        """Write the PE file.
        
        This function will process all headers and components
        of the PE file and include all changes made (by just
        assigning to attributes in the PE objects) and write
        the changes back to a file whose name is provided as
        an argument. The filename is optional.
        The data to be written to the file will be returned
        as a 'str' object.
        """
    
        file_data = list(self.__data__)
        for struct in self.__structures__:
        
            struct_data = list(struct.__pack__())
            offset = struct.get_file_offset()
            
            file_data[offset:offset+len(struct_data)] = struct_data
            
        if hasattr(self, 'VS_VERSIONINFO'):
            if hasattr(self, 'FileInfo'):
                for entry in self.FileInfo:
                    if hasattr(entry, 'StringTable'):
                        for st_entry in entry.StringTable:
                            for key, entry in st_entry.entries.items():
                            
                                offsets = st_entry.entries_offsets[key]
                                lengths = st_entry.entries_lengths[key]
                                
                                if len( entry ) > lengths[1]:

uc = zip(
                                            list(entry[:lengths[1]]), ['\0'] * lengths[1] )
                                    l = list()
                                    map(l.extend, uc)

file_data[ 
                                        offsets[1] : offsets[1] + lengths[1]*2 ] = l
                                        
                                else:
                                    
                                    uc = zip(
                                            list(entry), ['\0'] * len(entry) )
                                    l = list()
                                    map(l.extend, uc)

file_data[ 
                                        offsets[1] : offsets[1] + len(entry)*2 ] = l

remainder = lengths[1] - len(entry)
                                    file_data[ 
                                        offsets[1] + len(entry)*2 : 
                                        offsets[1] + lengths[1]*2 ] = [
                                            u'\0' ] * remainder*2

new_file_data = ''.join( [ chr(ord(c)) for c in file_data ] )

if filename:
            f = file(filename, 'wb+')
            f.write(new_file_data)
            f.close()

return new_file_data

def parse_sections(self, offset):
        """Fetch the PE file sections.
        
        The sections will be readily available in the "sections" attribute.
        Its attributes will contain all the section information plus "data"
        a buffer containing the section's data.
        
        The "Characteristics" member will be processed and attributes 
        representing the section characteristics (with the 'IMAGE_SCN_'
        string trimmed from the constant's names) will be added to the
        section instance.
        
        Refer to the SectionStructure class for additional info.
        """
        
        self.sections = []
        
        for i in xrange(self.FILE_HEADER.NumberOfSections):
            section = SectionStructure(self.__IMAGE_SECTION_HEADER_format__)
            if not section:
                break
            section_offset = offset + section.sizeof() * i
            section.set_file_offset(section_offset)
            section.__unpack__(self.__data__[section_offset:])
            self.__structures__.append(section)
                        
            if section.SizeOfRawData > len(self.__data__):
                self.__warnings.append(
                    ('Error parsing section %d. ' % i) +
                    'SizeOfRawData is larger than file.')

if section.PointerToRawData > len(self.__data__):
                self.__warnings.append(
                    ('Error parsing section %d. ' % i) +
                    'PointerToRawData points beyond the end of the file.')

if section.Misc_VirtualSize > 0x10000000:
                self.__warnings.append(
                    ('Suspicious value found parsing section %d. ' % i) +
                    'VirtualSize is extremely large > 256MiB.')

if section.VirtualAddress > 0x10000000:
                self.__warnings.append(
                    ('Suspicious value found parsing section %d. ' % i) +
                    'VirtualAddress is beyond 0x10000000.')

#
            # Some packer used a non-aligned PointerToRawData in the sections,
            # which causes several common tools not to load the section data
            # properly as they blindly read from the indicated offset.
            # It seems that Windows will round the offset down to the largest
            # offset multiple of FileAlignment which is smaller than
            # PointerToRawData. The following code will do the same.
            #
            
            #alignment = self.OPTIONAL_HEADER.FileAlignment
            section_data_start = section.PointerToRawData
            
            if ( self.OPTIONAL_HEADER.FileAlignment != 0 and 
                (section.PointerToRawData % self.OPTIONAL_HEADER.FileAlignment) != 0):
                self.__warnings.append(
                    ('Error parsing section %d. ' % i) +
                    'Suspicious value for FileAlignment in the Optional Header. ' +
                    'Normally the PointerToRawData entry of the sections\' structures ' +
                    'is a multiple of FileAlignment, this might imply the file ' +
                    'is trying to confuse tools which parse this incorrectly')
            
            section_data_end = section_data_start+section.SizeOfRawData
            section.set_data(self.__data__[section_data_start:section_data_end])
            
            section_flags = self.retrieve_flags(SECTION_CHARACTERISTICS, 'IMAGE_SCN_')
            
            # Set the section's flags according the the Characteristics member
            self.set_flags(section, section.Characteristics, section_flags)

if ( section.__dict__.get('IMAGE_SCN_MEM_WRITE', False)  and 
                section.__dict__.get('IMAGE_SCN_MEM_EXECUTE', False) ):
                
                self.__warnings.append(
                    ('Suspicious flags set for section %d. ' % i) +
                    'Both IMAGE_SCN_MEM_WRITE and IMAGE_SCN_MEM_EXECUTE are set.' +
                    'This might indicate a packed executable.')

self.sections.append(section)
            
        if self.FILE_HEADER.NumberOfSections > 0 and self.sections:
            return offset + self.sections[0].sizeof()*self.FILE_HEADER.NumberOfSections
        else:
            return offset

def retrieve_flags(self, flag_dict, flag_filter):
        """Read the flags from a dictionary and return them in a usable form.
        
        Will return a list of (flag, value) for all flags in "flag_dict"
        matching the filter "flag_filter".
        """
        
        return [(f[0], f[1]) for f in flag_dict.items() if
                isinstance(f[0], str) and f[0].startswith(flag_filter)]

def set_flags(self, obj, flag_field, flags):
        """Will process the flags and set attributes in the object accordingly.
        
        The object "obj" will gain attritutes named after the flags provided in
        "flags" and valued True/False, matching the results of applyin each
        flag value from "flags" to flag_field.
        """
    
        for flag in flags:
            if flag[1] & flag_field:
                setattr(obj, flag[0], True)
            else:
                setattr(obj, flag[0], False)
    
    
            
    def parse_data_directories(self):
        """Parse and process the PE file's data directories."""
        
        directory_parsing = (
            ('IMAGE_DIRECTORY_ENTRY_IMPORT', self.parse_import_directory),
            ('IMAGE_DIRECTORY_ENTRY_EXPORT', self.parse_export_directory),
            ('IMAGE_DIRECTORY_ENTRY_RESOURCE', self.parse_resources_directory),
            ('IMAGE_DIRECTORY_ENTRY_DEBUG', self.parse_debug_directory),
            ('IMAGE_DIRECTORY_ENTRY_BASERELOC', self.parse_relocations_directory),
            ('IMAGE_DIRECTORY_ENTRY_TLS', self.parse_directory_tls),
            ('IMAGE_DIRECTORY_ENTRY_DELAY_IMPORT', self.parse_delay_import_directory),
            ('IMAGE_DIRECTORY_ENTRY_BOUND_IMPORT', self.parse_directory_bound_imports) )
            
        for entry in directory_parsing:
            # OC Patch:
            #
            try:
                dir_entry = self.OPTIONAL_HEADER.DATA_DIRECTORY[
                    DIRECTORY_ENTRY[entry[0]]]
            except IndexError:
                break
            if dir_entry.VirtualAddress:
                value = entry[1](dir_entry.VirtualAddress, dir_entry.Size)
                if value:
                    setattr(self, entry[0][6:], value)
        
        
    def parse_directory_bound_imports(self, rva, size):
        """"""
        
        bnd_descr = Structure(self.__IMAGE_BOUND_IMPORT_DESCRIPTOR_format__)
        bnd_descr_size = bnd_descr.sizeof()
        start = rva
        
        bound_imports = []
        while True:

bnd_descr = self.__unpack_data__(
                self.__IMAGE_BOUND_IMPORT_DESCRIPTOR_format__,
                   self.__data__[rva:rva+bnd_descr_size],
                   file_offset = rva)
            if bnd_descr is None:
                # If can't parse directory then silently return.
                # This directory does not necesarily have to be valid to
                # still have a valid PE file

self.__warnings.append(
                    'The Bound Imports directory exists but can\'t be parsed.')

return
                   
            if bnd_descr.all_zeroes():
                break
                
            rva += bnd_descr.sizeof()
            
            forwarder_refs = []
            for idx in xrange(bnd_descr.NumberOfModuleForwarderRefs):
                # Both structures IMAGE_BOUND_IMPORT_DESCRIPTOR and
                # IMAGE_BOUND_FORWARDER_REF have the same size.
                bnd_frwd_ref = self.__unpack_data__(
                    self.__IMAGE_BOUND_FORWARDER_REF_format__,
                    self.__data__[rva:rva+bnd_descr_size],
                    file_offset = rva)
                # OC Patch:
                if not bnd_frwd_ref:
                    raise PEFormatError(
                        "IMAGE_BOUND_FORWARDER_REF cannot be read")
                rva += bnd_frwd_ref.sizeof()
                
                name_str =  self.get_string_from_data(
                    start+bnd_frwd_ref.OffsetModuleName, self.__data__)
                    
                if not name_str:
                    break
                forwarder_refs.append(BoundImportRefData(
                    struct = bnd_frwd_ref,
                    name = name_str))
                
            name_str = self.get_string_from_data(
                start+bnd_descr.OffsetModuleName, self.__data__)
                
            if not name_str:
                break
            bound_imports.append(
                BoundImportDescData(
                    struct = bnd_descr,
                    name = name_str,
                    entries = forwarder_refs))
                    
        return bound_imports

def parse_directory_tls(self, rva, size):
        """"""
            
        if self.PE_TYPE == OPTIONAL_HEADER_MAGIC_PE:
            format = self.__IMAGE_TLS_DIRECTORY_format__
            
        elif self.PE_TYPE == OPTIONAL_HEADER_MAGIC_PE_PLUS:
            format = self.__IMAGE_TLS_DIRECTORY64_format__

tls_struct = self.__unpack_data__(
            format,
            self.get_data(rva),
            file_offset = self.get_offset_from_rva(rva))
            
        if not tls_struct:
            return None
                
        return TlsData( struct = tls_struct )
    
    
    def parse_relocations_directory(self, rva, size):
        """"""
        
        rlc = Structure(self.__IMAGE_BASE_RELOCATION_format__)
        rlc_size = rlc.sizeof()
        end = rva+size
        
        relocations = []
        while rva<end:
            
            # OC Patch:
            # Malware that has bad rva entries will cause an error.
            # Just continue on after an exception
            #
            try:
                rlc = self.__unpack_data__(
                    self.__IMAGE_BASE_RELOCATION_format__,
                    self.get_data(rva, rlc_size),
                    file_offset = self.get_offset_from_rva(rva) )
            except PEFormatError:
                self.__warnings.append(
                    'Invalid relocation information. Can\'t read ' +
                    'data at RVA: 0x%x' % rva)
                rlc = None
            
            if not rlc:
                break
                
            reloc_entries = self.parse_relocations(
                rva+rlc_size, rlc.VirtualAddress, rlc.SizeOfBlock-rlc_size)
                
            relocations.append(
                BaseRelocationData(
                    struct = rlc,
                    entries = reloc_entries))
            
            if not rlc.SizeOfBlock:
                break
            rva += rlc.SizeOfBlock
            
        return relocations
    
        
    def parse_relocations(self, data_rva, rva, size):
        """"""
        
        data = self.get_data(data_rva, size)
        
        entries = []
        for idx in xrange(len(data)/2):
            word = struct.unpack('<H', data[idx*2:(idx+1)*2])[0]
            reloc_type = (word>>12)
            reloc_offset = (word&0x0fff)
            entries.append(
                RelocationData(
                    type = reloc_type,
                    rva = reloc_offset+rva))
            
        return entries

def parse_debug_directory(self, rva, size):
        """"""
            
        dbg = Structure(self.__IMAGE_DEBUG_DIRECTORY_format__)
        dbg_size = dbg.sizeof()
        
        debug = []
        for idx in xrange(size/dbg_size):
            try:
                data = self.get_data(rva+dbg_size*idx, dbg_size)
            except PEFormatError, e:
                self.__warnings.append(
                    'Invalid debug information. Can\'t read ' +
                    'data at RVA: 0x%x' % rva)
                return None
                
            dbg = self.__unpack_data__(
                self.__IMAGE_DEBUG_DIRECTORY_format__,
                data, file_offset = self.get_offset_from_rva(rva+dbg_size*idx))
                
            if not dbg:
                return None
                
            debug.append(
                DebugData(
                    struct = dbg))
            
        return debug

def parse_resources_directory(self, rva, size=0, base_rva = None, level = 0):
        """Parse the resources directory.
        
        Given the rva of the resources directory, it will process all
        its entries.
        
        The root will have the corresponding member of its structure,
        IMAGE_RESOURCE_DIRECTORY plus 'entries', a list of all the
        entries in the directory.
        
        Those entries will have, correspondingly, all the structure's
        members (IMAGE_RESOURCE_DIRECTORY_ENTRY) and an additional one,
        "directory", pointing to the IMAGE_RESOURCE_DIRECTORY structure
        representing upper layers of the tree. This one will also have
        an 'entries' attribute, pointing to the 3rd, and last, level.
        Another directory with more entries. Those last entries will
        have a new atribute (both 'leaf' or 'data_entry' can be used to
        access it). This structure finally points to the resource data.
        All the members of this structure, IMAGE_RESOURCE_DATA_ENTRY,
        are available as its attributes.
        """
        
        # OC Patch:
        original_rva = rva
        
        if base_rva is None:
            base_rva = rva
        
        resources_section = self.get_section_by_rva(rva)
        
        try:
            # If the RVA is invalid all would blow up. Some EXEs seem to be
            # specially nasty and have an invalid RVA.
            data = self.get_data(rva)
        except PEFormatError, e:
            self.__warnings.append(
                'Invalid resources directory. Can\'t read ' +
                'directory data at RVA: 0x%x' % rva)
            return None
            
        # Get the resource directory structure, that is, the header
        # of the table preceding the actual entries
        #
        resource_dir = self.__unpack_data__(
            self.__IMAGE_RESOURCE_DIRECTORY_format__, data,
            file_offset = self.get_offset_from_rva(rva) )
        if resource_dir is None:
            # If can't parse resources directory then silently return.
            # This directory does not necesarily have to be valid to
            # still have a valid PE file
            self.__warnings.append(
                'Invalid resources directory. Can\'t parse ' +
                'directory data at RVA: 0x%x' % rva)
            return None
        
        dir_entries = []
        
        # Advance the rva to the positon immediately following the directory
        # table header and pointing to the first entry in the table
        #
        rva += resource_dir.sizeof()
        
        number_of_entries = (
            resource_dir.NumberOfNamedEntries +
            resource_dir.NumberOfIdEntries )
            
        strings_to_postprocess = list()
        
        for idx in xrange(number_of_entries):
            
            res = self.parse_resource_entry(rva)
            if res is None:
                self.__warnings.append(
                    'Error parsing the resources directory, ' +
                    'Entry %d is invalid, RVA = 0x%x. ' % 
                    (idx, rva) )
                break

entry_name = None
            entry_id = None
            
            # If all named entries have been processed, only Id ones
            # remain
            
            if idx >= resource_dir.NumberOfNamedEntries:
                entry_id = res.Name
            else:
                ustr_offset = base_rva+res.NameOffset
                try:
                    #entry_name = self.get_string_u_at_rva(ustr_offset, max_length=16)
                    entry_name = UnicodeStringWrapperPostProcessor(self, ustr_offset)
                    strings_to_postprocess.append(entry_name)
                    
                except PEFormatError, excp:
                    self.__warnings.append(
                        'Error parsing the resources directory, ' +
                        'attempting to read entry name. ' +
                        'Can\'t read unicode string at offset 0x%x' % 
                        (ustr_offset) )
                
                
            if res.DataIsDirectory:
                # OC Patch:
                #
                # One trick malware can do is to recursively reference
                # the next directory. This causes hilarity to ensue when
                # trying to parse everything correctly.
                # If the original RVA given to this function is equal to
                # the next one to parse, we assume that it's a trick.
                # Instead of raising a PEFormatError this would skip some
                # reasonable data so we just break.
                #
                # 9ee4d0a0caf095314fd7041a3e4404dc is the offending sample
                if original_rva == (base_rva + res.OffsetToDirectory):
                    
                    break
                    
                else:
                    entry_directory = self.parse_resources_directory(
                        base_rva+res.OffsetToDirectory,
                        base_rva=base_rva, level = level+1)

if not entry_directory:
                    break
                dir_entries.append(
                    ResourceDirEntryData(
                        struct = res,
                        name = entry_name,
                        id = entry_id,
                        directory = entry_directory))

else:
                struct = self.parse_resource_data_entry(
                    base_rva + res.OffsetToDirectory)

if struct:
                    entry_data = ResourceDataEntryData(
                        struct = struct,
                        lang = res.Name & 0xff,
                        sublang = (res.Name>>8) & 0xff)
                        
                    dir_entries.append(
                        ResourceDirEntryData(
                            struct = res,
                            name = entry_name,
                            id = entry_id,
                            data = entry_data))
                    
                else:
                    break

# Check if this entry contains version information
            #
            if level == 0 and res.Id == RESOURCE_TYPE['RT_VERSION']:
                if len(dir_entries)>0:
                    last_entry = dir_entries[-1]
                    
                rt_version_struct = None
                try:
                    rt_version_struct = last_entry.directory.entries[0].directory.entries[0].data.struct
                except:
                    # Maybe a malformed directory structure...?
                    # Lets ignore it
                    pass
                    
                if rt_version_struct is not None:
                    self.parse_version_information(rt_version_struct)
    
            rva += res.sizeof()
            
                    
        string_rvas = [s.get_rva() for s in strings_to_postprocess]
        string_rvas.sort()
        
        for idx, s in enumerate(strings_to_postprocess):
            s.render_pascal_16()
            
            
        resource_directory_data = ResourceDirData(
            struct = resource_dir,
            entries = dir_entries)
                        
        return resource_directory_data
        
            
    def parse_resource_data_entry(self, rva):
        """Parse a data entry from the resources directory."""
    
        try:
            # If the RVA is invalid all would blow up. Some EXEs seem to be
            # specially nasty and have an invalid RVA.
            data = self.get_data(rva)
        except PEFormatError, excp:
            self.__warnings.append(
                'Error parsing a resource directory data entry, ' +
                'the RVA is invalid: 0x%x' % ( rva ) )
            return None
            
        data_entry = self.__unpack_data__(
            self.__IMAGE_RESOURCE_DATA_ENTRY_format__, data,
            file_offset = self.get_offset_from_rva(rva) )
            
        return data_entry

def parse_resource_entry(self, rva):
        """Parse a directory entry from the resources directory."""

resource = self.__unpack_data__(
            self.__IMAGE_RESOURCE_DIRECTORY_ENTRY_format__, self.get_data(rva),
            file_offset = self.get_offset_from_rva(rva) )
            
        if resource is None:
            return None
            
        #resource.NameIsString = (resource.Name & 0x80000000L) >> 31
        resource.NameOffset = resource.Name & 0x7FFFFFFFL
        
        resource.__pad = resource.Name & 0xFFFF0000L
        resource.Id = resource.Name & 0x0000FFFFL
        
        resource.DataIsDirectory = (resource.OffsetToData & 0x80000000L) >> 31
        resource.OffsetToDirectory = resource.OffsetToData & 0x7FFFFFFFL
        
        return resource
            
            
    def parse_version_information(self, version_struct):
        """Parse version information structure.
        
        The date will be made available in three attributes of the PE object.
        
        VS_VERSIONINFO     will contain the first three fields of the main structure:
            'Length', 'ValueLength', and 'Type'
            
        VS_FIXEDFILEINFO    will hold the rest of the fields, accessible as sub-attributes:
            'Signature', 'StrucVersion', 'FileVersionMS', 'FileVersionLS',
            'ProductVersionMS', 'ProductVersionLS', 'FileFlagsMask', 'FileFlags',
            'FileOS', 'FileType', 'FileSubtype', 'FileDateMS', 'FileDateLS'
            
        FileInfo    is a list of all StringFileInfo and VarFileInfo structures.
        
        StringFileInfo structures will have a list as an attribute named 'StringTable'
        containing all the StringTable structures. Each of those structures contains a 
        dictionary 'entries' with all the key/value version information string pairs.
        
        VarFileInfo structures will have a list as an attribute named 'Var' containing
        all Var structures. Each Var structure will have a dictionary as an attribute
        named 'entry' which will contain the name and value of the Var.
        """
    
    
        # Retrieve the data for the version info resource
        #
        start_offset = self.get_offset_from_rva( version_struct.OffsetToData )
        raw_data = self.__data__[ start_offset : start_offset+version_struct.Size ]
        
        
        # Map the main structure and the subsequent string
        #    
        versioninfo_struct = self.__unpack_data__(
            self.__VS_VERSIONINFO_format__, raw_data, 
            file_offset = start_offset )
            
        if versioninfo_struct is None:
            return 
            
        ustr_offset = version_struct.OffsetToData + versioninfo_struct.sizeof()
        try:
            versioninfo_string = self.get_string_u_at_rva( ustr_offset )
        except PEFormatError, excp:
            self.__warnings.append(
                'Error parsing the version information, ' +
                'attempting to read VS_VERSION_INFO string. Can\'t ' +
                'read unicode string at offset 0x%x' % (
                ustr_offset ) )
                
            versioninfo_string = None
         
        # If the structure does not contain the expected name, it's assumed to be invalid
        #            
        if versioninfo_string != u'VS_VERSION_INFO':

self.__warnings.append('Invalid VS_VERSION_INFO block')
            return

# Set the PE object's VS_VERSIONINFO to this one
        #
        self.VS_VERSIONINFO = versioninfo_struct

# The the Key attribute to point to the unicode string identifying the structure
        #        
        self.VS_VERSIONINFO.Key = versioninfo_string

# Process the fixed version information, get the offset and structure
        #
        fixedfileinfo_offset = self.dword_align(
            versioninfo_struct.sizeof() + 2 * (len(versioninfo_string) + 1),
            version_struct.OffsetToData)
        fixedfileinfo_struct = self.__unpack_data__(
            self.__VS_FIXEDFILEINFO_format__,
            raw_data[fixedfileinfo_offset:], 
            file_offset = start_offset+fixedfileinfo_offset )

if not fixedfileinfo_struct:
            return

# Set the PE object's VS_FIXEDFILEINFO to this one
        #
        self.VS_FIXEDFILEINFO = fixedfileinfo_struct
        
        
        # Start parsing all the StringFileInfo and VarFileInfo structures
        #
        
        # Get the first one
        #
        stringfileinfo_offset = self.dword_align(
            fixedfileinfo_offset + fixedfileinfo_struct.sizeof(),
            version_struct.OffsetToData)
        original_stringfileinfo_offset = stringfileinfo_offset
        
        
        # Set the PE object's attribute that will contain them all.
        #
        self.FileInfo = list()

while True:
        
            # Process the StringFileInfo/VarFileInfo struct
            #
            stringfileinfo_struct = self.__unpack_data__(
                self.__StringFileInfo_format__, 
                raw_data[stringfileinfo_offset:], 
                file_offset = start_offset+stringfileinfo_offset )
                
            if stringfileinfo_struct is None:
                self.__warnings.append(
                    'Error parsing StringFileInfo/VarFileInfo struct' )
                return None
            
            # Get the subsequent string defining the structure.
            #
            ustr_offset = ( version_struct.OffsetToData + 
                stringfileinfo_offset + versioninfo_struct.sizeof() )
            try:
                stringfileinfo_string = self.get_string_u_at_rva( ustr_offset )
            except PEFormatError, excp:
                self.__warnings.append(
                    'Error parsing the version information, ' +
                    'attempting to read StringFileInfo string. Can\'t ' +
                    'read unicode string at offset 0x%x' %  ( ustr_offset ) )
                break
        
            # Set such string as the Key attribute
            #
            stringfileinfo_struct.Key = stringfileinfo_string
            
            
            # Append the structure to the PE object's list
            #
            self.FileInfo.append(stringfileinfo_struct)
        
        
            # Parse a StringFileInfo entry
            #
            if stringfileinfo_string == u'StringFileInfo':
                
                if stringfileinfo_struct.Type == 1 and stringfileinfo_struct.ValueLength == 0:
            
                    stringtable_offset = self.dword_align(
                        stringfileinfo_offset + stringfileinfo_struct.sizeof() + 
                            2*(len(stringfileinfo_string)+1),
                        version_struct.OffsetToData)
                  
                    stringfileinfo_struct.StringTable = list()

# Process the String Table entries
                    #
                    while True:
                        stringtable_struct = self.__unpack_data__(
                            self.__StringTable_format__,
                            raw_data[stringtable_offset:], 
                            file_offset = start_offset+stringtable_offset )
                            
                        if not stringtable_struct:
                            break
                            
                        ustr_offset = ( version_struct.OffsetToData + stringtable_offset + 
                            stringtable_struct.sizeof() )
                        try:
                            stringtable_string = self.get_string_u_at_rva( ustr_offset )
                        except PEFormatError, excp:
                            self.__warnings.append(
                                'Error parsing the version information, ' +
                                'attempting to read StringTable string. Can\'t ' +
                                'read unicode string at offset 0x%x' % ( ustr_offset ) )
                            break
                        
                        stringtable_struct.LangID = stringtable_string
                        stringtable_struct.entries = dict()
                        stringtable_struct.entries_offsets = dict()
                        stringtable_struct.entries_lengths = dict()
                        stringfileinfo_struct.StringTable.append(stringtable_struct)
            
                        entry_offset = self.dword_align(
                            stringtable_offset + stringtable_struct.sizeof() +
                                2*(len(stringtable_string)+1),
                            version_struct.OffsetToData)
            
                        # Process all entries in the string table
                        #
            
                        while entry_offset < stringtable_offset + stringtable_struct.Length:
                    
                            string_struct = self.__unpack_data__(
                                self.__String_format__, raw_data[entry_offset:], 
                                file_offset = start_offset+entry_offset )
                                
                            if not string_struct:
                                break
                                
                            ustr_offset = ( version_struct.OffsetToData + entry_offset +
                                string_struct.sizeof() )
                            try:
                                key = self.get_string_u_at_rva( ustr_offset )
                                key_offset = self.get_offset_from_rva( ustr_offset )
                            except PEFormatError, excp:
                                self.__warnings.append(
                                    'Error parsing the version information, ' +
                                    'attempting to read StringTable Key string. Can\'t ' +
                                    'read unicode string at offset 0x%x' % ( ustr_offset ) )
                                break
                                
                            value_offset = self.dword_align(
                                2*(len(key)+1) + entry_offset + string_struct.sizeof(),
                                version_struct.OffsetToData)
            
                            ustr_offset = version_struct.OffsetToData + value_offset
                            try:
                                value = self.get_string_u_at_rva( ustr_offset,
                                    max_length = string_struct.ValueLength )
                                value_offset = self.get_offset_from_rva( ustr_offset )
                            except PEFormatError, excp:
                                self.__warnings.append(
                                    'Error parsing the version information, ' +
                                    'attempting to read StringTable Value string. ' +
                                    'Can\'t read unicode string at offset 0x%x' % (
                                    ustr_offset ) )
                                break
                                
                            if string_struct.Length == 0:
                                entry_offset = stringtable_offset + stringtable_struct.Length
                            else:
                                entry_offset = self.dword_align(
                                    string_struct.Length+entry_offset, version_struct.OffsetToData)
                                
                            key_as_char = []
                            for c in key:
                                if ord(c)>128:
                                    key_as_char.append('\\x%02x' %ord(c))
                                else:
                                    key_as_char.append(c)
                            
                            key_as_char = ''.join(key_as_char)

setattr(stringtable_struct, key_as_char, value)
                            stringtable_struct.entries[key] = value
                            stringtable_struct.entries_offsets[key] = (key_offset, value_offset)
                            stringtable_struct.entries_lengths[key] = (len(key), len(value))
                            
                    
                        stringtable_offset = self.dword_align(
                            stringtable_struct.Length + stringtable_offset,
                            version_struct.OffsetToData)
                        if stringtable_offset >= stringfileinfo_struct.Length:
                            break
                    
            # Parse a VarFileInfo entry
            #
            elif stringfileinfo_string == u'VarFileInfo':
            
                varfileinfo_struct = stringfileinfo_struct
                varfileinfo_struct.name = 'VarFileInfo'
            
                if varfileinfo_struct.Type == 1 and varfileinfo_struct.ValueLength == 0:
              
                    var_offset = self.dword_align(
                        stringfileinfo_offset + varfileinfo_struct.sizeof() +
                            2*(len(stringfileinfo_string)+1),
                        version_struct.OffsetToData)
                        
                    varfileinfo_struct.Var = list()
              
                    # Process all entries
                    #

while True:
                        var_struct = self.__unpack_data__(
                            self.__Var_format__,
                            raw_data[var_offset:], 
                            file_offset = start_offset+var_offset )
                            
                        if not var_struct:
                            break
                            
                        ustr_offset = ( version_struct.OffsetToData + var_offset + 
                            var_struct.sizeof() )
                        try:
                            var_string = self.get_string_u_at_rva( ustr_offset )
                        except PEFormatError, excp:
                            self.__warnings.append(
                                'Error parsing the version information, ' +
                                'attempting to read VarFileInfo Var string. ' +
                                'Can\'t read unicode string at offset 0x%x' % (ustr_offset))
                            break

varfileinfo_struct.Var.append(var_struct)
                
                        varword_offset = self.dword_align(
                            2*(len(var_string)+1) + var_offset + var_struct.sizeof(),
                            version_struct.OffsetToData)
                        orig_varword_offset = varword_offset
                            
                        while varword_offset < orig_varword_offset + var_struct.ValueLength:
                            word1 = self.get_word_from_data(
                                raw_data[varword_offset:varword_offset+2], 0)
                            word2 = self.get_word_from_data(
                                raw_data[varword_offset+2:varword_offset+4], 0)
                            varword_offset += 4
        
                            var_struct.entry = {var_string: '0x%04x 0x%04x' % (word1, word2)}

var_offset = self.dword_align(
                            var_offset+var_struct.Length, version_struct.OffsetToData)
                            
                        if var_offset <= var_offset+var_struct.Length:
                            break
              
              
              
            # Increment and align the offset
            #
            stringfileinfo_offset = self.dword_align(
                stringfileinfo_struct.Length+stringfileinfo_offset,
                version_struct.OffsetToData)
          
            # Check if all the StringFileInfo and VarFileInfo items have been processed
            #
            if stringfileinfo_struct.Length == 0 or stringfileinfo_offset >= versioninfo_struct.Length:
                break
            
            
                            
    def parse_export_directory(self, rva, size):
        """Parse the export directory.
        
        Given the rva of the export directory, it will process all
        its entries.
        
        The exports will be made available through a list "exports"
        containing a tuple with the following elements:
        
            (ordinal, symbol_address, symbol_name)
            
        And also through a dicionary "exports_by_ordinal" whose keys
        will be the ordinals and the values tuples of the from:
        
            (symbol_address, symbol_name)
            
        The symbol addresses are relative, not absolute.
        """
    
        try:
            export_dir =  self.__unpack_data__(
                self.__IMAGE_EXPORT_DIRECTORY_format__, self.get_data(rva),
                file_offset = self.get_offset_from_rva(rva) )
        except PEFormatError:
            self.__warnings.append(
                'Error parsing export directory at RVA: 0x%x' % ( rva ) )
            return
        
        if not export_dir:
            return
        
        try:
            address_of_names = self.get_data(
                export_dir.AddressOfNames, export_dir.NumberOfNames*4)
            address_of_name_ordinals = self.get_data(
                export_dir.AddressOfNameOrdinals, export_dir.NumberOfNames*4)
            address_of_functions = self.get_data(
                export_dir.AddressOfFunctions, export_dir.NumberOfFunctions*4)
        except PEFormatError:
            self.__warnings.append(
                'Error parsing export directory at RVA: 0x%x' % ( rva ) )
            return
            
        exports = []
        
        for i in xrange(export_dir.NumberOfNames):
        
                
            symbol_name = self.get_string_at_rva(
                self.get_dword_from_data(address_of_names, i))
            
            symbol_ordinal = self.get_word_from_data(
                address_of_name_ordinals, i)
                
            
            if symbol_ordinal*4<len(address_of_functions):
                symbol_address = self.get_dword_from_data(
                    address_of_functions, symbol_ordinal)
            else:
                # Corrupt? a bad pointer... we assume it's all
                # useless, no exports
                return None
            
            # If the funcion's rva points within the export directory
            # it will point to a string with the forwarded symbol's string
            # instead of pointing the the function start address.
            
            if symbol_address>=rva and symbol_address<rva+size:
                forwarder_str = self.get_string_at_rva(symbol_address)
            else:
                forwarder_str = None
        
            
            exports.append(
                ExportData(
                    ordinal = export_dir.Base+symbol_ordinal,
                    address = symbol_address,
                    name = symbol_name,
                    forwarder = forwarder_str))
                    
        ordinals = [exp.ordinal for exp in exports]
        
        for idx in xrange(export_dir.NumberOfFunctions):

if not idx+export_dir.Base in ordinals:
                symbol_address = self.get_dword_from_data(
                    address_of_functions, 
                    idx)
                
                #
                # Checking for forwarder again.
                #
                if symbol_address>=rva and symbol_address<rva+size:
                    forwarder_str = self.get_string_at_rva(symbol_address)
                else:
                    forwarder_str = None
                    
                exports.append(
                    ExportData(
                        ordinal = export_dir.Base+idx,
                        address = symbol_address,
                        name = None,
                        forwarder = forwarder_str))
                      
        return ExportDirData(
                struct = export_dir,
                symbols = exports)
        
                    
    def dword_align(self, offset, base):
        offset += base
        return (offset+3) - ((offset+3)%4) - base

def parse_delay_import_directory(self, rva, size):
        """Walk and parse the delay import directory."""
        
        import_descs =  []
        while True:
            try:
                # If the RVA is invalid all would blow up. Some PEs seem to be
                # specially nasty and have an invalid RVA.
                data = self.get_data(rva)
            except PEFormatError, e:
                self.__warnings.append(
                    'Error parsing the Delay import directory at RVA: 0x%x' % ( rva ) )
                break
                
            import_desc =  self.__unpack_data__(
                self.__IMAGE_DELAY_IMPORT_DESCRIPTOR_format__,
                data, file_offset = self.get_offset_from_rva(rva) )
            
            
            # If the structure is all zeores, we reached the end of the list
            if not import_desc or import_desc.all_zeroes():
                break
            
            
            rva += import_desc.sizeof()
            
            try:
                import_data =  self.parse_imports(
                    import_desc.pINT,
                    import_desc.pIAT,
                    None)
            except PEFormatError, e:
                self.__warnings.append(
                    'Error parsing the Delay import directory. ' +
                    'Invalid import data at RVA: 0x%x' % ( rva ) )
                break
            
            if not import_data:
                continue
            
            
            dll = self.get_string_at_rva(import_desc.szName)
            if dll:
                import_descs.append(
                    ImportDescData(
                        struct = import_desc,
                        imports = import_data,
                        dll = dll))
        
        return import_descs

def parse_import_directory(self, rva, size):
        """Walk and parse the import directory."""

import_descs =  []
        while True:
            try:
                # If the RVA is invalid all would blow up. Some EXEs seem to be
                # specially nasty and have an invalid RVA.
                data = self.get_data(rva)
            except PEFormatError, e:
                self.__warnings.append(
                    'Error parsing the Import directory at RVA: 0x%x' % ( rva ) )
                break
                
            import_desc =  self.__unpack_data__(
                self.__IMAGE_IMPORT_DESCRIPTOR_format__,
                data, file_offset = self.get_offset_from_rva(rva) )
                
            # If the structure is all zeores, we reached the end of the list
            if not import_desc or import_desc.all_zeroes():
                break
                
            rva += import_desc.sizeof()
                        
            try:
                import_data =  self.parse_imports(
                    import_desc.OriginalFirstThunk,
                    import_desc.FirstThunk,
                    import_desc.ForwarderChain)
            except PEFormatError, excp:
                self.__warnings.append(
                    'Error parsing the Import directory. ' +
                    'Invalid Import data at RVA: 0x%x' % ( rva ) )
                break
                #raise excp
                
            if not import_data:
                continue
                
            dll = self.get_string_at_rva(import_desc.Name)
            if dll:
                import_descs.append(
                    ImportDescData(
                        struct = import_desc,
                        imports = import_data,
                        dll = dll))

return import_descs

def parse_imports(self, original_first_thunk, first_thunk, forwarder_chain):
        """Parse the imported symbols.
        
        It will fill a list, which will be avalable as the dictionary
        attribute "imports". Its keys will be the DLL names and the values
        all the symbols imported from that object.
        """
        
        imported_symbols = []
        imports_section = self.get_section_by_rva(first_thunk)
        if not imports_section:
            raise PEFormatError, 'Invalid/corrupt imports.'
            
        
        # Import Lookup Table. Contains ordinals or pointers to strings.
        ilt = self.get_import_table(original_first_thunk)
        # Import Address Table. May have identical content to ILT if
        # PE file is not bounded, Will contain the address of the
        # imported symbols once the binary is loaded or if it is already
        # bound.
        iat = self.get_import_table(first_thunk)

# OC Patch:
        # Would crash if iat or ilt had None type 
        if not iat and not ilt:
            raise PEFormatError( 
                'Invalid Import Table information. ' +
                'Both ILT and IAT appear to be broken.')
            
        if not iat and ilt:
            table = ilt
        elif iat and not ilt:
            table = iat
        elif ilt and ((len(ilt) and len(iat)==0) or (len(ilt) == len(iat))):
            table = ilt
        elif (ilt and len(ilt))==0 and (iat and len(iat)):
            table = iat
        else:
            return None
            
        for idx in xrange(len(table)):

imp_ord = None
            imp_hint = None
            imp_name = None
            hint_name_table_rva = None
                        
            if table[idx].AddressOfData:
            
                if self.PE_TYPE == OPTIONAL_HEADER_MAGIC_PE:
                    ordinal_flag = IMAGE_ORDINAL_FLAG
                elif self.PE_TYPE == OPTIONAL_HEADER_MAGIC_PE_PLUS:
                    ordinal_flag = IMAGE_ORDINAL_FLAG64
            
                # If imported by ordinal, we will append the ordinal number
                #
                if table[idx].AddressOfData & ordinal_flag:
                    import_by_ordinal = True
                    imp_ord = table[idx].AddressOfData & 0xffff
                    imp_name = None
                else:
                    import_by_ordinal = False
                    try:
                        hint_name_table_rva = table[idx].AddressOfData & 0x7fffffff
                        data = self.get_data(hint_name_table_rva, 2)
                        # Get the Hint
                        imp_hint = self.get_word_from_data(data, 0)
                        imp_name = self.get_string_at_rva(table[idx].AddressOfData+2)
                    except PEFormatError, e:
                        pass

imp_address = first_thunk+self.OPTIONAL_HEADER.ImageBase+idx*4
                
            if iat and ilt and ilt[idx].AddressOfData != iat[idx].AddressOfData:
                imp_bound = iat[idx].AddressOfData
            else:
                imp_bound = None
                
            if imp_name != '' and (imp_ord or imp_name):
                imported_symbols.append(
                    ImportData(
                        import_by_ordinal = import_by_ordinal,
                        ordinal = imp_ord,
                        hint = imp_hint,
                        name = imp_name,
                        bound = imp_bound,
                        address = imp_address,
                        hint_name_table_rva = hint_name_table_rva))
            
        return imported_symbols

def get_import_table(self, rva):
    
        table = []
        
        while True and rva:
            try:
                data = self.get_data(rva)
            except PEFormatError, e:
                self.__warnings.append(
                    'Error parsing the import table. ' +
                    'Invalid data at RVA: 0x%x' % ( rva ) )
                return None
                
            if self.PE_TYPE == OPTIONAL_HEADER_MAGIC_PE:
                format = self.__IMAGE_THUNK_DATA_format__
            elif self.PE_TYPE == OPTIONAL_HEADER_MAGIC_PE_PLUS:
                format = self.__IMAGE_THUNK_DATA64_format__
                
            thunk_data = self.__unpack_data__(
                format, data, file_offset=self.get_offset_from_rva(rva) )
                    
            if not thunk_data or thunk_data.all_zeroes():
                break
                
            rva += thunk_data.sizeof()
            
            table.append(thunk_data)
            
        return table
    
    
    def get_memory_mapped_image(self, max_virtual_address=0x10000000, ImageBase=None):
        """Returns the data corresponding to the memory layout of the PE file.
        
        The data includes the PE header and the sections loaded at offsets
        corresponding to their relative virtual addresses. (the VirtualAddress
        section header member).
        Any offset in this data corresponds to the absolute memory address
        ImageBase+offset.
        
        The optional argument 'max_virtual_address' provides with means of limiting
        which section are processed.
        Any section with their VirtualAddress beyond this value will be skipped.
        Normally, sections with values beyond this range are just there to confuse
        tools. It's a common trick to see in packed executables.
        
        If the 'ImageBase' optional argument is supplied, the file's relocations
        will be applied to the image by calling the 'relocate_image()' method.
        """
        
        # Collect all sections in one code block    
        data = self.header
        for section in self.sections:

# Miscellanous integrity tests.
            # Some packer will set these to bogus values to
            # make tools go nuts.
            #
            if section.Misc_VirtualSize == 0 or section.SizeOfRawData == 0:
                continue
            
            if section.SizeOfRawData > len(self.__data__):
                continue
                
            if section.PointerToRawData > len(self.__data__):
                continue
        
            if section.VirtualAddress >= max_virtual_address:
                continue
                
            padding_length = section.VirtualAddress - len(data)
            
            if padding_length>0:
                data += '\0'*padding_length
            elif padding_length<0:
                data = data[:padding_length]
                
            data += section.data
            
        return data

def get_data(self, rva, length=None):
        """Get data regardless of the section where it lies on.
        
        Given a rva and the size of the chunk to retrieve, this method
        will find the section where the data lies and return the data.
        """
        
        s = self.get_section_by_rva(rva)

if not s:
            if rva<len(self.header):
                if length:
                    end = rva+length
                else:
                    end = None
                return self.header[rva:end]
                
            raise PEFormatError, 'data at RVA can\'t be fetched. Corrupt header?'

return s.get_data(rva, length)

def get_rva_from_offset(self, offset):
        """Get the rva corresponding to this file offset. """

s = self.get_section_by_offset(offset)
        if not s:
            raise PEFormatError("specified offset (0x%x) doesn't belong to any section." % offset)
        return s.get_rva_from_offset(offset)

def get_offset_from_rva(self, rva):
        """Get the file offset corresponding to this rva.
        
        Given a rva , this method will find the section where the
        data lies and return the offset within the file.
        """
        
        s = self.get_section_by_rva(rva)
        if not s:
                
            raise PEFormatError, 'data at RVA can\'t be fetched. Corrupt header?'
            
        return s.get_offset_from_rva(rva)
        
            
    def get_string_at_rva(self, rva):
        """Get an ASCII string located at the given address."""

s = self.get_section_by_rva(rva)
        if not s:
            if rva<len(self.header):
                return self.get_string_from_data(rva, self.header)
            return None
                
        return self.get_string_from_data(rva-s.VirtualAddress, s.data)
        
        
    def get_string_from_data(self, offset, data):
        """Get an ASCII string from within the data."""

# OC Patch
        b = None
        
        try:
            b = data[offset]
        except IndexError:
            return ''
        
        s = ''
        while ord(b):
            s += b
            offset += 1
            try:
                b = data[offset]
            except IndexError:
                break
          
        return s

def get_string_u_at_rva(self, rva, max_length = 2**16):
        """Get an Unicode string located at the given address."""
        
        try:
            # If the RVA is invalid all would blow up. Some EXEs seem to be
            # specially nasty and have an invalid RVA.
            data = self.get_data(rva, 2)
        except PEFormatError, e:
            return None

#length = struct.unpack('<H', data)[0]
                
        s = u''
        for idx in xrange(max_length):
            try:
                uchr = struct.unpack('<H', self.get_data(rva+2*idx, 2))[0]
            except struct.error:
                break
                
            if unichr(uchr) == u'\0':
                break
            s += unichr(uchr)
            
        return s

def get_section_by_offset(self, offset):
        """Get the section containing the given file offset."""
    
        sections = [s for s in self.sections if s.contains_offset(offset)]
        
        if sections:
            return sections[0]
                    
        return None

def get_section_by_rva(self, rva):
        """Get the section containing the given address."""
    
        sections = [s for s in self.sections if s.contains_rva(rva)]
        
        if sections:
            return sections[0]
                    
        return None
    
    def __str__(self):
        return self.dump_info()
    
            
    def print_info(self):
        """Print all the PE header information in a human readable from."""
        print self.dump_info()
        
        
    def dump_info(self, dump=None):
        """Dump all the PE header information into human readable string."""
        
        
        if dump is None:
            dump = Dump()
        
        warnings = self.get_warnings()
        if warnings:
            dump.add_header('Parsing Warnings')
            for warning in warnings:
                dump.add_line(warning)
                dump.add_newline()
                
        
        dump.add_header('DOS_HEADER')
        dump.add_lines(self.DOS_HEADER.dump())
        dump.add_newline()
        
        dump.add_header('NT_HEADERS')
        dump.add_lines(self.NT_HEADERS.dump())
        dump.add_newline()
        
        dump.add_header('FILE_HEADER')
        dump.add_lines(self.FILE_HEADER.dump())
        
        image_flags = self.retrieve_flags(IMAGE_CHARACTERISTICS, 'IMAGE_FILE_')
            
        dump.add('Flags: ')
        flags = []
        for flag in image_flags:
            if getattr(self.FILE_HEADER, flag[0]):
                flags.append(flag[0])
        dump.add_line(', '.join(flags))
        dump.add_newline()
        
        if hasattr(self, 'OPTIONAL_HEADER') and self.OPTIONAL_HEADER is not None:
            dump.add_header('OPTIONAL_HEADER')
            dump.add_lines(self.OPTIONAL_HEADER.dump())

dll_characteristics_flags = self.retrieve_flags(DLL_CHARACTERISTICS, 'IMAGE_DLL_CHARACTERISTICS_')
            
        dump.add('DllCharacteristics: ')
        flags = []
        for flag in dll_characteristics_flags:
            if getattr(self.OPTIONAL_HEADER, flag[0]):
                flags.append(flag[0])
        dump.add_line(', '.join(flags))
        dump.add_newline()
        
        
        dump.add_header('PE Sections')
        
        section_flags = self.retrieve_flags(SECTION_CHARACTERISTICS, 'IMAGE_SCN_')
        
        for section in self.sections:
            dump.add_lines(section.dump())
            dump.add('Flags: ')
            flags = []
            for flag in section_flags:
                if getattr(section, flag[0]):
                    flags.append(flag[0])
            dump.add_line(', '.join(flags))
            dump.add_line('Entropy: %f (Min=0.0, Max=8.0)' % section.get_entropy() )
            if md5 is not None:
                dump.add_line('MD5     hash: %s' % section.get_hash_md5() )
            if sha1 is not None:
                dump.add_line('SHA-1   hash: %s' % section.get_hash_sha1() )
            if sha256 is not None:
                dump.add_line('SHA-256 hash: %s' % section.get_hash_sha256() )
            if sha512 is not None:
                dump.add_line('SHA-512 hash: %s' % section.get_hash_sha512() )
            dump.add_newline()
            
            
        
        if (hasattr(self, 'OPTIONAL_HEADER') and 
            hasattr(self.OPTIONAL_HEADER, 'DATA_DIRECTORY') ):
            
            dump.add_header('Directories')
            for idx in xrange(len(self.OPTIONAL_HEADER.DATA_DIRECTORY)):
                directory = self.OPTIONAL_HEADER.DATA_DIRECTORY[idx]
                dump.add_lines(directory.dump())
            dump.add_newline()

if hasattr(self, 'VS_VERSIONINFO'):
            dump.add_header('Version Information')
            dump.add_lines(self.VS_VERSIONINFO.dump())
            dump.add_newline()

if hasattr(self, 'VS_FIXEDFILEINFO'):
                dump.add_lines(self.VS_FIXEDFILEINFO.dump())
                dump.add_newline()

if hasattr(self, 'FileInfo'):
                for entry in self.FileInfo:
                    dump.add_lines(entry.dump())
                    dump.add_newline()
                    
                    if hasattr(entry, 'StringTable'):
                        for st_entry in entry.StringTable:
                            [dump.add_line('  '+line) for line in st_entry.dump()]
                            dump.add_line('  LangID: '+st_entry.LangID)
                            dump.add_newline()
                            for str_entry in st_entry.entries.items():
                                dump.add_line('    '+str_entry[0]+': '+str_entry[1])
                        dump.add_newline()
                                
                    elif hasattr(entry, 'Var'):
                        for var_entry in entry.Var:
                            if hasattr(var_entry, 'entry'):
                                [dump.add_line('  '+line) for line in var_entry.dump()]
                                dump.add_line(
                                    '    ' + var_entry.entry.keys()[0] + 
                                    ': ' + var_entry.entry.values()[0])
                                    
                        dump.add_newline()

if hasattr(self, 'DIRECTORY_ENTRY_EXPORT'):
            dump.add_header('Exported symbols')
            dump.add_lines(self.DIRECTORY_ENTRY_EXPORT.struct.dump())
            dump.add_newline()
            dump.add_line('%-10s   %-10s  %s' % ('Ordinal', 'RVA', 'Name'))
            for export in self.DIRECTORY_ENTRY_EXPORT.symbols:
                dump.add('%-10d 0x%08Xh    %s' % (
                    export.ordinal, export.address, export.name))
                if export.forwarder:
                    dump.add_line(' forwarder: %s' % export.forwarder)
                else:
                    dump.add_newline()
                
            dump.add_newline()
        
        if hasattr(self, 'DIRECTORY_ENTRY_IMPORT'):
            dump.add_header('Imported symbols')
            for module in self.DIRECTORY_ENTRY_IMPORT:
                dump.add_lines(module.struct.dump())
                dump.add_newline()
                for symbol in module.imports:
                
                    if symbol.import_by_ordinal is True:
                        dump.add('%s Ordinal[%s] (Imported by Ordinal)' % (
                            module.dll, str(symbol.ordinal)))
                    else:
                        dump.add('%s.%s Hint[%s]' % (
                            module.dll, symbol.name, str(symbol.hint)))

if symbol.bound:
                        dump.add_line(' Bound: 0x%08X' % (symbol.bound))
                    else:
                        dump.add_newline()
                dump.add_newline()
                
        
        if hasattr(self, 'DIRECTORY_ENTRY_BOUND_IMPORT'):
            dump.add_header('Bound imports')
            for bound_imp_desc in self.DIRECTORY_ENTRY_BOUND_IMPORT:
            
                dump.add_lines(bound_imp_desc.struct.dump())
                dump.add_line('DLL: %s' % bound_imp_desc.name)
                dump.add_newline()
                
                for bound_imp_ref in bound_imp_desc.entries:
                    dump.add_lines(bound_imp_ref.struct.dump(), 4)
                    dump.add_line('DLL: %s' % bound_imp_ref.name, 4)
                    dump.add_newline()

if hasattr(self, 'DIRECTORY_ENTRY_DELAY_IMPORT'):
            dump.add_header('Delay Imported symbols')
            for module in self.DIRECTORY_ENTRY_DELAY_IMPORT:
            
                dump.add_lines(module.struct.dump())
                dump.add_newline()
                
                for symbol in module.imports:
                    if symbol.import_by_ordinal is True:
                        dump.add('%s Ordinal[%s] (Imported by Ordinal)' % (
                            module.dll, str(symbol.ordinal)))
                    else:
                        dump.add('%s.%s Hint[%s]' % (
                            module.dll, symbol.name, str(symbol.hint)))
                    
                    if symbol.bound:
                        dump.add_line(' Bound: 0x%08X' % (symbol.bound))
                    else:
                        dump.add_newline()
                dump.add_newline()
            
        
        if hasattr(self, 'DIRECTORY_ENTRY_RESOURCE'):
            dump.add_header('Resource directory')
            
            dump.add_lines(self.DIRECTORY_ENTRY_RESOURCE.struct.dump())
            
            for resource_type in self.DIRECTORY_ENTRY_RESOURCE.entries:
            
                if resource_type.name is not None:
                    dump.add_line('Name: [%s]' % resource_type.name, 2)
                else:
                    dump.add_line('Id: [0x%X] (%s)' % (
                        resource_type.struct.Id, RESOURCE_TYPE.get(
                            resource_type.struct.Id, '-')),
                        2)
                        
                dump.add_lines(resource_type.struct.dump(), 2)

if hasattr(resource_type, 'directory'):
                
                    dump.add_lines(resource_type.directory.struct.dump(), 4)
                        
                    for resource_id in resource_type.directory.entries:
                    
                        if resource_id.name is not None:
                            dump.add_line('Name: [%s]' % resource_id.name, 6)
                        else:
                            dump.add_line('Id: [0x%X]' % resource_id.struct.Id, 6)
                            
                        dump.add_lines(resource_id.struct.dump(), 6)
    
                        if hasattr(resource_id, 'directory'):
                            dump.add_lines(resource_id.directory.struct.dump(), 8)
                            
                            for resource_lang in resource_id.directory.entries:
                            #    dump.add_line('\\--- LANG [%d,%d][%s]' % (
                            #        resource_lang.data.lang,
                            #        resource_lang.data.sublang,
                            #        LANG[resource_lang.data.lang]), 8)
                                dump.add_lines(resource_lang.struct.dump(), 10)
                                dump.add_lines(resource_lang.data.struct.dump(), 12)
                dump.add_newline()
    
            dump.add_newline()
        
        
        if ( hasattr(self, 'DIRECTORY_ENTRY_TLS') and 
             self.DIRECTORY_ENTRY_TLS and 
             self.DIRECTORY_ENTRY_TLS.struct ):
             
            dump.add_header('TLS')
            dump.add_lines(self.DIRECTORY_ENTRY_TLS.struct.dump())
            dump.add_newline()

if hasattr(self, 'DIRECTORY_ENTRY_DEBUG'):
            dump.add_header('Debug information')
            for dbg in self.DIRECTORY_ENTRY_DEBUG:
                dump.add_lines(dbg.struct.dump())
                try:
                    dump.add_line('Type: '+DEBUG_TYPE[dbg.struct.Type])
                except KeyError:
                    dump.add_line('Type: 0x%x(Unknown)' % dbg.struct.Type)
                dump.add_newline()
        
        
        if hasattr(self, 'DIRECTORY_ENTRY_BASERELOC'):
            dump.add_header('Base relocations')
            for base_reloc in self.DIRECTORY_ENTRY_BASERELOC:
                dump.add_lines(base_reloc.struct.dump())
                for reloc in base_reloc.entries:
                    try:
                        dump.add_line('%08Xh %s' % (
                            reloc.rva, RELOCATION_TYPE[reloc.type][16:]), 4)
                    except KeyError:
                        dump.add_line('0x%08X 0x%x(Unknown)' % (
                            reloc.rva, reloc.type), 4)
                dump.add_newline()

return dump.get_text()

# OC Patch
    def get_physical_by_rva(self, rva):
        """Gets the physical address in the PE file from an RVA value."""
        try:
            return self.get_offset_from_rva(rva)
        except Exception:
            return None

##
    # Double-Word get/set
    ##

def get_data_from_dword(self, dword):
        """Return a four byte string representing the double word value. (little endian)."""
        return struct.pack('<L', dword)

def get_dword_from_data(self, data, offset):
        """Convert four bytes of data to a double word (little endian)
        
        'offset' is assumed to index into a dword array. So setting it to
        N will return a dword out of the data sarting at offset N*4.
        
        Returns None if the data can't be turned into a double word.
        """
        
        if (offset+1)*4 > len(data):
            return None
        
        return struct.unpack('<L', data[offset*4:(offset+1)*4])[0]
        
        
    def get_dword_at_rva(self, rva):
        """Return the double word value at the given RVA.
        
        Returns None if the value can't be read, i.e. the RVA can't be mapped
        to a file offset.
        """
        
        try:
            return self.get_dword_from_data(self.get_data(rva)[:4], 0)
        except PEFormatError:
            return None

def get_dword_from_offset(self, offset):
        """Return the double word value at the given file offset. (little endian)"""
        
        if offset+4 > len(self.__data__):
            return None
            
        return self.get_dword_from_data(self.__data__[offset:offset+4], 0)

def set_dword_at_rva(self, rva, dword):
        """Set the double word value at the file offset corresponding to the given RVA."""
        return self.set_bytes_at_rva(rva, self.get_data_from_dword(dword))

def set_dword_at_offset(self, offset, dword):
        """Set the double word value at the given file offset."""
        return self.set_bytes_at_offset(offset, self.get_data_from_dword(dword))

##
    # Word get/set
    ##

def get_data_from_word(self, word):
        """Return a two byte string representing the word value. (little endian)."""
        return struct.pack('<H', word)

def get_word_from_data(self, data, offset):
        """Convert two bytes of data to a word (little endian)
        
        'offset' is assumed to index into a word array. So setting it to
        N will return a dword out of the data sarting at offset N*2.

Returns None if the data can't be turned into a word.
        """

if (offset+1)*2 > len(data):
            return None

return struct.unpack('<H', data[offset*2:(offset+1)*2])[0]

def get_word_at_rva(self, rva):
        """Return the word value at the given RVA.
        
        Returns None if the value can't be read, i.e. the RVA can't be mapped
        to a file offset.
        """
        
        try:
            return self.get_word_from_data(self.get_data(rva)[:2], 0)
        except PEFormatError:
            return None

def get_word_from_offset(self, offset):
        """Return the word value at the given file offset. (little endian)"""
        
        if offset+2 > len(self.__data__):
            return None
            
        return self.get_word_from_data(self.__data__[offset:offset+2], 0)

def set_word_at_rva(self, rva, word):
        """Set the word value at the file offset corresponding to the given RVA."""
        return self.set_bytes_at_rva(rva, self.get_data_from_word(word))

def set_word_at_offset(self, offset, word):
        """Set the word value at the given file offset."""
        return self.set_bytes_at_offset(offset, self.get_data_from_word(word))

##
    # Quad-Word get/set
    ##

def get_data_from_qword(self, word):
        """Return a eight byte string representing the quad-word value. (little endian)."""
        return struct.pack('<Q', word)

def get_qword_from_data(self, data, offset):
        """Convert eight bytes of data to a word (little endian)
        
        'offset' is assumed to index into a word array. So setting it to
        N will return a dword out of the data sarting at offset N*8.

Returns None if the data can't be turned into a quad word.
        """

if (offset+1)*8 > len(data):
            return None

return struct.unpack('<Q', data[offset*8:(offset+1)*8])[0]

def get_qword_at_rva(self, rva):
        """Return the quad-word value at the given RVA.
        
        Returns None if the value can't be read, i.e. the RVA can't be mapped
        to a file offset.
        """
        
        try:
            return self.get_qword_from_data(self.get_data(rva)[:8], 0)
        except PEFormatError:
            return None

def get_qword_from_offset(self, offset):
        """Return the quad-word value at the given file offset. (little endian)"""
        
        if offset+8 > len(self.__data__):
            return None
            
        return self.get_qword_from_data(self.__data__[offset:offset+8], 0)

def set_qword_at_rva(self, rva, qword):
        """Set the quad-word value at the file offset corresponding to the given RVA."""
        return self.set_bytes_at_rva(rva, self.get_data_from_qword(qword))

def set_qword_at_offset(self, offset, qword):
        """Set the quad-word value at the given file offset."""
        return self.set_bytes_at_offset(offset, self.get_data_from_qword(qword))

##
    # Set bytes
    ##

def set_bytes_at_rva(self, rva, data):
        """Overwrite, with the given string, the bytes at the file offset corresponding to the given RVA.
        
        Return True if successful, False otherwise. It can fail if the
        offset is outside the file's boundaries.
        """

offset = self.get_physical_by_rva(rva)
        if not offset:
            raise False
            
        return self.set_bytes_at_offset(offset, data)

def set_bytes_at_offset(self, offset, data):
        """Overwrite the bytes at the given file offset with the given string.
        
        Return True if successful, False otherwise. It can fail if the
        offset is outside the file's boundaries.
        """
    
        if not isinstance(data, str):
            raise TypeError('data should be of type: str')
        
        if offset >= 0 and offset < len(self.__data__):
            self.__data__ = ( self.__data__[:offset] + 
                data +
                self.__data__[offset+len(data):] )
        else:
            return False
            
        # Refresh the section's data with the modified information
        #
        for section in self.sections:
            section_data_start = section.PointerToRawData
            section_data_end = section_data_start+section.SizeOfRawData
            section.data = self.__data__[section_data_start:section_data_end]

return True

def relocate_image(self, new_ImageBase):
        """Apply the relocation information to the image using the provided new image base.
        
        This method will apply the relocation information to the image. Given the new base,
        all the relocations will be processed and both the raw data and the section's data
        will be fixed accordingly.
        The resulting image can be retrieved as well through the method:
        
            get_memory_mapped_image()
            
        In order to get something that would more closely match what could be found in memory
        once the Windows loader finished its work.
        """
        
        relocation_difference = new_ImageBase - self.OPTIONAL_HEADER.ImageBase
        
        
        for reloc in self.DIRECTORY_ENTRY_BASERELOC:

virtual_address = reloc.struct.VirtualAddress
            size_of_block = reloc.struct.SizeOfBlock
            
            # We iterate with an index because if the relocation is of type
            # IMAGE_REL_BASED_HIGHADJ we need to also process the next entry
            # at once and skip it for the next interation
            # 
            entry_idx = 0
            while entry_idx<len(reloc.entries):
            
                entry = reloc.entries[entry_idx]
                entry_idx += 1
                                
                if entry.type == RELOCATION_TYPE['IMAGE_REL_BASED_ABSOLUTE']:
                    # Nothing to do for this type of relocation
                    pass
                    
                elif entry.type == RELOCATION_TYPE['IMAGE_REL_BASED_HIGH']:
                    # Fix the high 16bits of a relocation
                    #
                    # Add high 16bits of relocation_difference to the
                    # 16bit value at RVA=entry.rva
                    
                    self.set_word_at_rva(
                        entry.rva,
                        ( self.get_word_at_rva(entry.rva) + relocation_difference>>16)&0xffff )
                  
                elif entry.type == RELOCATION_TYPE['IMAGE_REL_BASED_LOW']:
                    # Fix the low 16bits of a relocation
                    #
                    # Add low 16 bits of relocation_difference to the 16bit value
                    # at RVA=entry.rva
                    
                    self.set_word_at_rva(
                        entry.rva,
                        ( self.get_word_at_rva(entry.rva) + relocation_difference)&0xffff)
                    
                elif entry.type == RELOCATION_TYPE['IMAGE_REL_BASED_HIGHLOW']:
                    # Handle all high and low parts of a 32bit relocation
                    #
                    # Add relocation_difference to the value at RVA=entry.rva
                    
                    self.set_dword_at_rva(
                        entry.rva,
                        self.get_dword_at_rva(entry.rva)+relocation_difference)
                  
                elif entry.type == RELOCATION_TYPE['IMAGE_REL_BASED_HIGHADJ']:
                    # Fix the high 16bits of a relocation and adjust
                    #
                    # Add high 16bits of relocation_difference to the 32bit value
                    # composed from the (16bit value at RVA=entry.rva)<<16 plus 
                    # the 16bit value at the next relocation entry.
                    #
                    
                    # If the next entry is beyond the array's limits,
                    # abort... the table is corrupt
                    #
                    if entry_idx == len(reloc.entries):
                        break
                    
                    next_entry = reloc.entries[entry_idx]
                    entry_idx += 1
                    self.set_word_at_rva( entry.rva, 
                        ((self.get_word_at_rva(entry.rva)<<16) + next_entry.rva +
                        relocation_difference & 0xffff0000) >> 16 )
                
                elif entry.type == RELOCATION_TYPE['IMAGE_REL_BASED_DIR64']:
                    # Apply the difference to the 64bit value at the offset
                    # RVA=entry.rva
                    
                    self.set_qword_at_rva(
                        entry.rva,
                        self.get_qword_at_rva(entry.rva) + relocation_difference)

def verify_checksum(self):
    
        return self.OPTIONAL_HEADER.CheckSum == self.generate_checksum()
        
    
    def generate_checksum(self):
    
        # Get the offset to the CheckSum field in the OptionalHeader
        #
        checksum_offset = self.OPTIONAL_HEADER.__file_offset__ + 0x40 # 64
        
        checksum = 0
        
        for i in range( len(self.__data__) / 4 ):
        
            # Skip the checksum field
            #
            if i == checksum_offset / 4:
                continue

dword = struct.unpack('L', self.__data__[ i*4 : i*4+4 ])[0]
            checksum = (checksum & 0xffffffff) + dword + (checksum>>32)
            if checksum > 2**32:
                checksum = (checksum & 0xffffffff) + (checksum >> 32)
        
        checksum = (checksum & 0xffff) + (checksum >> 16)
        checksum = (checksum) + (checksum >> 16)
        checksum = checksum & 0xffff
        
        return checksum + len(self.__data__)

普通文本 | 3730行 | 136.35 KB

# -*- coding: Latin-1 -*-
"""pefile, Portable Executable reader module


All the PE file basic structures are available with their default names
as attributes of the instance returned.

Processed elements such as the import table are made available with lowercase
names, to differentiate them from the upper case basic structure names.

pefile has been tested against the limits of valid PE headers, that is, malware.
Lots of packed malware attempt to abuse the format way beyond its standard use.
To the best of my knowledge most of the abuses are handled gracefully.

Copyright (c) 2005, 2006, 2007, 2008 Ero Carrera <ero@dkbza.org>

All rights reserved.

For detailed copyright information see the file COPYING in
the root of the distribution archive.
"""

__author__ = 'Ero Carrera'
__version__ = '1.2.9.1'
__contact__ = 'ero@dkbza.org'


import os
import struct
import time
import math
import re
import exceptions
import string
import array

sha1, sha256, sha512, md5 = None, None, None, None

try:
    import hashlib
    sha1 = hashlib.sha1
    sha256 = hashlib.sha256
    sha512 = hashlib.sha512
    md5 = hashlib.md5
except ImportError:    
    try:
        import sha
        sha1 = sha.new
    except ImportError:
        pass
    try:
        import md5
        md5 = md5.new
    except ImportError:
        pass


fast_load = False

IMAGE_DOS_SIGNATURE             = 0x5A4D
IMAGE_OS2_SIGNATURE             = 0x454E
IMAGE_OS2_SIGNATURE_LE          = 0x454C
IMAGE_VXD_SIGNATURE             = 0x454C
IMAGE_NT_SIGNATURE              = 0x00004550
IMAGE_NUMBEROF_DIRECTORY_ENTRIES= 16
IMAGE_ORDINAL_FLAG              = 0x80000000L
IMAGE_ORDINAL_FLAG64            = 0x8000000000000000L
OPTIONAL_HEADER_MAGIC_PE        = 0x10b
OPTIONAL_HEADER_MAGIC_PE_PLUS   = 0x20b


directory_entry_types = [
    ('IMAGE_DIRECTORY_ENTRY_EXPORT',        0),
    ('IMAGE_DIRECTORY_ENTRY_IMPORT',        1),
    ('IMAGE_DIRECTORY_ENTRY_RESOURCE',      2),
    ('IMAGE_DIRECTORY_ENTRY_EXCEPTION',     3),
    ('IMAGE_DIRECTORY_ENTRY_SECURITY',      4),
    ('IMAGE_DIRECTORY_ENTRY_BASERELOC',     5),
    ('IMAGE_DIRECTORY_ENTRY_DEBUG',         6),
    ('IMAGE_DIRECTORY_ENTRY_COPYRIGHT',     7),
    ('IMAGE_DIRECTORY_ENTRY_GLOBALPTR',     8),
    ('IMAGE_DIRECTORY_ENTRY_TLS',           9),
    ('IMAGE_DIRECTORY_ENTRY_LOAD_CONFIG',   10),
    ('IMAGE_DIRECTORY_ENTRY_BOUND_IMPORT',  11),
    ('IMAGE_DIRECTORY_ENTRY_IAT',           12),
    ('IMAGE_DIRECTORY_ENTRY_DELAY_IMPORT',  13),
    ('IMAGE_DIRECTORY_ENTRY_COM_DESCRIPTOR',14),
    ('IMAGE_DIRECTORY_ENTRY_RESERVED',      15) ]

DIRECTORY_ENTRY = dict([(e[1], e[0]) for e in directory_entry_types]+directory_entry_types)
 

image_characteristics = [
    ('IMAGE_FILE_RELOCS_STRIPPED',          0x0001),
    ('IMAGE_FILE_EXECUTABLE_IMAGE',         0x0002),
    ('IMAGE_FILE_LINE_NUMS_STRIPPED',       0x0004),
    ('IMAGE_FILE_LOCAL_SYMS_STRIPPED',      0x0008),
    ('IMAGE_FILE_AGGRESIVE_WS_TRIM',        0x0010),
    ('IMAGE_FILE_LARGE_ADDRESS_AWARE',      0x0020),
    ('IMAGE_FILE_16BIT_MACHINE',            0x0040),
    ('IMAGE_FILE_BYTES_REVERSED_LO',        0x0080),
    ('IMAGE_FILE_32BIT_MACHINE',            0x0100),
    ('IMAGE_FILE_DEBUG_STRIPPED',           0x0200),
    ('IMAGE_FILE_REMOVABLE_RUN_FROM_SWAP',  0x0400),
    ('IMAGE_FILE_NET_RUN_FROM_SWAP',        0x0800),
    ('IMAGE_FILE_SYSTEM',                   0x1000),
    ('IMAGE_FILE_DLL',                      0x2000),
    ('IMAGE_FILE_UP_SYSTEM_ONLY',           0x4000),
    ('IMAGE_FILE_BYTES_REVERSED_HI',        0x8000) ]

IMAGE_CHARACTERISTICS = dict([(e[1], e[0]) for e in
    image_characteristics]+image_characteristics)

    
section_characteristics = [
    ('IMAGE_SCN_CNT_CODE',                  0x00000020),
    ('IMAGE_SCN_CNT_INITIALIZED_DATA',      0x00000040),
    ('IMAGE_SCN_CNT_UNINITIALIZED_DATA',    0x00000080),
    ('IMAGE_SCN_LNK_OTHER',                 0x00000100),
    ('IMAGE_SCN_LNK_INFO',                  0x00000200),
    ('IMAGE_SCN_LNK_REMOVE',                0x00000800),
    ('IMAGE_SCN_LNK_COMDAT',                0x00001000),
    ('IMAGE_SCN_MEM_FARDATA',               0x00008000),
    ('IMAGE_SCN_MEM_PURGEABLE',             0x00020000),
    ('IMAGE_SCN_MEM_16BIT',                 0x00020000),
    ('IMAGE_SCN_MEM_LOCKED',                0x00040000),
    ('IMAGE_SCN_MEM_PRELOAD',               0x00080000),
    ('IMAGE_SCN_ALIGN_1BYTES',              0x00100000),
    ('IMAGE_SCN_ALIGN_2BYTES',              0x00200000),
    ('IMAGE_SCN_ALIGN_4BYTES',              0x00300000),
    ('IMAGE_SCN_ALIGN_8BYTES',              0x00400000),
    ('IMAGE_SCN_ALIGN_16BYTES',             0x00500000),
    ('IMAGE_SCN_ALIGN_32BYTES',             0x00600000),
    ('IMAGE_SCN_ALIGN_64BYTES',             0x00700000),
    ('IMAGE_SCN_ALIGN_128BYTES',            0x00800000),
    ('IMAGE_SCN_ALIGN_256BYTES',            0x00900000),
    ('IMAGE_SCN_ALIGN_512BYTES',            0x00A00000),
    ('IMAGE_SCN_ALIGN_1024BYTES',           0x00B00000),
    ('IMAGE_SCN_ALIGN_2048BYTES',           0x00C00000),
    ('IMAGE_SCN_ALIGN_4096BYTES',           0x00D00000),
    ('IMAGE_SCN_ALIGN_8192BYTES',           0x00E00000),
    ('IMAGE_SCN_ALIGN_MASK',                0x00F00000),
    ('IMAGE_SCN_LNK_NRELOC_OVFL',           0x01000000),
    ('IMAGE_SCN_MEM_DISCARDABLE',           0x02000000),
    ('IMAGE_SCN_MEM_NOT_CACHED',            0x04000000),
    ('IMAGE_SCN_MEM_NOT_PAGED',             0x08000000),
    ('IMAGE_SCN_MEM_SHARED',                0x10000000),
    ('IMAGE_SCN_MEM_EXECUTE',               0x20000000),
    ('IMAGE_SCN_MEM_READ',                  0x40000000),
    ('IMAGE_SCN_MEM_WRITE',                 0x80000000L) ]
 
SECTION_CHARACTERISTICS = dict([(e[1], e[0]) for e in
    section_characteristics]+section_characteristics)


debug_types = [
    ('IMAGE_DEBUG_TYPE_UNKNOWN',        0),
    ('IMAGE_DEBUG_TYPE_COFF',           1),
    ('IMAGE_DEBUG_TYPE_CODEVIEW',       2),
    ('IMAGE_DEBUG_TYPE_FPO',            3),
    ('IMAGE_DEBUG_TYPE_MISC',           4),
    ('IMAGE_DEBUG_TYPE_EXCEPTION',      5),
    ('IMAGE_DEBUG_TYPE_FIXUP',          6),
    ('IMAGE_DEBUG_TYPE_OMAP_TO_SRC',    7),
    ('IMAGE_DEBUG_TYPE_OMAP_FROM_SRC',  8),
    ('IMAGE_DEBUG_TYPE_BORLAND',        9),
    ('IMAGE_DEBUG_TYPE_RESERVED10',     10) ]

DEBUG_TYPE = dict([(e[1], e[0]) for e in debug_types]+debug_types)


subsystem_types = [
    ('IMAGE_SUBSYSTEM_UNKNOWN',     0),
    ('IMAGE_SUBSYSTEM_NATIVE',      1),
    ('IMAGE_SUBSYSTEM_WINDOWS_GUI', 2),
    ('IMAGE_SUBSYSTEM_WINDOWS_CUI', 3),
    ('IMAGE_SUBSYSTEM_OS2_CUI',     5),
    ('IMAGE_SUBSYSTEM_POSIX_CUI',   7),
    ('IMAGE_SUBSYSTEM_WINDOWS_CE_GUI',  9),
    ('IMAGE_SUBSYSTEM_EFI_APPLICATION', 10),
    ('IMAGE_SUBSYSTEM_EFI_BOOT_SERVICE_DRIVER', 11),
    ('IMAGE_SUBSYSTEM_EFI_RUNTIME_DRIVER',      12),
    ('IMAGE_SUBSYSTEM_EFI_ROM',     13),
    ('IMAGE_SUBSYSTEM_XBOX',        14)]

SUBSYSTEM_TYPE = dict([(e[1], e[0]) for e in subsystem_types]+subsystem_types)


machine_types = [
    ('IMAGE_FILE_MACHINE_UNKNOWN',  0),
    ('IMAGE_FILE_MACHINE_AM33',     0x1d3),
    ('IMAGE_FILE_MACHINE_AMD64',    0x8664),
    ('IMAGE_FILE_MACHINE_ARM',      0x1c0),
    ('IMAGE_FILE_MACHINE_EBC',      0xebc),
    ('IMAGE_FILE_MACHINE_I386',     0x14c),
    ('IMAGE_FILE_MACHINE_IA64',     0x200),
    ('IMAGE_FILE_MACHINE_MR32',     0x9041),
    ('IMAGE_FILE_MACHINE_MIPS16',   0x266),
    ('IMAGE_FILE_MACHINE_MIPSFPU',  0x366),
    ('IMAGE_FILE_MACHINE_MIPSFPU16',0x466),
    ('IMAGE_FILE_MACHINE_POWERPC',  0x1f0),
    ('IMAGE_FILE_MACHINE_POWERPCFP',0x1f1),
    ('IMAGE_FILE_MACHINE_R4000',    0x166),
    ('IMAGE_FILE_MACHINE_SH3',      0x1a2),
    ('IMAGE_FILE_MACHINE_SH3DSP',   0x1a3),
    ('IMAGE_FILE_MACHINE_SH4',      0x1a6),
    ('IMAGE_FILE_MACHINE_SH5',      0x1a8),
    ('IMAGE_FILE_MACHINE_THUMB',    0x1c2),
    ('IMAGE_FILE_MACHINE_WCEMIPSV2',0x169),
 ]

MACHINE_TYPE = dict([(e[1], e[0]) for e in machine_types]+machine_types)


relocation_types = [
    ('IMAGE_REL_BASED_ABSOLUTE',        0),
    ('IMAGE_REL_BASED_HIGH',            1),
    ('IMAGE_REL_BASED_LOW',             2),
    ('IMAGE_REL_BASED_HIGHLOW',         3),
    ('IMAGE_REL_BASED_HIGHADJ',         4),
    ('IMAGE_REL_BASED_MIPS_JMPADDR',    5),
    ('IMAGE_REL_BASED_SECTION',         6),
    ('IMAGE_REL_BASED_REL',             7),
    ('IMAGE_REL_BASED_MIPS_JMPADDR16',  9),
    ('IMAGE_REL_BASED_IA64_IMM64',      9),
    ('IMAGE_REL_BASED_DIR64',           10),
    ('IMAGE_REL_BASED_HIGH3ADJ',        11) ]

RELOCATION_TYPE = dict([(e[1], e[0]) for e in relocation_types]+relocation_types)


dll_characteristics = [
    ('IMAGE_DLL_CHARACTERISTICS_RESERVED_0x0001', 0x0001),
    ('IMAGE_DLL_CHARACTERISTICS_RESERVED_0x0002', 0x0002),
    ('IMAGE_DLL_CHARACTERISTICS_RESERVED_0x0004', 0x0004),
    ('IMAGE_DLL_CHARACTERISTICS_RESERVED_0x0008', 0x0008),
    ('IMAGE_DLL_CHARACTERISTICS_DYNAMIC_BASE',      0x0040),
    ('IMAGE_DLL_CHARACTERISTICS_FORCE_INTEGRITY',   0x0080),
    ('IMAGE_DLL_CHARACTERISTICS_NX_COMPAT',         0x0100),
    ('IMAGE_DLL_CHARACTERISTICS_NO_ISOLATION',      0x0200),
    ('IMAGE_DLL_CHARACTERISTICS_NO_SEH',    0x0400),
    ('IMAGE_DLL_CHARACTERISTICS_NO_BIND',   0x0800),
    ('IMAGE_DLL_CHARACTERISTICS_RESERVED_0x1000', 0x1000),
    ('IMAGE_DLL_CHARACTERISTICS_WDM_DRIVER',    0x2000),
    ('IMAGE_DLL_CHARACTERISTICS_TERMINAL_SERVER_AWARE', 0x8000) ]

DLL_CHARACTERISTICS = dict([(e[1], e[0]) for e in dll_characteristics]+dll_characteristics)


# Resource types
resource_type = [
    ('RT_CURSOR',          1),
    ('RT_BITMAP',          2),
    ('RT_ICON',            3),
    ('RT_MENU',            4),
    ('RT_DIALOG',          5),
    ('RT_STRING',          6),
    ('RT_FONTDIR',         7),
    ('RT_FONT',            8),
    ('RT_ACCELERATOR',     9),
    ('RT_RCDATA',          10),
    ('RT_MESSAGETABLE',    11),
    ('RT_GROUP_CURSOR',    12),
    ('RT_GROUP_ICON',      14),
    ('RT_VERSION',         16),
    ('RT_DLGINCLUDE',      17),
    ('RT_PLUGPLAY',        19),
    ('RT_VXD',             20),
    ('RT_ANICURSOR',       21),
    ('RT_ANIICON',         22),
    ('RT_HTML',            23),
    ('RT_MANIFEST',        24) ]

RESOURCE_TYPE = dict([(e[1], e[0]) for e in resource_type]+resource_type)

    
# Language definitions
lang = [
 ('LANG_NEUTRAL',       0x00),
 ('LANG_INVARIANT',     0x7f),
 ('LANG_AFRIKAANS',     0x36),
 ('LANG_ALBANIAN',      0x1c),
 ('LANG_ARABIC',        0x01),
 ('LANG_ARMENIAN',      0x2b),
 ('LANG_ASSAMESE',      0x4d),
 ('LANG_AZERI',         0x2c),
 ('LANG_BASQUE',        0x2d),
 ('LANG_BELARUSIAN',    0x23),
 ('LANG_BENGALI',       0x45),
 ('LANG_BULGARIAN',     0x02),
 ('LANG_CATALAN',       0x03),
 ('LANG_CHINESE',       0x04),
 ('LANG_CROATIAN',      0x1a),
 ('LANG_CZECH',         0x05),
 ('LANG_DANISH',        0x06),
 ('LANG_DIVEHI',        0x65),
 ('LANG_DUTCH',         0x13),
 ('LANG_ENGLISH',       0x09),
 ('LANG_ESTONIAN',      0x25),
 ('LANG_FAEROESE',      0x38),
 ('LANG_FARSI',         0x29),
 ('LANG_FINNISH',       0x0b),
 ('LANG_FRENCH',        0x0c),
 ('LANG_GALICIAN',      0x56),
 ('LANG_GEORGIAN',      0x37),
 ('LANG_GERMAN',        0x07),
 ('LANG_GREEK',         0x08),
 ('LANG_GUJARATI',      0x47),
 ('LANG_HEBREW',        0x0d),
 ('LANG_HINDI',         0x39),
 ('LANG_HUNGARIAN',     0x0e),
 ('LANG_ICELANDIC',     0x0f),
 ('LANG_INDONESIAN',    0x21),
 ('LANG_ITALIAN',       0x10),
 ('LANG_JAPANESE',      0x11),
 ('LANG_KANNADA',       0x4b),
 ('LANG_KASHMIRI',      0x60),
 ('LANG_KAZAK',         0x3f),
 ('LANG_KONKANI',       0x57),
 ('LANG_KOREAN',        0x12),
 ('LANG_KYRGYZ',        0x40),
 ('LANG_LATVIAN',       0x26),
 ('LANG_LITHUANIAN',    0x27),
 ('LANG_MACEDONIAN',    0x2f),
 ('LANG_MALAY',         0x3e),
 ('LANG_MALAYALAM',     0x4c),
 ('LANG_MANIPURI',      0x58),
 ('LANG_MARATHI',       0x4e),
 ('LANG_MONGOLIAN',     0x50),
 ('LANG_NEPALI',        0x61),
 ('LANG_NORWEGIAN',     0x14),
 ('LANG_ORIYA',         0x48),
 ('LANG_POLISH',        0x15),
 ('LANG_PORTUGUESE',    0x16),
 ('LANG_PUNJABI',       0x46),
 ('LANG_ROMANIAN',      0x18),
 ('LANG_RUSSIAN',       0x19),
 ('LANG_SANSKRIT',      0x4f),
 ('LANG_SERBIAN',       0x1a),
 ('LANG_SINDHI',        0x59),
 ('LANG_SLOVAK',        0x1b),
 ('LANG_SLOVENIAN',     0x24),
 ('LANG_SPANISH',       0x0a),
 ('LANG_SWAHILI',       0x41),
 ('LANG_SWEDISH',       0x1d),
 ('LANG_SYRIAC',        0x5a),
 ('LANG_TAMIL',         0x49),
 ('LANG_TATAR',         0x44),
 ('LANG_TELUGU',        0x4a),
 ('LANG_THAI',          0x1e),
 ('LANG_TURKISH',       0x1f),
 ('LANG_UKRAINIAN',     0x22),
 ('LANG_URDU',          0x20),
 ('LANG_UZBEK',         0x43),
 ('LANG_VIETNAMESE',    0x2a),
 ('LANG_GAELIC',        0x3c),
 ('LANG_MALTESE',       0x3a),
 ('LANG_MAORI',         0x28),
 ('LANG_RHAETO_ROMANCE',0x17),
 ('LANG_SAAMI',         0x3b),
 ('LANG_SORBIAN',       0x2e),
 ('LANG_SUTU',          0x30),
 ('LANG_TSONGA',        0x31),
 ('LANG_TSWANA',        0x32),
 ('LANG_VENDA',         0x33),
 ('LANG_XHOSA',         0x34),
 ('LANG_ZULU',          0x35),
 ('LANG_ESPERANTO',     0x8f),
 ('LANG_WALON',         0x90),
 ('LANG_CORNISH',       0x91),
 ('LANG_WELSH',         0x92),
 ('LANG_BRETON',        0x93) ]

LANG = dict(lang+[(e[1], e[0]) for e in lang])


# Sublanguage definitions
sublang =  [
 ('SUBLANG_NEUTRAL',                        0x00),
 ('SUBLANG_DEFAULT',                        0x01),
 ('SUBLANG_SYS_DEFAULT',                    0x02),
 ('SUBLANG_ARABIC_SAUDI_ARABIA',            0x01),
 ('SUBLANG_ARABIC_IRAQ',                    0x02),
 ('SUBLANG_ARABIC_EGYPT',                   0x03),
 ('SUBLANG_ARABIC_LIBYA',                   0x04),
 ('SUBLANG_ARABIC_ALGERIA',                 0x05),
 ('SUBLANG_ARABIC_MOROCCO',                 0x06),
 ('SUBLANG_ARABIC_TUNISIA',                 0x07),
 ('SUBLANG_ARABIC_OMAN',                    0x08),
 ('SUBLANG_ARABIC_YEMEN',                   0x09),
 ('SUBLANG_ARABIC_SYRIA',                   0x0a),
 ('SUBLANG_ARABIC_JORDAN',                  0x0b),
 ('SUBLANG_ARABIC_LEBANON',                 0x0c),
 ('SUBLANG_ARABIC_KUWAIT',                  0x0d),
 ('SUBLANG_ARABIC_UAE',                     0x0e),
 ('SUBLANG_ARABIC_BAHRAIN',                 0x0f),
 ('SUBLANG_ARABIC_QATAR',                   0x10),
 ('SUBLANG_AZERI_LATIN',                    0x01),
 ('SUBLANG_AZERI_CYRILLIC',                 0x02),
 ('SUBLANG_CHINESE_TRADITIONAL',            0x01),
 ('SUBLANG_CHINESE_SIMPLIFIED',             0x02),
 ('SUBLANG_CHINESE_HONGKONG',               0x03),
 ('SUBLANG_CHINESE_SINGAPORE',              0x04),
 ('SUBLANG_CHINESE_MACAU',                  0x05),
 ('SUBLANG_DUTCH',                          0x01),
 ('SUBLANG_DUTCH_BELGIAN',                  0x02),
 ('SUBLANG_ENGLISH_US',                     0x01),
 ('SUBLANG_ENGLISH_UK',                     0x02),
 ('SUBLANG_ENGLISH_AUS',                    0x03),
 ('SUBLANG_ENGLISH_CAN',                    0x04),
 ('SUBLANG_ENGLISH_NZ',                     0x05),
 ('SUBLANG_ENGLISH_EIRE',                   0x06),
 ('SUBLANG_ENGLISH_SOUTH_AFRICA',           0x07),
 ('SUBLANG_ENGLISH_JAMAICA',                0x08),
 ('SUBLANG_ENGLISH_CARIBBEAN',              0x09),
 ('SUBLANG_ENGLISH_BELIZE',                 0x0a),
 ('SUBLANG_ENGLISH_TRINIDAD',               0x0b),
 ('SUBLANG_ENGLISH_ZIMBABWE',               0x0c),
 ('SUBLANG_ENGLISH_PHILIPPINES',            0x0d),
 ('SUBLANG_FRENCH',                         0x01),
 ('SUBLANG_FRENCH_BELGIAN',                 0x02),
 ('SUBLANG_FRENCH_CANADIAN',                0x03),
 ('SUBLANG_FRENCH_SWISS',                   0x04),
 ('SUBLANG_FRENCH_LUXEMBOURG',              0x05),
 ('SUBLANG_FRENCH_MONACO',                  0x06),
 ('SUBLANG_GERMAN',                         0x01),
 ('SUBLANG_GERMAN_SWISS',                   0x02),
 ('SUBLANG_GERMAN_AUSTRIAN',                0x03),
 ('SUBLANG_GERMAN_LUXEMBOURG',              0x04),
 ('SUBLANG_GERMAN_LIECHTENSTEIN',           0x05),
 ('SUBLANG_ITALIAN',                        0x01),
 ('SUBLANG_ITALIAN_SWISS',                  0x02),
 ('SUBLANG_KASHMIRI_SASIA',                 0x02),
 ('SUBLANG_KASHMIRI_INDIA',                 0x02),
 ('SUBLANG_KOREAN',                         0x01),
 ('SUBLANG_LITHUANIAN',                     0x01),
 ('SUBLANG_MALAY_MALAYSIA',                 0x01),
 ('SUBLANG_MALAY_BRUNEI_DARUSSALAM',        0x02),
 ('SUBLANG_NEPALI_INDIA',                   0x02),
 ('SUBLANG_NORWEGIAN_BOKMAL',               0x01),
 ('SUBLANG_NORWEGIAN_NYNORSK',              0x02),
 ('SUBLANG_PORTUGUESE',                     0x02),
 ('SUBLANG_PORTUGUESE_BRAZILIAN',           0x01),
 ('SUBLANG_SERBIAN_LATIN',                  0x02),
 ('SUBLANG_SERBIAN_CYRILLIC',               0x03),
 ('SUBLANG_SPANISH',                        0x01),
 ('SUBLANG_SPANISH_MEXICAN',                0x02),
 ('SUBLANG_SPANISH_MODERN',                 0x03),
 ('SUBLANG_SPANISH_GUATEMALA',              0x04),
 ('SUBLANG_SPANISH_COSTA_RICA',             0x05),
 ('SUBLANG_SPANISH_PANAMA',                 0x06),
 ('SUBLANG_SPANISH_DOMINICAN_REPUBLIC',     0x07),
 ('SUBLANG_SPANISH_VENEZUELA',              0x08),
 ('SUBLANG_SPANISH_COLOMBIA',               0x09),
 ('SUBLANG_SPANISH_PERU',                   0x0a),
 ('SUBLANG_SPANISH_ARGENTINA',              0x0b),
 ('SUBLANG_SPANISH_ECUADOR',                0x0c),
 ('SUBLANG_SPANISH_CHILE',                  0x0d),
 ('SUBLANG_SPANISH_URUGUAY',                0x0e),
 ('SUBLANG_SPANISH_PARAGUAY',               0x0f),
 ('SUBLANG_SPANISH_BOLIVIA',                0x10),
 ('SUBLANG_SPANISH_EL_SALVADOR',            0x11),
 ('SUBLANG_SPANISH_HONDURAS',               0x12),
 ('SUBLANG_SPANISH_NICARAGUA',              0x13),
 ('SUBLANG_SPANISH_PUERTO_RICO',            0x14),
 ('SUBLANG_SWEDISH',                        0x01),
 ('SUBLANG_SWEDISH_FINLAND',                0x02),
 ('SUBLANG_URDU_PAKISTAN',                  0x01),
 ('SUBLANG_URDU_INDIA',                     0x02),
 ('SUBLANG_UZBEK_LATIN',                    0x01),
 ('SUBLANG_UZBEK_CYRILLIC',                 0x02),
 ('SUBLANG_DUTCH_SURINAM',                  0x03),
 ('SUBLANG_ROMANIAN',                       0x01),
 ('SUBLANG_ROMANIAN_MOLDAVIA',              0x02),
 ('SUBLANG_RUSSIAN',                        0x01),
 ('SUBLANG_RUSSIAN_MOLDAVIA',               0x02),
 ('SUBLANG_CROATIAN',                       0x01),
 ('SUBLANG_LITHUANIAN_CLASSIC',             0x02),
 ('SUBLANG_GAELIC',                         0x01),
 ('SUBLANG_GAELIC_SCOTTISH',                0x02),
 ('SUBLANG_GAELIC_MANX',                    0x03) ]

SUBLANG = dict(sublang+[(e[1], e[0]) for e in sublang])


class UnicodeStringWrapperPostProcessor:
    """This class attemps to help the process of identifying strings
    that might be plain Unicode or Pascal. A list of strings will be
    wrapped on it with the hope the overlappings will help make the 
    decission about their type."""
    
    def __init__(self, pe, rva_ptr):
        self.pe = pe
        self.rva_ptr = rva_ptr
        self.string = None
        
        
    def get_rva(self):
        """Get the RVA of the string."""
        
        return self.rva_ptr
        
        
    def __str__(self):
        """Return the escaped ASCII representation of the string."""
    
        def convert_char(char):
            if char in string.printable:
                return char
            else:
                return r'\x%02x' % ord(char)
                
        if self.string:
            return ''.join([convert_char(c) for c in self.string])
            
        return ''
        
    
    def invalidate(self):
        """Make this instance None, to express it's no known string type."""
        
        self = None
    
        
    def render_pascal_16(self):
    
        self.string = self.pe.get_string_u_at_rva(
            self.rva_ptr+2, 
            max_length=self.__get_pascal_16_length())


    def ask_pascal_16(self, next_rva_ptr):
        """The next RVA is taken to be the one immediately following this one.
        
        Such RVA could indicate the natural end of the string and will be checked
        with the possible length contained in the first word.
        """
        
        length = self.__get_pascal_16_length()
        
        if length == (next_rva_ptr - (self.rva_ptr+2)) / 2:
            self.length = length
            return True
            
        return False
        
        
    def __get_pascal_16_length(self):
    
        return self.__get_word_value_at_rva(self.rva_ptr)
        
    
    def __get_word_value_at_rva(self, rva):

        try:
            data = self.pe.get_data(self.rva_ptr, 2)
        except PEFormatError, e:
            return False

        if len(data)<2:
            return False

        return struct.unpack('<H', data)[0]

    
    #def render_pascal_8(self):
    #    """"""
        
        
    def ask_unicode_16(self, next_rva_ptr):
        """The next RVA is taken to be the one immediately following this one.
        
        Such RVA could indicate the natural end of the string and will be checked
        to see if there's a Unicode NULL character there.
        """
        
        if self.__get_word_value_at_rva(next_rva_ptr-2) == 0:
            self.length = next_rva_ptr - self.rva_ptr
            return True
            
        return False
        

    def render_unicode_16(self):
        """"""

        self.string = self.pe.get_string_u_at_rva(self.rva_ptr)
            

class PEFormatError(Exception):
    """Generic PE format error exception."""
    
    def __init__(self, value):
        self.value = value

    def __str__(self):
        return repr(self.value)


class Dump:
    """Convenience class for dumping the PE information."""
    
    def __init__(self):
        self.text = ''
    
        
    def add_lines(self, txt, indent=0):
        """Adds a list of lines.
        
        The list can be indented with the optional argument 'indent'.
        """
        for line in txt:
            self.add_line(line, indent)
        
            
    def add_line(self, txt, indent=0):
        """Adds a line.
        
        The line can be indented with the optional argument 'indent'.
        """
        
        self.add(txt+'\n', indent)
    
        
    def add(self, txt, indent=0):
        """Adds some text, no newline will be appended.
        
        The text can be indented with the optional argument 'indent'.
        """
        
        if isinstance(txt, unicode):
            s = []
            for c in txt:
                try:
                    s.append(str(c))
                except UnicodeEncodeError, e:
                    s.append(repr(c))
                        
            txt = ''.join(s)
        
        self.text += ' '*indent+txt
    
        
    def add_header(self, txt):
        """Adds a header element."""
        
        self.add_line('-'*10+txt+'-'*10+'\n')
        
        
    def add_newline(self):
        """Adds a newline."""
        
        self.text += '\n'
        
        
    def get_text(self):
        """Get the text in its current state."""
    
        return self.text



class Structure:
    """Prepare structure object to extract members from data.
    
    Format is a list containing definitions for the elements
    of the structure.
    """
    
    
    def __init__(self, format, name=None, file_offset=None):
        # Format is forced little endian, for big endian non Intel platforms
        self.__format__ = '<'
        self.__keys__ = []
#        self.values = {}
        self.__format_length__ = 0
        self.__set_format__(format[1])
        self._all_zeroes = False
        self.__unpacked_data_elms__ = None
        self.__file_offset__ = file_offset
        if name:
            self.name = name
        else:
            self.name = format[0]
                
            
    def __get_format__(self):
        return self.__format__
        
        
    def get_file_offset(self):
        return self.__file_offset__

    def set_file_offset(self, offset):
        self.__file_offset__ = offset
        
    def all_zeroes(self):
        """Returns true is the unpacked data is all zeroes."""
        
        return self._all_zeroes

                
    def __set_format__(self, format):
    
        for elm in format:
            if ',' in elm:
                elm_type, elm_name = elm.split(',', 1)
                self.__format__ += elm_type
                
                elm_names = elm_name.split(',')
                names = []
                for elm_name in elm_names:
                    if elm_name in self.__keys__:
                        search_list = [x[:len(elm_name)] for x in self.__keys__]
                        occ_count = search_list.count(elm_name)
                        elm_name = elm_name+'_'+str(occ_count)
                    names.append(elm_name)
                # Some PE header structures have unions on them, so a certain
                # value might have different names, so each key has a list of
                # all the possible members referring to the data.
                self.__keys__.append(names)
                    
        self.__format_length__ = struct.calcsize(self.__format__)
        
        
    def sizeof(self):
        """Return size of the structure."""
    
        return self.__format_length__
        
        
    def __unpack__(self, data):
    
        if len(data)>self.__format_length__:
            data = data[:self.__format_length__]
            
        # OC Patch:
        # Some malware have incorrect header lengths.
        # Fail gracefully if this occurs
        # Buggy malware: a29b0118af8b7408444df81701ad5a7f
        #
        elif len(data)<self.__format_length__:
            raise PEFormatError('Data length less than expected header length.')

            
        if data.count(chr(0)) == len(data):
            self._all_zeroes = True
            
        self.__unpacked_data_elms__ = struct.unpack(self.__format__, data)
        for i in xrange(len(self.__unpacked_data_elms__)):
            for key in self.__keys__[i]:
#                self.values[key] = self.__unpacked_data_elms__[i]
                setattr(self, key, self.__unpacked_data_elms__[i])


    def __pack__(self):
    
        new_values = []
        
        for i in xrange(len(self.__unpacked_data_elms__)):
        
            for key in self.__keys__[i]:
                new_val = getattr(self, key)
                old_val = self.__unpacked_data_elms__[i]
                
                # In the case of Unions, when the first changed value
                # is picked the loop is exited
                if new_val != old_val:
                    break
                
            new_values.append(new_val)
            
        return struct.pack(self.__format__, *new_values)
        
                
    def __str__(self):
        return '\n'.join( self.dump() )

    def __repr__(self):
        return '<Structure: %s>' % (' '.join( [' '.join(s.split()) for s in self.dump()] ))
        
        
    def dump(self, indentation=0):
        """Returns a string representation of the structure."""
    
        dump = []
        
        dump.append('[%s]' % self.name)

        # Refer to the __set_format__ method for an explanation
        # of the following construct.
        for keys in self.__keys__:
            for key in keys:

                val = getattr(self, key)
                if isinstance(val, int) or isinstance(val, long):
                    val_str = '0x%-8X' % (val)
                    if key == 'TimeDateStamp' or key == 'dwTimeStamp':
                        try:
                            val_str += ' [%s UTC]' % time.asctime(time.gmtime(val))
                        except exceptions.ValueError, e:
                            val_str += ' [INVALID TIME]'
                else:
                    val_str = ''.join(filter(lambda c:c != '\0', str(val)))

                dump.append('%-30s %s' % (key+':', val_str))

        return dump



class SectionStructure(Structure):
    """Convenience section handling class."""

    def get_data(self, start, length=None):
        """Get data chunk from a section.
        
        Allows to query data from the section by passing the
        addresses where the PE file would be loaded by default.
        It is then possible to retrieve code and data by its real
        addresses as it would be if loaded.
        """

        offset = start - self.VirtualAddress

        if length:
            end = offset+length
        else:
            end = len(self.data)
            
        return self.data[offset:end]


    def get_rva_from_offset(self, offset):
        return offset - self.PointerToRawData + self.VirtualAddress


    def get_offset_from_rva(self, rva):
        return (rva - self.VirtualAddress) + self.PointerToRawData


    def contains_offset(self, offset):
        """Check whether the section contains the file offset provided."""

        if not self.PointerToRawData:
           # bss and other sections containing only uninitialized data must have 0
           # and do not take space in the file
           return False
        return self.PointerToRawData <= offset < self.VirtualAddress + self.SizeOfRawData


    def contains_rva(self, rva):
        """Check whether the section contains the address provided."""

        # PECOFF documentation v8 says:
        # The total size of the section when loaded into memory.
        # If this value is greater than SizeOfRawData, the section is zero-padded.
        # This field is valid only for executable images and should be set to zero
        # for object files.

        if len(self.data) < self.SizeOfRawData:
            size = self.Misc_VirtualSize
        else:
            size = max(self.SizeOfRawData, self.Misc_VirtualSize)
            
        return self.VirtualAddress <= rva < self.VirtualAddress + size

    def contains(self, rva):
        #print "DEPRECATION WARNING: you should use contains_rva() instead of contains()"
        return self.contains_rva(rva)


    def set_data(self, data):
        """Set the data belonging to the section."""
        
        self.data = data
        
        
    def get_entropy(self):
        """Calculate and return the entropy for the section."""
        
        return self.entropy_H( self.data )
        

    def get_hash_sha1(self):
        """Get the SHA-1 hex-digest of the section's data."""
        
        if sha1 is not None:
            return sha1( self.data ).hexdigest()
    

    def get_hash_sha256(self):
        """Get the SHA-256 hex-digest of the section's data."""
        
        if sha256 is not None:
            return sha256( self.data ).hexdigest()
    

    def get_hash_sha512(self):
        """Get the SHA-512 hex-digest of the section's data."""
        
        if sha512 is not None:
            return sha512( self.data ).hexdigest()
    

    def get_hash_md5(self):
        """Get the MD5 hex-digest of the section's data."""
        
        if md5 is not None:
            return md5( self.data ).hexdigest()
    

    def entropy_H(self, data):
        """Calculate the entropy of a chunk of data."""

        if len(data) == 0:
            return 0.0
    
        occurences = array.array('L', [0]*256)
    
        for x in data:
            occurences[ord(x)] += 1
    
        entropy = 0
        for x in occurences:
            if x:
                p_x = float(x) / len(data)
                entropy -= p_x*math.log(p_x, 2)
    
        return entropy



class DataContainer:
    """Generic data container."""
	
    def __init__(self, **args):
        for key, value in args.items():
            setattr(self, key, value)



class ImportDescData(DataContainer):
    """Holds import descriptor information.
    
    dll:        name of the imported DLL
    imports:    list of imported symbols (ImportData instances)
    struct:     IMAGE_IMPORT_DESCRIPTOR sctruture
    """

class ImportData(DataContainer):
    """Holds imported symbol's information.
    
    ordinal:    Ordinal of the symbol
    name:       Name of the symbol
    bound:      If the symbol is bound, this contains
                the address.
    """
    
class ExportDirData(DataContainer):
    """Holds export directory information.
                    
    struct:     IMAGE_EXPORT_DIRECTORY structure
    symbols:    list of exported symbols (ExportData instances)
"""
    
class ExportData(DataContainer):
    """Holds exported symbols' information.
    
    ordinal:    ordinal of the symbol
    address:    address of the symbol
    name:       name of the symbol (None if the symbol is
                exported by ordinal only)
    forwarder:  if the symbol is forwarded it will
                contain the name of the target symbol,
                None otherwise.
    """
              

class ResourceDirData(DataContainer):
    """Holds resource directory information.
    
    struct:     IMAGE_RESOURCE_DIRECTORY structure
    entries:    list of entries (ResourceDirEntryData instances)
    """
    
class ResourceDirEntryData(DataContainer):
    """Holds resource directory entry data.
    
    struct:     IMAGE_RESOURCE_DIRECTORY_ENTRY structure
    name:       If the resource is identified by name this
                attribute will contain the name string. None
                otherwise. If identified by id, the id is
                availabe at 'struct.Id'
    id:         the id, also in struct.Id
    directory:  If this entry has a lower level directory
                this attribute will point to the
                ResourceDirData instance representing it.
    data:       If this entry has no futher lower directories
                and points to the actual resource data, this
                attribute will reference the corresponding
                ResourceDataEntryData instance.
    (Either of the 'directory' or 'data' attribute will exist,
    but not both.)
    """

class ResourceDataEntryData(DataContainer):
    """Holds resource data entry information.
    
    struct:     IMAGE_RESOURCE_DATA_ENTRY structure
    lang:       Primary language ID
    sublang:    Sublanguage ID
    """

class DebugData(DataContainer):
    """Holds debug information.
    
    struct:     IMAGE_DEBUG_DIRECTORY structure
    """

class BaseRelocationData(DataContainer):
    """Holds base relocation information.
    
    struct:     IMAGE_BASE_RELOCATION structure
    entries:    list of relocation data (RelocationData instances)
    """
    
class RelocationData(DataContainer):
    """Holds relocation information.
    
    type:       Type of relocation
                The type string is can be obtained by
                RELOCATION_TYPE[type]
    rva:        RVA of the relocation
    """

class TlsData(DataContainer):
    """Holds TLS information.
    
    struct:     IMAGE_TLS_DIRECTORY structure
    """

class BoundImportDescData(DataContainer):
    """Holds bound import descriptor data.
    
    This directory entry will provide with information on the
    DLLs this PE files has been bound to (if bound at all).
    The structure will contain the name and timestamp of the
    DLL at the time of binding so that the loader can know
    whether it differs from the one currently present in the
    system and must, therefore, re-bind the PE's imports.
    
    struct:     IMAGE_BOUND_IMPORT_DESCRIPTOR structure
    name:       DLL name
    entries:    list of entries (BoundImportRefData instances)
                the entries will exist if this DLL has forwarded
                symbols. If so, the destination DLL will have an
                entry in this list.
    """

class BoundImportRefData(DataContainer):
    """Holds bound import forwader reference data.
    
    Contains the same information as the bound descriptor but
    for forwarded DLLs, if any.
    
    struct:     IMAGE_BOUND_FORWARDER_REF structure
    name:       dll name
    """


class PE:
    """A Portable Executable representation.
    
    This class provides access to most of the information in a PE file.
    
    It expects to be supplied the name of the file to load or PE data
    to process and an optional argument 'fast_load' (False by default)
    which controls whether to load all the directories information,
    which can be quite time consuming.
    
    pe = pefile.PE('module.dll')
    pe = pefile.PE(name='module.dll')
    
    would load 'module.dll' and process it. If the data would be already
    available in a buffer the same could be achieved with:
    
    pe = pefile.PE(data=module_dll_data)
    
    The "fast_load" can be set to a default by setting its value in the
    module itself by means,for instance, of a "pefile.fast_load = True".
    That will make all the subsequent instances not to load the
    whole PE structure. The "full_load" method can be used to parse
    the missing data at a later stage.
    
    Basic headers information will be available in the attributes:
    
    DOS_HEADER
    NT_HEADERS
    FILE_HEADER
    OPTIONAL_HEADER
    
    All of them will contain among their attrbitues the members of the
    corresponding structures as defined in WINNT.H
    
    The raw data corresponding to the header (from the beginning of the
    file up to the start of the first section) will be avaiable in the
    instance's attribute 'header' as a string.
    
    The sections will be available as a list in the 'sections' attribute.
    Each entry will contain as attributes all the structure's members.
    
    Directory entries will be available as attributes (if they exist):
    (no other entries are processed at this point)
    
    DIRECTORY_ENTRY_IMPORT (list of ImportDescData instances)
    DIRECTORY_ENTRY_EXPORT (ExportDirData instance)
    DIRECTORY_ENTRY_RESOURCE (ResourceDirData instance)
    DIRECTORY_ENTRY_DEBUG (list of DebugData instances)
    DIRECTORY_ENTRY_BASERELOC (list of BaseRelocationData instances)
    DIRECTORY_ENTRY_TLS 
    DIRECTORY_ENTRY_BOUND_IMPORT (list of BoundImportData instances)
    
    The following dictionary attributes provide ways of mapping different
    constants. They will accept the numeric value and return the string
    representation and the opposite, feed in the string and get the
    numeric constant:
    
    DIRECTORY_ENTRY
    IMAGE_CHARACTERISTICS
    SECTION_CHARACTERISTICS
    DEBUG_TYPE
    SUBSYSTEM_TYPE
    MACHINE_TYPE
    RELOCATION_TYPE
    RESOURCE_TYPE
    LANG
    SUBLANG
    """

    #
    # Format specifications for PE structures.
    #
    
    __IMAGE_DOS_HEADER_format__ = ('IMAGE_DOS_HEADER',
        ('H,e_magic', 'H,e_cblp', 'H,e_cp',
        'H,e_crlc', 'H,e_cparhdr', 'H,e_minalloc',
        'H,e_maxalloc', 'H,e_ss', 'H,e_sp', 'H,e_csum',
        'H,e_ip', 'H,e_cs', 'H,e_lfarlc', 'H,e_ovno', '8s,e_res',
        'H,e_oemid', 'H,e_oeminfo', '20s,e_res2',
        'L,e_lfanew'))
        
    __IMAGE_FILE_HEADER_format__ = ('IMAGE_FILE_HEADER',
        ('H,Machine', 'H,NumberOfSections',
        'L,TimeDateStamp', 'L,PointerToSymbolTable',
        'L,NumberOfSymbols', 'H,SizeOfOptionalHeader',
        'H,Characteristics'))
        
    __IMAGE_DATA_DIRECTORY_format__ = ('IMAGE_DATA_DIRECTORY',
        ('L,VirtualAddress', 'L,Size'))
    
    
    __IMAGE_OPTIONAL_HEADER_format__ = ('IMAGE_OPTIONAL_HEADER',
        ('H,Magic', 'B,MajorLinkerVersion',
        'B,MinorLinkerVersion', 'L,SizeOfCode',
        'L,SizeOfInitializedData', 'L,SizeOfUninitializedData',
        'L,AddressOfEntryPoint', 'L,BaseOfCode', 'L,BaseOfData',
        'L,ImageBase', 'L,SectionAlignment', 'L,FileAlignment',
        'H,MajorOperatingSystemVersion', 'H,MinorOperatingSystemVersion',
        'H,MajorImageVersion', 'H,MinorImageVersion',
        'H,MajorSubsystemVersion', 'H,MinorSubsystemVersion',
        'L,Reserved1', 'L,SizeOfImage', 'L,SizeOfHeaders',
        'L,CheckSum', 'H,Subsystem', 'H,DllCharacteristics',
        'L,SizeOfStackReserve', 'L,SizeOfStackCommit',
        'L,SizeOfHeapReserve', 'L,SizeOfHeapCommit',
        'L,LoaderFlags', 'L,NumberOfRvaAndSizes' ))


    __IMAGE_OPTIONAL_HEADER64_format__ = ('IMAGE_OPTIONAL_HEADER64',
        ('H,Magic', 'B,MajorLinkerVersion',
        'B,MinorLinkerVersion', 'L,SizeOfCode',
        'L,SizeOfInitializedData', 'L,SizeOfUninitializedData',
        'L,AddressOfEntryPoint', 'L,BaseOfCode',
        'Q,ImageBase', 'L,SectionAlignment', 'L,FileAlignment',
        'H,MajorOperatingSystemVersion', 'H,MinorOperatingSystemVersion',
        'H,MajorImageVersion', 'H,MinorImageVersion',
        'H,MajorSubsystemVersion', 'H,MinorSubsystemVersion',
        'L,Reserved1', 'L,SizeOfImage', 'L,SizeOfHeaders',
        'L,CheckSum', 'H,Subsystem', 'H,DllCharacteristics',
        'Q,SizeOfStackReserve', 'Q,SizeOfStackCommit',
        'Q,SizeOfHeapReserve', 'Q,SizeOfHeapCommit',
        'L,LoaderFlags', 'L,NumberOfRvaAndSizes' ))

        
    __IMAGE_NT_HEADERS_format__ = ('IMAGE_NT_HEADERS', ('L,Signature',))
        
    __IMAGE_SECTION_HEADER_format__ = ('IMAGE_SECTION_HEADER',
        ('8s,Name', 'L,Misc,Misc_PhysicalAddress,Misc_VirtualSize',
        'L,VirtualAddress', 'L,SizeOfRawData', 'L,PointerToRawData',
        'L,PointerToRelocations', 'L,PointerToLinenumbers',
        'H,NumberOfRelocations', 'H,NumberOfLinenumbers',
        'L,Characteristics'))

    __IMAGE_DELAY_IMPORT_DESCRIPTOR_format__ = ('IMAGE_DELAY_IMPORT_DESCRIPTOR',
        ('L,grAttrs', 'L,szName', 'L,phmod', 'L,pIAT', 'L,pINT',
        'L,pBoundIAT', 'L,pUnloadIAT', 'L,dwTimeStamp'))

    __IMAGE_IMPORT_DESCRIPTOR_format__ =  ('IMAGE_IMPORT_DESCRIPTOR',
        ('L,OriginalFirstThunk,Characteristics',
        'L,TimeDateStamp', 'L,ForwarderChain', 'L,Name', 'L,FirstThunk'))

    __IMAGE_EXPORT_DIRECTORY_format__ =  ('IMAGE_EXPORT_DIRECTORY',
        ('L,Characteristics',
        'L,TimeDateStamp', 'H,MajorVersion', 'H,MinorVersion', 'L,Name',
        'L,Base', 'L,NumberOfFunctions', 'L,NumberOfNames',
        'L,AddressOfFunctions', 'L,AddressOfNames', 'L,AddressOfNameOrdinals'))

    __IMAGE_RESOURCE_DIRECTORY_format__ = ('IMAGE_RESOURCE_DIRECTORY',
        ('L,Characteristics',
        'L,TimeDateStamp', 'H,MajorVersion', 'H,MinorVersion',
        'H,NumberOfNamedEntries', 'H,NumberOfIdEntries'))

    __IMAGE_RESOURCE_DIRECTORY_ENTRY_format__ = ('IMAGE_RESOURCE_DIRECTORY_ENTRY',
        ('L,Name',
        'L,OffsetToData'))
            
    __IMAGE_RESOURCE_DATA_ENTRY_format__ = ('IMAGE_RESOURCE_DATA_ENTRY',
        ('L,OffsetToData', 'L,Size', 'L,CodePage', 'L,Reserved'))
    
    __VS_VERSIONINFO_format__ = ( 'VS_VERSIONINFO',
        ('H,Length', 'H,ValueLength', 'H,Type' ))
    
    __VS_FIXEDFILEINFO_format__ = ( 'VS_FIXEDFILEINFO',
        ('L,Signature', 'L,StrucVersion', 'L,FileVersionMS', 'L,FileVersionLS',
         'L,ProductVersionMS', 'L,ProductVersionLS', 'L,FileFlagsMask', 'L,FileFlags',
         'L,FileOS', 'L,FileType', 'L,FileSubtype', 'L,FileDateMS', 'L,FileDateLS'))
    
    __StringFileInfo_format__ = ( 'StringFileInfo',
        ('H,Length', 'H,ValueLength', 'H,Type' ))
    
    __StringTable_format__ = ( 'StringTable',
        ('H,Length', 'H,ValueLength', 'H,Type' ))
    
    __String_format__ = ( 'String',
        ('H,Length', 'H,ValueLength', 'H,Type' ))
    
    __Var_format__ = ( 'Var', ('H,Length', 'H,ValueLength', 'H,Type' ))

    __IMAGE_THUNK_DATA_format__ = ('IMAGE_THUNK_DATA',
        ('L,ForwarderString,Function,Ordinal,AddressOfData',))

    __IMAGE_THUNK_DATA64_format__ = ('IMAGE_THUNK_DATA',
        ('Q,ForwarderString,Function,Ordinal,AddressOfData',))

    __IMAGE_DEBUG_DIRECTORY_format__ = ('IMAGE_DEBUG_DIRECTORY',
        ('L,Characteristics', 'L,TimeDateStamp', 'H,MajorVersion',
        'H,MinorVersion', 'L,Type', 'L,SizeOfData', 'L,AddressOfRawData',
        'L,PointerToRawData'))
    
    __IMAGE_BASE_RELOCATION_format__ = ('IMAGE_BASE_RELOCATION',
        ('L,VirtualAddress', 'L,SizeOfBlock') )

    __IMAGE_TLS_DIRECTORY_format__ = ('IMAGE_TLS_DIRECTORY',
        ('L,StartAddressOfRawData', 'L,EndAddressOfRawData',
        'L,AddressOfIndex', 'L,AddressOfCallBacks',
        'L,SizeOfZeroFill', 'L,Characteristics' ) )

    __IMAGE_TLS_DIRECTORY64_format__ = ('IMAGE_TLS_DIRECTORY',
        ('Q,StartAddressOfRawData', 'Q,EndAddressOfRawData',
        'Q,AddressOfIndex', 'Q,AddressOfCallBacks',
        'L,SizeOfZeroFill', 'L,Characteristics' ) )

    __IMAGE_BOUND_IMPORT_DESCRIPTOR_format__ = ('IMAGE_BOUND_IMPORT_DESCRIPTOR',
        ('L,TimeDateStamp', 'H,OffsetModuleName', 'H,NumberOfModuleForwarderRefs'))

    __IMAGE_BOUND_FORWARDER_REF_format__ = ('IMAGE_BOUND_FORWARDER_REF',
        ('L,TimeDateStamp', 'H,OffsetModuleName', 'H,Reserved') )


    def __init__(self, name=None, data=None, fast_load=None):
    
        self.sections = []
        
        self.__warnings = []
        
        self.PE_TYPE = None
        
        if  not name and not data:
            return
            
        # This list will keep track of all the structures created.
        # That will allow for an easy iteration through the list
        # in order to save the modifications made
        self.__structures__ = []

        if not fast_load:
            fast_load = globals()['fast_load']
        self.__parse__(name, data, fast_load)
                    
        
    
    def __unpack_data__(self, format, data, file_offset):
        """Apply structure format to raw data.
        
        Returns and unpacked structure object if successful, None otherwise.
        """
    
        structure = Structure(format, file_offset=file_offset)
        #if len(data) < structure.sizeof():
        #    return None
    
        try:
            structure.__unpack__(data)
        except PEFormatError, err:
            self.__warnings.append(
                'Corrupt header "%s" at file offset %d. Exception: %s' % (
                    format[0], file_offset, str(err))  )
            return None

        self.__structures__.append(structure)
    
        return structure
        

        
    def __parse__(self, fname, data, fast_load):
        """Parse a Portable Executable file.
        
        Loads a PE file, parsing all its structures and making them available
        through the instance's attributes.
        """
        
        if fname:
            fd = file(fname, 'rb')
            self.__data__ = fd.read()
            fd.close()
        elif data:
            self.__data__ = data
        

        self.DOS_HEADER = self.__unpack_data__(
            self.__IMAGE_DOS_HEADER_format__,
            self.__data__, file_offset=0)
            
        if not self.DOS_HEADER or self.DOS_HEADER.e_magic != IMAGE_DOS_SIGNATURE:
            raise PEFormatError('DOS Header magic not found.')

        # OC Patch:
        # Check for sane value in e_lfanew
        #                
        if self.DOS_HEADER.e_lfanew > len(self.__data__):
            raise PEFormatError('Invalid e_lfanew value, probably not a PE file')

        nt_headers_offset = self.DOS_HEADER.e_lfanew

        self.NT_HEADERS = self.__unpack_data__(
            self.__IMAGE_NT_HEADERS_format__,
            self.__data__[nt_headers_offset:],
            file_offset = nt_headers_offset)

        # We better check the signature right here, before the file screws
        # around with sections:
        # OC Patch:
        # Some malware will cause the Signature value to not exist at all
        if not self.NT_HEADERS or not self.NT_HEADERS.Signature:
            raise PEFormatError('NT Headers not found.')

        if self.NT_HEADERS.Signature != IMAGE_NT_SIGNATURE:
            raise PEFormatError('Invalid NT Headers signature.')
                
        self.FILE_HEADER = self.__unpack_data__(
            self.__IMAGE_FILE_HEADER_format__,
            self.__data__[nt_headers_offset+4:],
            file_offset = nt_headers_offset+4)
        image_flags = self.retrieve_flags(IMAGE_CHARACTERISTICS, 'IMAGE_FILE_')
        
        if not self.FILE_HEADER:
            raise PEFormatError('File Header missing')

        # Set the image's flags according the the Characteristics member
        self.set_flags(self.FILE_HEADER, self.FILE_HEADER.Characteristics, image_flags)
        
        optional_header_offset =    \
            nt_headers_offset+4+self.FILE_HEADER.sizeof()

        # Note: location of sections can be controlled from PE header:
        sections_offset = optional_header_offset + self.FILE_HEADER.SizeOfOptionalHeader

        self.OPTIONAL_HEADER = self.__unpack_data__(
            self.__IMAGE_OPTIONAL_HEADER_format__,
            self.__data__[optional_header_offset:],
            file_offset = optional_header_offset)

        # According to solardesigner's findings for his
        # Tiny PE project, the optional header does not
        # need fields beyond "Subsystem" in order to be
        # loadable by the Windows loader (given that zeroes
        # are acceptable values and the header is loaded
        # in a zeroed memory page)
        # If trying to parse a full Optional Header fails
        # we try to parse it again with some 0 padding
        #
        MINIMUM_VALID_OPTIONAL_HEADER_RAW_SIZE = 69
        
        if ( self.OPTIONAL_HEADER is None and 
            len(self.__data__[optional_header_offset:])
                >= MINIMUM_VALID_OPTIONAL_HEADER_RAW_SIZE ):
        
            # Add enough zeroes to make up for the unused fields
            #
            padding_length = 128
            
            # Create padding
            #
            padded_data = self.__data__[optional_header_offset:] + (
                '\0' * padding_length)
            
            self.OPTIONAL_HEADER = self.__unpack_data__(
                self.__IMAGE_OPTIONAL_HEADER_format__,
                padded_data,
                file_offset = optional_header_offset)
         
            
        # Check the Magic in the OPTIONAL_HEADER and set the PE file
        # type accordingly
        #
        if self.OPTIONAL_HEADER is not None:
        
            if self.OPTIONAL_HEADER.Magic == OPTIONAL_HEADER_MAGIC_PE:
            
                self.PE_TYPE = OPTIONAL_HEADER_MAGIC_PE
                
            elif self.OPTIONAL_HEADER.Magic == OPTIONAL_HEADER_MAGIC_PE_PLUS:
    
                self.PE_TYPE = OPTIONAL_HEADER_MAGIC_PE_PLUS
            
                self.OPTIONAL_HEADER = self.__unpack_data__(
                    self.__IMAGE_OPTIONAL_HEADER64_format__,
                    self.__data__[optional_header_offset:],
                    file_offset = optional_header_offset)

                # Again, as explained above, we try to parse
                # a reduced form of the Optional Header which
                # is still valid despite not including all
                # structure members
                #
                MINIMUM_VALID_OPTIONAL_HEADER_RAW_SIZE = 69+4

                if ( self.OPTIONAL_HEADER is None and 
                    len(self.__data__[optional_header_offset:])
                        >= MINIMUM_VALID_OPTIONAL_HEADER_RAW_SIZE ):
                
                    padding_length = 128
                    padded_data = self.__data__[optional_header_offset:] + (
                        '\0' * padding_length)
                    self.OPTIONAL_HEADER = self.__unpack_data__(
                        self.__IMAGE_OPTIONAL_HEADER64_format__,
                        padded_data,
                        file_offset = optional_header_offset)
        
        
        if not self.FILE_HEADER:
            raise PEFormatError('File Header missing')


        # OC Patch:
        # Die gracefully if there is no OPTIONAL_HEADER field
        # 975440f5ad5e2e4a92c4d9a5f22f75c1
        if self.PE_TYPE is None or self.OPTIONAL_HEADER is None:
            raise PEFormatError("No Optional Header found, invalid PE32 or PE32+ file")
            
        dll_characteristics_flags = self.retrieve_flags(DLL_CHARACTERISTICS, 'IMAGE_DLL_CHARACTERISTICS_')

        # Set the Dll Characteristics flags according the the DllCharacteristics member
        self.set_flags(
            self.OPTIONAL_HEADER,
            self.OPTIONAL_HEADER.DllCharacteristics,
            dll_characteristics_flags)


        self.OPTIONAL_HEADER.DATA_DIRECTORY = []
        #offset = (optional_header_offset + self.FILE_HEADER.SizeOfOptionalHeader)
        offset = (optional_header_offset + self.OPTIONAL_HEADER.sizeof())
            
        
        self.NT_HEADERS.FILE_HEADER = self.FILE_HEADER
        self.NT_HEADERS.OPTIONAL_HEADER = self.OPTIONAL_HEADER
            

        # The NumberOfRvaAndSizes is sanitized to stay within 
        # reasonable limits so can be casted to an int
        #
        if self.OPTIONAL_HEADER.NumberOfRvaAndSizes > 0x10:
            self.__warnings.append(
                'Suspicious NumberOfRvaAndSizes in the Optional Header. ' +
                'Normal values are never larger than 0x10, the value is: 0x%x' %
                self.OPTIONAL_HEADER.NumberOfRvaAndSizes )
                
        for i in xrange(int(0x7fffffffL & self.OPTIONAL_HEADER.NumberOfRvaAndSizes)):

            if len(self.__data__[offset:]) == 0:
                break
                        
            if len(self.__data__[offset:]) < 8:
                data = self.__data__[offset:]+'\0'*8
            else:
                data = self.__data__[offset:]

            dir_entry = self.__unpack_data__(
                self.__IMAGE_DATA_DIRECTORY_format__,
                data,
                file_offset = offset)
                
            if dir_entry is None:
                break

            # Would fail if missing an entry
            # 1d4937b2fa4d84ad1bce0309857e70ca offending sample
            try:
                dir_entry.name = DIRECTORY_ENTRY[i]
            except (KeyError, AttributeError):
                break

            offset += dir_entry.sizeof()
            
            self.OPTIONAL_HEADER.DATA_DIRECTORY.append(dir_entry)

            # If the offset goes outside the optional header,
            # the loop is broken, regardless of how many directories
            # NumberOfRvaAndSizes says there are
            #
            # We assume a normally sized optional header, hence that we do
            # a sizeof() instead of reading SizeOfOptionalHeader.
            # Then we add a default number of drectories times their size,
            # if we go beyond that, we assume the number of directories
            # is wrong and stop processing
            if offset >= (optional_header_offset + 
                self.OPTIONAL_HEADER.sizeof() + 8*16) :
                
                break
                
                        
        offset = self.parse_sections(sections_offset)
        
        # OC Patch:
        # There could be a problem if there are no raw data sections
        # greater than 0
        # fc91013eb72529da005110a3403541b6 example
        # Should this throw an exception in the minimum header offset
        # can't be found?
        #
        rawDataPointers = [
            s.PointerToRawData for s in self.sections if s.PointerToRawData>0]
            
        if len(rawDataPointers) > 0:
            lowest_section_offset = min(rawDataPointers)
        else:
            lowest_section_offset = None

        if not lowest_section_offset or lowest_section_offset<offset:
            self.header = self.__data__[:offset]
        else:
            self.header = self.__data__[:lowest_section_offset]
        

        # Check whether the entry point lies within a section
        #
        if self.get_section_by_rva(self.OPTIONAL_HEADER.AddressOfEntryPoint) is not None:
        
            # Check whether the entry point lies within the file
            #
            ep_offset = self.get_offset_from_rva(self.OPTIONAL_HEADER.AddressOfEntryPoint)
            if ep_offset > len(self.__data__):
            
                self.__warnings.append(
                    'Possibly corrupt file. AddressOfEntryPoint lies outside the file. ' +
                    'AddressOfEntryPoint: 0x%x' %
                    self.OPTIONAL_HEADER.AddressOfEntryPoint )
            
        else:

            self.__warnings.append(
                'AddressOfEntryPoint lies outside the sections\' boundaries. ' +
                'AddressOfEntryPoint: 0x%x' %
                self.OPTIONAL_HEADER.AddressOfEntryPoint )
                
        
        if not fast_load:
            self.parse_data_directories()


    def get_warnings(self):
        """Return the list of warnings.
        
        Non-critical problems found when parsing the PE file are
        appended to a list of warnings. This method returns the
        full list.
        """
    
        return self.__warnings
        
        
    def show_warnings(self):
        """Print the list of warnings.
        
        Non-critical problems found when parsing the PE file are
        appended to a list of warnings. This method prints the
        full list to standard output.
        """
    
        for warning in self.__warnings:
            print '>', warning


    def full_load(self):
        """Process the data directories.
        
        This mathod will load the data directories which might not have
        been loaded if the "fast_load" option was used.
        """
        
        self.parse_data_directories()
        
        
    def write(self, filename=None):
        """Write the PE file.
        
        This function will process all headers and components
        of the PE file and include all changes made (by just
        assigning to attributes in the PE objects) and write
        the changes back to a file whose name is provided as
        an argument. The filename is optional.
        The data to be written to the file will be returned
        as a 'str' object.
        """
    
        file_data = list(self.__data__)
        for struct in self.__structures__:
        
            struct_data = list(struct.__pack__())
            offset = struct.get_file_offset()
            
            file_data[offset:offset+len(struct_data)] = struct_data
            
        if hasattr(self, 'VS_VERSIONINFO'):
            if hasattr(self, 'FileInfo'):
                for entry in self.FileInfo:
                    if hasattr(entry, 'StringTable'):
                        for st_entry in entry.StringTable:
                            for key, entry in st_entry.entries.items():
                            
                                offsets = st_entry.entries_offsets[key]
                                lengths = st_entry.entries_lengths[key]
                                
                                if len( entry ) > lengths[1]:

                                    uc = zip(
                                            list(entry[:lengths[1]]), ['\0'] * lengths[1] )
                                    l = list()
                                    map(l.extend, uc)

                                    file_data[ 
                                        offsets[1] : offsets[1] + lengths[1]*2 ] = l
                                        
                                else:
                                    
                                    uc = zip(
                                            list(entry), ['\0'] * len(entry) )
                                    l = list()
                                    map(l.extend, uc)

                                    file_data[ 
                                        offsets[1] : offsets[1] + len(entry)*2 ] = l

                                    remainder = lengths[1] - len(entry)
                                    file_data[ 
                                        offsets[1] + len(entry)*2 : 
                                        offsets[1] + lengths[1]*2 ] = [
                                            u'\0' ] * remainder*2

        new_file_data = ''.join( [ chr(ord(c)) for c in file_data ] )

        if filename:
            f = file(filename, 'wb+')
            f.write(new_file_data)
            f.close()

        return new_file_data
        

                
    def parse_sections(self, offset):
        """Fetch the PE file sections.
        
        The sections will be readily available in the "sections" attribute.
        Its attributes will contain all the section information plus "data"
        a buffer containing the section's data.
        
        The "Characteristics" member will be processed and attributes 
        representing the section characteristics (with the 'IMAGE_SCN_'
        string trimmed from the constant's names) will be added to the
        section instance.
        
        Refer to the SectionStructure class for additional info.
        """
        
        self.sections = []
        
        for i in xrange(self.FILE_HEADER.NumberOfSections):
            section = SectionStructure(self.__IMAGE_SECTION_HEADER_format__)
            if not section:
                break
            section_offset = offset + section.sizeof() * i
            section.set_file_offset(section_offset)
            section.__unpack__(self.__data__[section_offset:])
            self.__structures__.append(section)
                        
            if section.SizeOfRawData > len(self.__data__):
                self.__warnings.append(
                    ('Error parsing section %d. ' % i) +
                    'SizeOfRawData is larger than file.')

            if section.PointerToRawData > len(self.__data__):
                self.__warnings.append(
                    ('Error parsing section %d. ' % i) +
                    'PointerToRawData points beyond the end of the file.')

            if section.Misc_VirtualSize > 0x10000000:
                self.__warnings.append(
                    ('Suspicious value found parsing section %d. ' % i) +
                    'VirtualSize is extremely large > 256MiB.')

            if section.VirtualAddress > 0x10000000:
                self.__warnings.append(
                    ('Suspicious value found parsing section %d. ' % i) +
                    'VirtualAddress is beyond 0x10000000.')

            #
            # Some packer used a non-aligned PointerToRawData in the sections,
            # which causes several common tools not to load the section data
            # properly as they blindly read from the indicated offset.
            # It seems that Windows will round the offset down to the largest
            # offset multiple of FileAlignment which is smaller than
            # PointerToRawData. The following code will do the same.
            #
            
            #alignment = self.OPTIONAL_HEADER.FileAlignment
            section_data_start = section.PointerToRawData
            
            if ( self.OPTIONAL_HEADER.FileAlignment != 0 and 
                (section.PointerToRawData % self.OPTIONAL_HEADER.FileAlignment) != 0):
                self.__warnings.append(
                    ('Error parsing section %d. ' % i) +
                    'Suspicious value for FileAlignment in the Optional Header. ' +
                    'Normally the PointerToRawData entry of the sections\' structures ' +
                    'is a multiple of FileAlignment, this might imply the file ' +
                    'is trying to confuse tools which parse this incorrectly')
            
            section_data_end = section_data_start+section.SizeOfRawData
            section.set_data(self.__data__[section_data_start:section_data_end])
            
            section_flags = self.retrieve_flags(SECTION_CHARACTERISTICS, 'IMAGE_SCN_')
            
            # Set the section's flags according the the Characteristics member
            self.set_flags(section, section.Characteristics, section_flags)

            if ( section.__dict__.get('IMAGE_SCN_MEM_WRITE', False)  and 
                section.__dict__.get('IMAGE_SCN_MEM_EXECUTE', False) ):
                
                self.__warnings.append(
                    ('Suspicious flags set for section %d. ' % i) +
                    'Both IMAGE_SCN_MEM_WRITE and IMAGE_SCN_MEM_EXECUTE are set.' +
                    'This might indicate a packed executable.')

            self.sections.append(section)
            
        if self.FILE_HEADER.NumberOfSections > 0 and self.sections:
            return offset + self.sections[0].sizeof()*self.FILE_HEADER.NumberOfSections
        else:
            return offset

        
    def retrieve_flags(self, flag_dict, flag_filter):
        """Read the flags from a dictionary and return them in a usable form.
        
        Will return a list of (flag, value) for all flags in "flag_dict"
        matching the filter "flag_filter".
        """
        
        return [(f[0], f[1]) for f in flag_dict.items() if
                isinstance(f[0], str) and f[0].startswith(flag_filter)]

                
    def set_flags(self, obj, flag_field, flags):
        """Will process the flags and set attributes in the object accordingly.
        
        The object "obj" will gain attritutes named after the flags provided in
        "flags" and valued True/False, matching the results of applyin each
        flag value from "flags" to flag_field.
        """
    
        for flag in flags:
            if flag[1] & flag_field:
                setattr(obj, flag[0], True)
            else:
                setattr(obj, flag[0], False)
    
    
            
    def parse_data_directories(self):
        """Parse and process the PE file's data directories."""
        
        directory_parsing = (
            ('IMAGE_DIRECTORY_ENTRY_IMPORT', self.parse_import_directory),
            ('IMAGE_DIRECTORY_ENTRY_EXPORT', self.parse_export_directory),
            ('IMAGE_DIRECTORY_ENTRY_RESOURCE', self.parse_resources_directory),
            ('IMAGE_DIRECTORY_ENTRY_DEBUG', self.parse_debug_directory),
            ('IMAGE_DIRECTORY_ENTRY_BASERELOC', self.parse_relocations_directory),
            ('IMAGE_DIRECTORY_ENTRY_TLS', self.parse_directory_tls),
            ('IMAGE_DIRECTORY_ENTRY_DELAY_IMPORT', self.parse_delay_import_directory),
            ('IMAGE_DIRECTORY_ENTRY_BOUND_IMPORT', self.parse_directory_bound_imports) )
            
        for entry in directory_parsing:
            # OC Patch:
            #
            try:
                dir_entry = self.OPTIONAL_HEADER.DATA_DIRECTORY[
                    DIRECTORY_ENTRY[entry[0]]]
            except IndexError:
                break
            if dir_entry.VirtualAddress:
                value = entry[1](dir_entry.VirtualAddress, dir_entry.Size)
                if value:
                    setattr(self, entry[0][6:], value)
        
        
    def parse_directory_bound_imports(self, rva, size):
        """"""
        
        bnd_descr = Structure(self.__IMAGE_BOUND_IMPORT_DESCRIPTOR_format__)
        bnd_descr_size = bnd_descr.sizeof()
        start = rva
        
        bound_imports = []
        while True:

            bnd_descr = self.__unpack_data__(
                self.__IMAGE_BOUND_IMPORT_DESCRIPTOR_format__,
                   self.__data__[rva:rva+bnd_descr_size],
                   file_offset = rva)
            if bnd_descr is None:
                # If can't parse directory then silently return.
                # This directory does not necesarily have to be valid to
                # still have a valid PE file

                self.__warnings.append(
                    'The Bound Imports directory exists but can\'t be parsed.')

                return
                   
            if bnd_descr.all_zeroes():
                break
                
            rva += bnd_descr.sizeof()
            
            forwarder_refs = []
            for idx in xrange(bnd_descr.NumberOfModuleForwarderRefs):
                # Both structures IMAGE_BOUND_IMPORT_DESCRIPTOR and
                # IMAGE_BOUND_FORWARDER_REF have the same size.
                bnd_frwd_ref = self.__unpack_data__(
                    self.__IMAGE_BOUND_FORWARDER_REF_format__,
                    self.__data__[rva:rva+bnd_descr_size],
                    file_offset = rva)
                # OC Patch:
                if not bnd_frwd_ref:
                    raise PEFormatError(
                        "IMAGE_BOUND_FORWARDER_REF cannot be read")
                rva += bnd_frwd_ref.sizeof()
                
                name_str =  self.get_string_from_data(
                    start+bnd_frwd_ref.OffsetModuleName, self.__data__)
                    
                if not name_str:
                    break
                forwarder_refs.append(BoundImportRefData(
                    struct = bnd_frwd_ref,
                    name = name_str))
                
            name_str = self.get_string_from_data(
                start+bnd_descr.OffsetModuleName, self.__data__)
                
            if not name_str:
                break
            bound_imports.append(
                BoundImportDescData(
                    struct = bnd_descr,
                    name = name_str,
                    entries = forwarder_refs))
                    
        return bound_imports

        
    def parse_directory_tls(self, rva, size):
        """"""
            
        if self.PE_TYPE == OPTIONAL_HEADER_MAGIC_PE:
            format = self.__IMAGE_TLS_DIRECTORY_format__
            
        elif self.PE_TYPE == OPTIONAL_HEADER_MAGIC_PE_PLUS:
            format = self.__IMAGE_TLS_DIRECTORY64_format__

        tls_struct = self.__unpack_data__(
            format,
            self.get_data(rva),
            file_offset = self.get_offset_from_rva(rva))
            
        if not tls_struct:
            return None
                
        return TlsData( struct = tls_struct )
    
    
    def parse_relocations_directory(self, rva, size):
        """"""
        
        rlc = Structure(self.__IMAGE_BASE_RELOCATION_format__)
        rlc_size = rlc.sizeof()
        end = rva+size
        
        relocations = []
        while rva<end:
            
            # OC Patch:
            # Malware that has bad rva entries will cause an error.
            # Just continue on after an exception
            #
            try:
                rlc = self.__unpack_data__(
                    self.__IMAGE_BASE_RELOCATION_format__,
                    self.get_data(rva, rlc_size),
                    file_offset = self.get_offset_from_rva(rva) )
            except PEFormatError:
                self.__warnings.append(
                    'Invalid relocation information. Can\'t read ' +
                    'data at RVA: 0x%x' % rva)
                rlc = None
            
            if not rlc:
                break
                
            reloc_entries = self.parse_relocations(
                rva+rlc_size, rlc.VirtualAddress, rlc.SizeOfBlock-rlc_size)
                
            relocations.append(
                BaseRelocationData(
                    struct = rlc,
                    entries = reloc_entries))
            
            if not rlc.SizeOfBlock:
                break
            rva += rlc.SizeOfBlock
            
        return relocations
    
        
    def parse_relocations(self, data_rva, rva, size):
        """"""
        
        data = self.get_data(data_rva, size)
        
        entries = []
        for idx in xrange(len(data)/2):
            word = struct.unpack('<H', data[idx*2:(idx+1)*2])[0]
            reloc_type = (word>>12)
            reloc_offset = (word&0x0fff)
            entries.append(
                RelocationData(
                    type = reloc_type,
                    rva = reloc_offset+rva))
            
        return entries

        
    def parse_debug_directory(self, rva, size):
        """"""
            
        dbg = Structure(self.__IMAGE_DEBUG_DIRECTORY_format__)
        dbg_size = dbg.sizeof()
        
        debug = []
        for idx in xrange(size/dbg_size):
            try:
                data = self.get_data(rva+dbg_size*idx, dbg_size)
            except PEFormatError, e:
                self.__warnings.append(
                    'Invalid debug information. Can\'t read ' +
                    'data at RVA: 0x%x' % rva)
                return None
                
            dbg = self.__unpack_data__(
                self.__IMAGE_DEBUG_DIRECTORY_format__,
                data, file_offset = self.get_offset_from_rva(rva+dbg_size*idx))
                
            if not dbg:
                return None
                
            debug.append(
                DebugData(
                    struct = dbg))
            
        return debug

                        
    def parse_resources_directory(self, rva, size=0, base_rva = None, level = 0):
        """Parse the resources directory.
        
        Given the rva of the resources directory, it will process all
        its entries.
        
        The root will have the corresponding member of its structure,
        IMAGE_RESOURCE_DIRECTORY plus 'entries', a list of all the
        entries in the directory.
        
        Those entries will have, correspondingly, all the structure's
        members (IMAGE_RESOURCE_DIRECTORY_ENTRY) and an additional one,
        "directory", pointing to the IMAGE_RESOURCE_DIRECTORY structure
        representing upper layers of the tree. This one will also have
        an 'entries' attribute, pointing to the 3rd, and last, level.
        Another directory with more entries. Those last entries will
        have a new atribute (both 'leaf' or 'data_entry' can be used to
        access it). This structure finally points to the resource data.
        All the members of this structure, IMAGE_RESOURCE_DATA_ENTRY,
        are available as its attributes.
        """
        
        # OC Patch:
        original_rva = rva
        
        if base_rva is None:
            base_rva = rva
        
        resources_section = self.get_section_by_rva(rva)
        
        try:
            # If the RVA is invalid all would blow up. Some EXEs seem to be
            # specially nasty and have an invalid RVA.
            data = self.get_data(rva)
        except PEFormatError, e:
            self.__warnings.append(
                'Invalid resources directory. Can\'t read ' +
                'directory data at RVA: 0x%x' % rva)
            return None
            
        # Get the resource directory structure, that is, the header
        # of the table preceding the actual entries
        #
        resource_dir = self.__unpack_data__(
            self.__IMAGE_RESOURCE_DIRECTORY_format__, data,
            file_offset = self.get_offset_from_rva(rva) )
        if resource_dir is None:
            # If can't parse resources directory then silently return.
            # This directory does not necesarily have to be valid to
            # still have a valid PE file
            self.__warnings.append(
                'Invalid resources directory. Can\'t parse ' +
                'directory data at RVA: 0x%x' % rva)
            return None
        
        dir_entries = []
        
        # Advance the rva to the positon immediately following the directory
        # table header and pointing to the first entry in the table
        #
        rva += resource_dir.sizeof()
        
        number_of_entries = (
            resource_dir.NumberOfNamedEntries +
            resource_dir.NumberOfIdEntries )
            
        strings_to_postprocess = list()
        
        for idx in xrange(number_of_entries):
            
            res = self.parse_resource_entry(rva)
            if res is None:
                self.__warnings.append(
                    'Error parsing the resources directory, ' +
                    'Entry %d is invalid, RVA = 0x%x. ' % 
                    (idx, rva) )
                break
            

            entry_name = None
            entry_id = None
            
            # If all named entries have been processed, only Id ones
            # remain
            
            if idx >= resource_dir.NumberOfNamedEntries:
                entry_id = res.Name
            else:
                ustr_offset = base_rva+res.NameOffset
                try:
                    #entry_name = self.get_string_u_at_rva(ustr_offset, max_length=16)
                    entry_name = UnicodeStringWrapperPostProcessor(self, ustr_offset)
                    strings_to_postprocess.append(entry_name)
                    
                except PEFormatError, excp:
                    self.__warnings.append(
                        'Error parsing the resources directory, ' +
                        'attempting to read entry name. ' +
                        'Can\'t read unicode string at offset 0x%x' % 
                        (ustr_offset) )
                
                
            if res.DataIsDirectory:
                # OC Patch:
                #
                # One trick malware can do is to recursively reference
                # the next directory. This causes hilarity to ensue when
                # trying to parse everything correctly.
                # If the original RVA given to this function is equal to
                # the next one to parse, we assume that it's a trick.
                # Instead of raising a PEFormatError this would skip some
                # reasonable data so we just break.
                #
                # 9ee4d0a0caf095314fd7041a3e4404dc is the offending sample
                if original_rva == (base_rva + res.OffsetToDirectory):
                    
                    break
                    
                else:
                    entry_directory = self.parse_resources_directory(
                        base_rva+res.OffsetToDirectory,
                        base_rva=base_rva, level = level+1)

                if not entry_directory:
                    break
                dir_entries.append(
                    ResourceDirEntryData(
                        struct = res,
                        name = entry_name,
                        id = entry_id,
                        directory = entry_directory))

            else:
                struct = self.parse_resource_data_entry(
                    base_rva + res.OffsetToDirectory)

                if struct:
                    entry_data = ResourceDataEntryData(
                        struct = struct,
                        lang = res.Name & 0xff,
                        sublang = (res.Name>>8) & 0xff)
                        
                    dir_entries.append(
                        ResourceDirEntryData(
                            struct = res,
                            name = entry_name,
                            id = entry_id,
                            data = entry_data))
                    
                else:
                    break


                
            # Check if this entry contains version information
            #
            if level == 0 and res.Id == RESOURCE_TYPE['RT_VERSION']:
                if len(dir_entries)>0:
                    last_entry = dir_entries[-1]
                    
                rt_version_struct = None
                try:
                    rt_version_struct = last_entry.directory.entries[0].directory.entries[0].data.struct
                except:
                    # Maybe a malformed directory structure...?
                    # Lets ignore it
                    pass
                    
                if rt_version_struct is not None:
                    self.parse_version_information(rt_version_struct)
    
            rva += res.sizeof()
            
                    
        string_rvas = [s.get_rva() for s in strings_to_postprocess]
        string_rvas.sort()
        
        for idx, s in enumerate(strings_to_postprocess):
            s.render_pascal_16()
            
            
        resource_directory_data = ResourceDirData(
            struct = resource_dir,
            entries = dir_entries)
                        
        return resource_directory_data
        
            
    def parse_resource_data_entry(self, rva):
        """Parse a data entry from the resources directory."""
    
        try:
            # If the RVA is invalid all would blow up. Some EXEs seem to be
            # specially nasty and have an invalid RVA.
            data = self.get_data(rva)
        except PEFormatError, excp:
            self.__warnings.append(
                'Error parsing a resource directory data entry, ' +
                'the RVA is invalid: 0x%x' % ( rva ) )
            return None
            
        data_entry = self.__unpack_data__(
            self.__IMAGE_RESOURCE_DATA_ENTRY_format__, data,
            file_offset = self.get_offset_from_rva(rva) )
            
        return data_entry

        
    def parse_resource_entry(self, rva):
        """Parse a directory entry from the resources directory."""

        resource = self.__unpack_data__(
            self.__IMAGE_RESOURCE_DIRECTORY_ENTRY_format__, self.get_data(rva),
            file_offset = self.get_offset_from_rva(rva) )
            
        if resource is None:
            return None
            
        #resource.NameIsString = (resource.Name & 0x80000000L) >> 31
        resource.NameOffset = resource.Name & 0x7FFFFFFFL
        
        resource.__pad = resource.Name & 0xFFFF0000L
        resource.Id = resource.Name & 0x0000FFFFL
        
        resource.DataIsDirectory = (resource.OffsetToData & 0x80000000L) >> 31
        resource.OffsetToDirectory = resource.OffsetToData & 0x7FFFFFFFL
        
        return resource
            
            
    def parse_version_information(self, version_struct):
        """Parse version information structure.
        
        The date will be made available in three attributes of the PE object.
        
        VS_VERSIONINFO     will contain the first three fields of the main structure:
            'Length', 'ValueLength', and 'Type'
            
        VS_FIXEDFILEINFO    will hold the rest of the fields, accessible as sub-attributes:
            'Signature', 'StrucVersion', 'FileVersionMS', 'FileVersionLS',
            'ProductVersionMS', 'ProductVersionLS', 'FileFlagsMask', 'FileFlags',
            'FileOS', 'FileType', 'FileSubtype', 'FileDateMS', 'FileDateLS'
            
        FileInfo    is a list of all StringFileInfo and VarFileInfo structures.
        
        StringFileInfo structures will have a list as an attribute named 'StringTable'
        containing all the StringTable structures. Each of those structures contains a 
        dictionary 'entries' with all the key/value version information string pairs.
        
        VarFileInfo structures will have a list as an attribute named 'Var' containing
        all Var structures. Each Var structure will have a dictionary as an attribute
        named 'entry' which will contain the name and value of the Var.
        """
    
    
        # Retrieve the data for the version info resource
        #
        start_offset = self.get_offset_from_rva( version_struct.OffsetToData )
        raw_data = self.__data__[ start_offset : start_offset+version_struct.Size ]
        
        
        # Map the main structure and the subsequent string
        #    
        versioninfo_struct = self.__unpack_data__(
            self.__VS_VERSIONINFO_format__, raw_data, 
            file_offset = start_offset )
            
        if versioninfo_struct is None:
            return 
            
        ustr_offset = version_struct.OffsetToData + versioninfo_struct.sizeof()
        try:
            versioninfo_string = self.get_string_u_at_rva( ustr_offset )
        except PEFormatError, excp:
            self.__warnings.append(
                'Error parsing the version information, ' +
                'attempting to read VS_VERSION_INFO string. Can\'t ' +
                'read unicode string at offset 0x%x' % (
                ustr_offset ) )
                
            versioninfo_string = None
         
        # If the structure does not contain the expected name, it's assumed to be invalid
        #            
        if versioninfo_string != u'VS_VERSION_INFO':

            self.__warnings.append('Invalid VS_VERSION_INFO block')
            return


        # Set the PE object's VS_VERSIONINFO to this one
        #
        self.VS_VERSIONINFO = versioninfo_struct

        # The the Key attribute to point to the unicode string identifying the structure
        #        
        self.VS_VERSIONINFO.Key = versioninfo_string


        # Process the fixed version information, get the offset and structure
        #
        fixedfileinfo_offset = self.dword_align(
            versioninfo_struct.sizeof() + 2 * (len(versioninfo_string) + 1),
            version_struct.OffsetToData)
        fixedfileinfo_struct = self.__unpack_data__(
            self.__VS_FIXEDFILEINFO_format__,
            raw_data[fixedfileinfo_offset:], 
            file_offset = start_offset+fixedfileinfo_offset )

        if not fixedfileinfo_struct:
            return


        # Set the PE object's VS_FIXEDFILEINFO to this one
        #
        self.VS_FIXEDFILEINFO = fixedfileinfo_struct
        
        
        # Start parsing all the StringFileInfo and VarFileInfo structures
        #
        
        # Get the first one
        #
        stringfileinfo_offset = self.dword_align(
            fixedfileinfo_offset + fixedfileinfo_struct.sizeof(),
            version_struct.OffsetToData)
        original_stringfileinfo_offset = stringfileinfo_offset
        
        
        # Set the PE object's attribute that will contain them all.
        #
        self.FileInfo = list()


        while True:
        
            # Process the StringFileInfo/VarFileInfo struct
            #
            stringfileinfo_struct = self.__unpack_data__(
                self.__StringFileInfo_format__, 
                raw_data[stringfileinfo_offset:], 
                file_offset = start_offset+stringfileinfo_offset )
                
            if stringfileinfo_struct is None:
                self.__warnings.append(
                    'Error parsing StringFileInfo/VarFileInfo struct' )
                return None
            
            # Get the subsequent string defining the structure.
            #
            ustr_offset = ( version_struct.OffsetToData + 
                stringfileinfo_offset + versioninfo_struct.sizeof() )
            try:
                stringfileinfo_string = self.get_string_u_at_rva( ustr_offset )
            except PEFormatError, excp:
                self.__warnings.append(
                    'Error parsing the version information, ' +
                    'attempting to read StringFileInfo string. Can\'t ' +
                    'read unicode string at offset 0x%x' %  ( ustr_offset ) )
                break
        
            # Set such string as the Key attribute
            #
            stringfileinfo_struct.Key = stringfileinfo_string
            
            
            # Append the structure to the PE object's list
            #
            self.FileInfo.append(stringfileinfo_struct)
        
        
            # Parse a StringFileInfo entry
            #
            if stringfileinfo_string == u'StringFileInfo':
                
                if stringfileinfo_struct.Type == 1 and stringfileinfo_struct.ValueLength == 0:
            
                    stringtable_offset = self.dword_align(
                        stringfileinfo_offset + stringfileinfo_struct.sizeof() + 
                            2*(len(stringfileinfo_string)+1),
                        version_struct.OffsetToData)
                  
                    stringfileinfo_struct.StringTable = list()

                    # Process the String Table entries
                    #
                    while True:
                        stringtable_struct = self.__unpack_data__(
                            self.__StringTable_format__,
                            raw_data[stringtable_offset:], 
                            file_offset = start_offset+stringtable_offset )
                            
                        if not stringtable_struct:
                            break
                            
                        ustr_offset = ( version_struct.OffsetToData + stringtable_offset + 
                            stringtable_struct.sizeof() )
                        try:
                            stringtable_string = self.get_string_u_at_rva( ustr_offset )
                        except PEFormatError, excp:
                            self.__warnings.append(
                                'Error parsing the version information, ' +
                                'attempting to read StringTable string. Can\'t ' +
                                'read unicode string at offset 0x%x' % ( ustr_offset ) )
                            break
                        
                        stringtable_struct.LangID = stringtable_string
                        stringtable_struct.entries = dict()
                        stringtable_struct.entries_offsets = dict()
                        stringtable_struct.entries_lengths = dict()
                        stringfileinfo_struct.StringTable.append(stringtable_struct)
            
                        entry_offset = self.dword_align(
                            stringtable_offset + stringtable_struct.sizeof() +
                                2*(len(stringtable_string)+1),
                            version_struct.OffsetToData)
            
                        # Process all entries in the string table
                        #
            
                        while entry_offset < stringtable_offset + stringtable_struct.Length:
                    
                            string_struct = self.__unpack_data__(
                                self.__String_format__, raw_data[entry_offset:], 
                                file_offset = start_offset+entry_offset )
                                
                            if not string_struct:
                                break
                                
                            ustr_offset = ( version_struct.OffsetToData + entry_offset +
                                string_struct.sizeof() )
                            try:
                                key = self.get_string_u_at_rva( ustr_offset )
                                key_offset = self.get_offset_from_rva( ustr_offset )
                            except PEFormatError, excp:
                                self.__warnings.append(
                                    'Error parsing the version information, ' +
                                    'attempting to read StringTable Key string. Can\'t ' +
                                    'read unicode string at offset 0x%x' % ( ustr_offset ) )
                                break
                                
                            value_offset = self.dword_align(
                                2*(len(key)+1) + entry_offset + string_struct.sizeof(),
                                version_struct.OffsetToData)
            
                            ustr_offset = version_struct.OffsetToData + value_offset
                            try:
                                value = self.get_string_u_at_rva( ustr_offset,
                                    max_length = string_struct.ValueLength )
                                value_offset = self.get_offset_from_rva( ustr_offset )
                            except PEFormatError, excp:
                                self.__warnings.append(
                                    'Error parsing the version information, ' +
                                    'attempting to read StringTable Value string. ' +
                                    'Can\'t read unicode string at offset 0x%x' % (
                                    ustr_offset ) )
                                break
                                
                            if string_struct.Length == 0:
                                entry_offset = stringtable_offset + stringtable_struct.Length
                            else:
                                entry_offset = self.dword_align(
                                    string_struct.Length+entry_offset, version_struct.OffsetToData)
                                
                            key_as_char = []
                            for c in key:
                                if ord(c)>128:
                                    key_as_char.append('\\x%02x' %ord(c))
                                else:
                                    key_as_char.append(c)
                            
                            key_as_char = ''.join(key_as_char)

                            setattr(stringtable_struct, key_as_char, value)
                            stringtable_struct.entries[key] = value
                            stringtable_struct.entries_offsets[key] = (key_offset, value_offset)
                            stringtable_struct.entries_lengths[key] = (len(key), len(value))
                            
                    
                        stringtable_offset = self.dword_align(
                            stringtable_struct.Length + stringtable_offset,
                            version_struct.OffsetToData)
                        if stringtable_offset >= stringfileinfo_struct.Length:
                            break
                    
            # Parse a VarFileInfo entry
            #
            elif stringfileinfo_string == u'VarFileInfo':
            
                varfileinfo_struct = stringfileinfo_struct
                varfileinfo_struct.name = 'VarFileInfo'
            
                if varfileinfo_struct.Type == 1 and varfileinfo_struct.ValueLength == 0:
              
                    var_offset = self.dword_align(
                        stringfileinfo_offset + varfileinfo_struct.sizeof() +
                            2*(len(stringfileinfo_string)+1),
                        version_struct.OffsetToData)
                        
                    varfileinfo_struct.Var = list()
              
                    # Process all entries
                    #

                    while True:
                        var_struct = self.__unpack_data__(
                            self.__Var_format__,
                            raw_data[var_offset:], 
                            file_offset = start_offset+var_offset )
                            
                        if not var_struct:
                            break
                            
                        ustr_offset = ( version_struct.OffsetToData + var_offset + 
                            var_struct.sizeof() )
                        try:
                            var_string = self.get_string_u_at_rva( ustr_offset )
                        except PEFormatError, excp:
                            self.__warnings.append(
                                'Error parsing the version information, ' +
                                'attempting to read VarFileInfo Var string. ' +
                                'Can\'t read unicode string at offset 0x%x' % (ustr_offset))
                            break

                        
                        varfileinfo_struct.Var.append(var_struct)
                
                        varword_offset = self.dword_align(
                            2*(len(var_string)+1) + var_offset + var_struct.sizeof(),
                            version_struct.OffsetToData)
                        orig_varword_offset = varword_offset
                            
                        while varword_offset < orig_varword_offset + var_struct.ValueLength:
                            word1 = self.get_word_from_data(
                                raw_data[varword_offset:varword_offset+2], 0)
                            word2 = self.get_word_from_data(
                                raw_data[varword_offset+2:varword_offset+4], 0)
                            varword_offset += 4
        
                            var_struct.entry = {var_string: '0x%04x 0x%04x' % (word1, word2)}

                        var_offset = self.dword_align(
                            var_offset+var_struct.Length, version_struct.OffsetToData)
                            
                        if var_offset <= var_offset+var_struct.Length:
                            break
              
              
              
            # Increment and align the offset
            #
            stringfileinfo_offset = self.dword_align(
                stringfileinfo_struct.Length+stringfileinfo_offset,
                version_struct.OffsetToData)
          
            # Check if all the StringFileInfo and VarFileInfo items have been processed
            #
            if stringfileinfo_struct.Length == 0 or stringfileinfo_offset >= versioninfo_struct.Length:
                break
            
            
                            
    def parse_export_directory(self, rva, size):
        """Parse the export directory.
        
        Given the rva of the export directory, it will process all
        its entries.
        
        The exports will be made available through a list "exports"
        containing a tuple with the following elements:
        
            (ordinal, symbol_address, symbol_name)
            
        And also through a dicionary "exports_by_ordinal" whose keys
        will be the ordinals and the values tuples of the from:
        
            (symbol_address, symbol_name)
            
        The symbol addresses are relative, not absolute.
        """
    
        try:
            export_dir =  self.__unpack_data__(
                self.__IMAGE_EXPORT_DIRECTORY_format__, self.get_data(rva),
                file_offset = self.get_offset_from_rva(rva) )
        except PEFormatError:
            self.__warnings.append(
                'Error parsing export directory at RVA: 0x%x' % ( rva ) )
            return
        
        if not export_dir:
            return
        
        try:
            address_of_names = self.get_data(
                export_dir.AddressOfNames, export_dir.NumberOfNames*4)
            address_of_name_ordinals = self.get_data(
                export_dir.AddressOfNameOrdinals, export_dir.NumberOfNames*4)
            address_of_functions = self.get_data(
                export_dir.AddressOfFunctions, export_dir.NumberOfFunctions*4)
        except PEFormatError:
            self.__warnings.append(
                'Error parsing export directory at RVA: 0x%x' % ( rva ) )
            return
            
        exports = []
        
        for i in xrange(export_dir.NumberOfNames):
        
                
            symbol_name = self.get_string_at_rva(
                self.get_dword_from_data(address_of_names, i))
            
            symbol_ordinal = self.get_word_from_data(
                address_of_name_ordinals, i)
                
            
            if symbol_ordinal*4<len(address_of_functions):
                symbol_address = self.get_dword_from_data(
                    address_of_functions, symbol_ordinal)
            else:
                # Corrupt? a bad pointer... we assume it's all
                # useless, no exports
                return None
            
            # If the funcion's rva points within the export directory
            # it will point to a string with the forwarded symbol's string
            # instead of pointing the the function start address.
            
            if symbol_address>=rva and symbol_address<rva+size:
                forwarder_str = self.get_string_at_rva(symbol_address)
            else:
                forwarder_str = None
        
            
            exports.append(
                ExportData(
                    ordinal = export_dir.Base+symbol_ordinal,
                    address = symbol_address,
                    name = symbol_name,
                    forwarder = forwarder_str))
                    
        ordinals = [exp.ordinal for exp in exports]
        
        for idx in xrange(export_dir.NumberOfFunctions):

            if not idx+export_dir.Base in ordinals:
                symbol_address = self.get_dword_from_data(
                    address_of_functions, 
                    idx)
                
                #
                # Checking for forwarder again.
                #
                if symbol_address>=rva and symbol_address<rva+size:
                    forwarder_str = self.get_string_at_rva(symbol_address)
                else:
                    forwarder_str = None
                    
                exports.append(
                    ExportData(
                        ordinal = export_dir.Base+idx,
                        address = symbol_address,
                        name = None,
                        forwarder = forwarder_str))
                      
        return ExportDirData(
                struct = export_dir,
                symbols = exports)
        
                    
    def dword_align(self, offset, base):
        offset += base
        return (offset+3) - ((offset+3)%4) - base



    def parse_delay_import_directory(self, rva, size):
        """Walk and parse the delay import directory."""
        
        import_descs =  []
        while True:
            try:
                # If the RVA is invalid all would blow up. Some PEs seem to be
                # specially nasty and have an invalid RVA.
                data = self.get_data(rva)
            except PEFormatError, e:
                self.__warnings.append(
                    'Error parsing the Delay import directory at RVA: 0x%x' % ( rva ) )
                break
                
            import_desc =  self.__unpack_data__(
                self.__IMAGE_DELAY_IMPORT_DESCRIPTOR_format__,
                data, file_offset = self.get_offset_from_rva(rva) )
            
            
            # If the structure is all zeores, we reached the end of the list
            if not import_desc or import_desc.all_zeroes():
                break
            
            
            rva += import_desc.sizeof()
            
            try:
                import_data =  self.parse_imports(
                    import_desc.pINT,
                    import_desc.pIAT,
                    None)
            except PEFormatError, e:
                self.__warnings.append(
                    'Error parsing the Delay import directory. ' +
                    'Invalid import data at RVA: 0x%x' % ( rva ) )
                break
            
            if not import_data:
                continue
            
            
            dll = self.get_string_at_rva(import_desc.szName)
            if dll:
                import_descs.append(
                    ImportDescData(
                        struct = import_desc,
                        imports = import_data,
                        dll = dll))
        
        return import_descs

                    

    def parse_import_directory(self, rva, size):
        """Walk and parse the import directory."""

        import_descs =  []
        while True:
            try:
                # If the RVA is invalid all would blow up. Some EXEs seem to be
                # specially nasty and have an invalid RVA.
                data = self.get_data(rva)
            except PEFormatError, e:
                self.__warnings.append(
                    'Error parsing the Import directory at RVA: 0x%x' % ( rva ) )
                break
                
            import_desc =  self.__unpack_data__(
                self.__IMAGE_IMPORT_DESCRIPTOR_format__,
                data, file_offset = self.get_offset_from_rva(rva) )
                
            # If the structure is all zeores, we reached the end of the list
            if not import_desc or import_desc.all_zeroes():
                break
                
            rva += import_desc.sizeof()
                        
            try:
                import_data =  self.parse_imports(
                    import_desc.OriginalFirstThunk,
                    import_desc.FirstThunk,
                    import_desc.ForwarderChain)
            except PEFormatError, excp:
                self.__warnings.append(
                    'Error parsing the Import directory. ' +
                    'Invalid Import data at RVA: 0x%x' % ( rva ) )
                break
                #raise excp
                
            if not import_data:
                continue
                
            dll = self.get_string_at_rva(import_desc.Name)
            if dll:
                import_descs.append(
                    ImportDescData(
                        struct = import_desc,
                        imports = import_data,
                        dll = dll))

        return import_descs
            

    
    def parse_imports(self, original_first_thunk, first_thunk, forwarder_chain):
        """Parse the imported symbols.
        
        It will fill a list, which will be avalable as the dictionary
        attribute "imports". Its keys will be the DLL names and the values
        all the symbols imported from that object.
        """
        
        imported_symbols = []
        imports_section = self.get_section_by_rva(first_thunk)
        if not imports_section:
            raise PEFormatError, 'Invalid/corrupt imports.'
            
        
        # Import Lookup Table. Contains ordinals or pointers to strings.
        ilt = self.get_import_table(original_first_thunk)
        # Import Address Table. May have identical content to ILT if
        # PE file is not bounded, Will contain the address of the
        # imported symbols once the binary is loaded or if it is already
        # bound.
        iat = self.get_import_table(first_thunk)

        # OC Patch:
        # Would crash if iat or ilt had None type 
        if not iat and not ilt:
            raise PEFormatError( 
                'Invalid Import Table information. ' +
                'Both ILT and IAT appear to be broken.')
            
        if not iat and ilt:
            table = ilt
        elif iat and not ilt:
            table = iat
        elif ilt and ((len(ilt) and len(iat)==0) or (len(ilt) == len(iat))):
            table = ilt
        elif (ilt and len(ilt))==0 and (iat and len(iat)):
            table = iat
        else:
            return None
            
        for idx in xrange(len(table)):

            imp_ord = None
            imp_hint = None
            imp_name = None
            hint_name_table_rva = None
                        
            if table[idx].AddressOfData:
            
                if self.PE_TYPE == OPTIONAL_HEADER_MAGIC_PE:
                    ordinal_flag = IMAGE_ORDINAL_FLAG
                elif self.PE_TYPE == OPTIONAL_HEADER_MAGIC_PE_PLUS:
                    ordinal_flag = IMAGE_ORDINAL_FLAG64
            
                # If imported by ordinal, we will append the ordinal number
                #
                if table[idx].AddressOfData & ordinal_flag:
                    import_by_ordinal = True
                    imp_ord = table[idx].AddressOfData & 0xffff
                    imp_name = None
                else:
                    import_by_ordinal = False
                    try:
                        hint_name_table_rva = table[idx].AddressOfData & 0x7fffffff
                        data = self.get_data(hint_name_table_rva, 2)
                        # Get the Hint
                        imp_hint = self.get_word_from_data(data, 0)
                        imp_name = self.get_string_at_rva(table[idx].AddressOfData+2)
                    except PEFormatError, e:
                        pass

            imp_address = first_thunk+self.OPTIONAL_HEADER.ImageBase+idx*4
                
            if iat and ilt and ilt[idx].AddressOfData != iat[idx].AddressOfData:
                imp_bound = iat[idx].AddressOfData
            else:
                imp_bound = None
                
            if imp_name != '' and (imp_ord or imp_name):
                imported_symbols.append(
                    ImportData(
                        import_by_ordinal = import_by_ordinal,
                        ordinal = imp_ord,
                        hint = imp_hint,
                        name = imp_name,
                        bound = imp_bound,
                        address = imp_address,
                        hint_name_table_rva = hint_name_table_rva))
            
        return imported_symbols
                


    def get_import_table(self, rva):
    
        table = []
        
        while True and rva:
            try:
                data = self.get_data(rva)
            except PEFormatError, e:
                self.__warnings.append(
                    'Error parsing the import table. ' +
                    'Invalid data at RVA: 0x%x' % ( rva ) )
                return None
                
            if self.PE_TYPE == OPTIONAL_HEADER_MAGIC_PE:
                format = self.__IMAGE_THUNK_DATA_format__
            elif self.PE_TYPE == OPTIONAL_HEADER_MAGIC_PE_PLUS:
                format = self.__IMAGE_THUNK_DATA64_format__
                
            thunk_data = self.__unpack_data__(
                format, data, file_offset=self.get_offset_from_rva(rva) )
                    
            if not thunk_data or thunk_data.all_zeroes():
                break
                
            rva += thunk_data.sizeof()
            
            table.append(thunk_data)
            
        return table
    
    
    def get_memory_mapped_image(self, max_virtual_address=0x10000000, ImageBase=None):
        """Returns the data corresponding to the memory layout of the PE file.
        
        The data includes the PE header and the sections loaded at offsets
        corresponding to their relative virtual addresses. (the VirtualAddress
        section header member).
        Any offset in this data corresponds to the absolute memory address
        ImageBase+offset.
        
        The optional argument 'max_virtual_address' provides with means of limiting
        which section are processed.
        Any section with their VirtualAddress beyond this value will be skipped.
        Normally, sections with values beyond this range are just there to confuse
        tools. It's a common trick to see in packed executables.
        
        If the 'ImageBase' optional argument is supplied, the file's relocations
        will be applied to the image by calling the 'relocate_image()' method.
        """
        
        # Collect all sections in one code block    
        data = self.header
        for section in self.sections:

            # Miscellanous integrity tests.
            # Some packer will set these to bogus values to
            # make tools go nuts.
            #
            if section.Misc_VirtualSize == 0 or section.SizeOfRawData == 0:
                continue
            
            if section.SizeOfRawData > len(self.__data__):
                continue
                
            if section.PointerToRawData > len(self.__data__):
                continue
        
            if section.VirtualAddress >= max_virtual_address:
                continue
                
            padding_length = section.VirtualAddress - len(data)
            
            if padding_length>0:
                data += '\0'*padding_length
            elif padding_length<0:
                data = data[:padding_length]
                
            data += section.data
            
        return data

            
    def get_data(self, rva, length=None):
        """Get data regardless of the section where it lies on.
        
        Given a rva and the size of the chunk to retrieve, this method
        will find the section where the data lies and return the data.
        """
        
        s = self.get_section_by_rva(rva)

        if not s:
            if rva<len(self.header):
                if length:
                    end = rva+length
                else:
                    end = None
                return self.header[rva:end]
                
            raise PEFormatError, 'data at RVA can\'t be fetched. Corrupt header?'

        return s.get_data(rva, length)


    def get_rva_from_offset(self, offset):
        """Get the rva corresponding to this file offset. """

        s = self.get_section_by_offset(offset)
        if not s:
            raise PEFormatError("specified offset (0x%x) doesn't belong to any section." % offset)
        return s.get_rva_from_offset(offset)

    def get_offset_from_rva(self, rva):
        """Get the file offset corresponding to this rva.
        
        Given a rva , this method will find the section where the
        data lies and return the offset within the file.
        """
        
        s = self.get_section_by_rva(rva)
        if not s:
                
            raise PEFormatError, 'data at RVA can\'t be fetched. Corrupt header?'
            
        return s.get_offset_from_rva(rva)
        
            
    def get_string_at_rva(self, rva):
        """Get an ASCII string located at the given address."""

        s = self.get_section_by_rva(rva)
        if not s:
            if rva<len(self.header):
                return self.get_string_from_data(rva, self.header)
            return None
                
        return self.get_string_from_data(rva-s.VirtualAddress, s.data)
        
        
    def get_string_from_data(self, offset, data):
        """Get an ASCII string from within the data."""

        # OC Patch
        b = None
        
        try:
            b = data[offset]
        except IndexError:
            return ''
        
        s = ''
        while ord(b):
            s += b
            offset += 1
            try:
                b = data[offset]
            except IndexError:
                break
          
        return s

                
    def get_string_u_at_rva(self, rva, max_length = 2**16):
        """Get an Unicode string located at the given address."""
        
        try:
            # If the RVA is invalid all would blow up. Some EXEs seem to be
            # specially nasty and have an invalid RVA.
            data = self.get_data(rva, 2)
        except PEFormatError, e:
            return None

        #length = struct.unpack('<H', data)[0]
                
        s = u''
        for idx in xrange(max_length):
            try:
                uchr = struct.unpack('<H', self.get_data(rva+2*idx, 2))[0]
            except struct.error:
                break
                
            if unichr(uchr) == u'\0':
                break
            s += unichr(uchr)
            
        return s


    def get_section_by_offset(self, offset):
        """Get the section containing the given file offset."""
    
        sections = [s for s in self.sections if s.contains_offset(offset)]
        
        if sections:
            return sections[0]
                    
        return None


    def get_section_by_rva(self, rva):
        """Get the section containing the given address."""
    
        sections = [s for s in self.sections if s.contains_rva(rva)]
        
        if sections:
            return sections[0]
                    
        return None
    
    def __str__(self):
        return self.dump_info()
    
            
    def print_info(self):
        """Print all the PE header information in a human readable from."""
        print self.dump_info()
        
        
    def dump_info(self, dump=None):
        """Dump all the PE header information into human readable string."""
        
        
        if dump is None:
            dump = Dump()
        
        warnings = self.get_warnings()
        if warnings:
            dump.add_header('Parsing Warnings')
            for warning in warnings:
                dump.add_line(warning)
                dump.add_newline()
                
        
        dump.add_header('DOS_HEADER')
        dump.add_lines(self.DOS_HEADER.dump())
        dump.add_newline()
        
        dump.add_header('NT_HEADERS')
        dump.add_lines(self.NT_HEADERS.dump())
        dump.add_newline()
        
        dump.add_header('FILE_HEADER')
        dump.add_lines(self.FILE_HEADER.dump())
        
        image_flags = self.retrieve_flags(IMAGE_CHARACTERISTICS, 'IMAGE_FILE_')
            
        dump.add('Flags: ')
        flags = []
        for flag in image_flags:
            if getattr(self.FILE_HEADER, flag[0]):
                flags.append(flag[0])
        dump.add_line(', '.join(flags))
        dump.add_newline()
        
        if hasattr(self, 'OPTIONAL_HEADER') and self.OPTIONAL_HEADER is not None:
            dump.add_header('OPTIONAL_HEADER')
            dump.add_lines(self.OPTIONAL_HEADER.dump())

        dll_characteristics_flags = self.retrieve_flags(DLL_CHARACTERISTICS, 'IMAGE_DLL_CHARACTERISTICS_')
            
        dump.add('DllCharacteristics: ')
        flags = []
        for flag in dll_characteristics_flags:
            if getattr(self.OPTIONAL_HEADER, flag[0]):
                flags.append(flag[0])
        dump.add_line(', '.join(flags))
        dump.add_newline()
        
        
        dump.add_header('PE Sections')
        
        section_flags = self.retrieve_flags(SECTION_CHARACTERISTICS, 'IMAGE_SCN_')
        
        for section in self.sections:
            dump.add_lines(section.dump())
            dump.add('Flags: ')
            flags = []
            for flag in section_flags:
                if getattr(section, flag[0]):
                    flags.append(flag[0])
            dump.add_line(', '.join(flags))
            dump.add_line('Entropy: %f (Min=0.0, Max=8.0)' % section.get_entropy() )
            if md5 is not None:
                dump.add_line('MD5     hash: %s' % section.get_hash_md5() )
            if sha1 is not None:
                dump.add_line('SHA-1   hash: %s' % section.get_hash_sha1() )
            if sha256 is not None:
                dump.add_line('SHA-256 hash: %s' % section.get_hash_sha256() )
            if sha512 is not None:
                dump.add_line('SHA-512 hash: %s' % section.get_hash_sha512() )
            dump.add_newline()
            
            
        
        if (hasattr(self, 'OPTIONAL_HEADER') and 
            hasattr(self.OPTIONAL_HEADER, 'DATA_DIRECTORY') ):
            
            dump.add_header('Directories')
            for idx in xrange(len(self.OPTIONAL_HEADER.DATA_DIRECTORY)):
                directory = self.OPTIONAL_HEADER.DATA_DIRECTORY[idx]
                dump.add_lines(directory.dump())
            dump.add_newline()


        if hasattr(self, 'VS_VERSIONINFO'):
            dump.add_header('Version Information')
            dump.add_lines(self.VS_VERSIONINFO.dump())
            dump.add_newline()

            if hasattr(self, 'VS_FIXEDFILEINFO'):
                dump.add_lines(self.VS_FIXEDFILEINFO.dump())
                dump.add_newline()

            if hasattr(self, 'FileInfo'):
                for entry in self.FileInfo:
                    dump.add_lines(entry.dump())
                    dump.add_newline()
                    
                    if hasattr(entry, 'StringTable'):
                        for st_entry in entry.StringTable:
                            [dump.add_line('  '+line) for line in st_entry.dump()]
                            dump.add_line('  LangID: '+st_entry.LangID)
                            dump.add_newline()
                            for str_entry in st_entry.entries.items():
                                dump.add_line('    '+str_entry[0]+': '+str_entry[1])
                        dump.add_newline()
                                
                    elif hasattr(entry, 'Var'):
                        for var_entry in entry.Var:
                            if hasattr(var_entry, 'entry'):
                                [dump.add_line('  '+line) for line in var_entry.dump()]
                                dump.add_line(
                                    '    ' + var_entry.entry.keys()[0] + 
                                    ': ' + var_entry.entry.values()[0])
                                    
                        dump.add_newline()


            
        if hasattr(self, 'DIRECTORY_ENTRY_EXPORT'):
            dump.add_header('Exported symbols')
            dump.add_lines(self.DIRECTORY_ENTRY_EXPORT.struct.dump())
            dump.add_newline()
            dump.add_line('%-10s   %-10s  %s' % ('Ordinal', 'RVA', 'Name'))
            for export in self.DIRECTORY_ENTRY_EXPORT.symbols:
                dump.add('%-10d 0x%08Xh    %s' % (
                    export.ordinal, export.address, export.name))
                if export.forwarder:
                    dump.add_line(' forwarder: %s' % export.forwarder)
                else:
                    dump.add_newline()
                
            dump.add_newline()
        
        if hasattr(self, 'DIRECTORY_ENTRY_IMPORT'):
            dump.add_header('Imported symbols')
            for module in self.DIRECTORY_ENTRY_IMPORT:
                dump.add_lines(module.struct.dump())
                dump.add_newline()
                for symbol in module.imports:
                
                    if symbol.import_by_ordinal is True:
                        dump.add('%s Ordinal[%s] (Imported by Ordinal)' % (
                            module.dll, str(symbol.ordinal)))
                    else:
                        dump.add('%s.%s Hint[%s]' % (
                            module.dll, symbol.name, str(symbol.hint)))

                    if symbol.bound:
                        dump.add_line(' Bound: 0x%08X' % (symbol.bound))
                    else:
                        dump.add_newline()
                dump.add_newline()
                
        
        if hasattr(self, 'DIRECTORY_ENTRY_BOUND_IMPORT'):
            dump.add_header('Bound imports')
            for bound_imp_desc in self.DIRECTORY_ENTRY_BOUND_IMPORT:
            
                dump.add_lines(bound_imp_desc.struct.dump())
                dump.add_line('DLL: %s' % bound_imp_desc.name)
                dump.add_newline()
                
                for bound_imp_ref in bound_imp_desc.entries:
                    dump.add_lines(bound_imp_ref.struct.dump(), 4)
                    dump.add_line('DLL: %s' % bound_imp_ref.name, 4)
                    dump.add_newline()


        if hasattr(self, 'DIRECTORY_ENTRY_DELAY_IMPORT'):
            dump.add_header('Delay Imported symbols')
            for module in self.DIRECTORY_ENTRY_DELAY_IMPORT:
            
                dump.add_lines(module.struct.dump())
                dump.add_newline()
                
                for symbol in module.imports:
                    if symbol.import_by_ordinal is True:
                        dump.add('%s Ordinal[%s] (Imported by Ordinal)' % (
                            module.dll, str(symbol.ordinal)))
                    else:
                        dump.add('%s.%s Hint[%s]' % (
                            module.dll, symbol.name, str(symbol.hint)))
                    
                    if symbol.bound:
                        dump.add_line(' Bound: 0x%08X' % (symbol.bound))
                    else:
                        dump.add_newline()
                dump.add_newline()
            
        
        if hasattr(self, 'DIRECTORY_ENTRY_RESOURCE'):
            dump.add_header('Resource directory')
            
            dump.add_lines(self.DIRECTORY_ENTRY_RESOURCE.struct.dump())
            
            for resource_type in self.DIRECTORY_ENTRY_RESOURCE.entries:
            
                if resource_type.name is not None:
                    dump.add_line('Name: [%s]' % resource_type.name, 2)
                else:
                    dump.add_line('Id: [0x%X] (%s)' % (
                        resource_type.struct.Id, RESOURCE_TYPE.get(
                            resource_type.struct.Id, '-')),
                        2)
                        
                dump.add_lines(resource_type.struct.dump(), 2)

                if hasattr(resource_type, 'directory'):
                
                    dump.add_lines(resource_type.directory.struct.dump(), 4)
                        
                    for resource_id in resource_type.directory.entries:
                    
                        if resource_id.name is not None:
                            dump.add_line('Name: [%s]' % resource_id.name, 6)
                        else:
                            dump.add_line('Id: [0x%X]' % resource_id.struct.Id, 6)
                            
                        dump.add_lines(resource_id.struct.dump(), 6)
    
                        if hasattr(resource_id, 'directory'):
                            dump.add_lines(resource_id.directory.struct.dump(), 8)
                            
                            for resource_lang in resource_id.directory.entries:
                            #    dump.add_line('\\--- LANG [%d,%d][%s]' % (
                            #        resource_lang.data.lang,
                            #        resource_lang.data.sublang,
                            #        LANG[resource_lang.data.lang]), 8)
                                dump.add_lines(resource_lang.struct.dump(), 10)
                                dump.add_lines(resource_lang.data.struct.dump(), 12)
                dump.add_newline()
    
            dump.add_newline()
        
        
        if ( hasattr(self, 'DIRECTORY_ENTRY_TLS') and 
             self.DIRECTORY_ENTRY_TLS and 
             self.DIRECTORY_ENTRY_TLS.struct ):
             
            dump.add_header('TLS')
            dump.add_lines(self.DIRECTORY_ENTRY_TLS.struct.dump())
            dump.add_newline()

        
        if hasattr(self, 'DIRECTORY_ENTRY_DEBUG'):
            dump.add_header('Debug information')
            for dbg in self.DIRECTORY_ENTRY_DEBUG:
                dump.add_lines(dbg.struct.dump())
                try:
                    dump.add_line('Type: '+DEBUG_TYPE[dbg.struct.Type])
                except KeyError:
                    dump.add_line('Type: 0x%x(Unknown)' % dbg.struct.Type)
                dump.add_newline()
        
        
        if hasattr(self, 'DIRECTORY_ENTRY_BASERELOC'):
            dump.add_header('Base relocations')
            for base_reloc in self.DIRECTORY_ENTRY_BASERELOC:
                dump.add_lines(base_reloc.struct.dump())
                for reloc in base_reloc.entries:
                    try:
                        dump.add_line('%08Xh %s' % (
                            reloc.rva, RELOCATION_TYPE[reloc.type][16:]), 4)
                    except KeyError:
                        dump.add_line('0x%08X 0x%x(Unknown)' % (
                            reloc.rva, reloc.type), 4)
                dump.add_newline()
        

        return dump.get_text()

    # OC Patch
    def get_physical_by_rva(self, rva):
        """Gets the physical address in the PE file from an RVA value."""
        try:
            return self.get_offset_from_rva(rva)
        except Exception:
            return None


    ##
    # Double-Word get/set
    ##

    def get_data_from_dword(self, dword):
        """Return a four byte string representing the double word value. (little endian)."""
        return struct.pack('<L', dword)


    def get_dword_from_data(self, data, offset):
        """Convert four bytes of data to a double word (little endian)
        
        'offset' is assumed to index into a dword array. So setting it to
        N will return a dword out of the data sarting at offset N*4.
        
        Returns None if the data can't be turned into a double word.
        """
        
        if (offset+1)*4 > len(data):
            return None
        
        return struct.unpack('<L', data[offset*4:(offset+1)*4])[0]
        
        
    def get_dword_at_rva(self, rva):
        """Return the double word value at the given RVA.
        
        Returns None if the value can't be read, i.e. the RVA can't be mapped
        to a file offset.
        """
        
        try:
            return self.get_dword_from_data(self.get_data(rva)[:4], 0)
        except PEFormatError:
            return None


    def get_dword_from_offset(self, offset):
        """Return the double word value at the given file offset. (little endian)"""
        
        if offset+4 > len(self.__data__):
            return None
            
        return self.get_dword_from_data(self.__data__[offset:offset+4], 0)


    def set_dword_at_rva(self, rva, dword):
        """Set the double word value at the file offset corresponding to the given RVA."""
        return self.set_bytes_at_rva(rva, self.get_data_from_dword(dword))


    def set_dword_at_offset(self, offset, dword):
        """Set the double word value at the given file offset."""
        return self.set_bytes_at_offset(offset, self.get_data_from_dword(dword))



    ##
    # Word get/set
    ##

    def get_data_from_word(self, word):
        """Return a two byte string representing the word value. (little endian)."""
        return struct.pack('<H', word)


    def get_word_from_data(self, data, offset):
        """Convert two bytes of data to a word (little endian)
        
        'offset' is assumed to index into a word array. So setting it to
        N will return a dword out of the data sarting at offset N*2.

        Returns None if the data can't be turned into a word.
        """

        if (offset+1)*2 > len(data):
            return None

        return struct.unpack('<H', data[offset*2:(offset+1)*2])[0]


    def get_word_at_rva(self, rva):
        """Return the word value at the given RVA.
        
        Returns None if the value can't be read, i.e. the RVA can't be mapped
        to a file offset.
        """
        
        try:
            return self.get_word_from_data(self.get_data(rva)[:2], 0)
        except PEFormatError:
            return None
            

    def get_word_from_offset(self, offset):
        """Return the word value at the given file offset. (little endian)"""
        
        if offset+2 > len(self.__data__):
            return None
            
        return self.get_word_from_data(self.__data__[offset:offset+2], 0)


    def set_word_at_rva(self, rva, word):
        """Set the word value at the file offset corresponding to the given RVA."""
        return self.set_bytes_at_rva(rva, self.get_data_from_word(word))


    def set_word_at_offset(self, offset, word):
        """Set the word value at the given file offset."""
        return self.set_bytes_at_offset(offset, self.get_data_from_word(word))


    ##
    # Quad-Word get/set
    ##

    def get_data_from_qword(self, word):
        """Return a eight byte string representing the quad-word value. (little endian)."""
        return struct.pack('<Q', word)


    def get_qword_from_data(self, data, offset):
        """Convert eight bytes of data to a word (little endian)
        
        'offset' is assumed to index into a word array. So setting it to
        N will return a dword out of the data sarting at offset N*8.

        Returns None if the data can't be turned into a quad word.
        """

        if (offset+1)*8 > len(data):
            return None

        return struct.unpack('<Q', data[offset*8:(offset+1)*8])[0]


    def get_qword_at_rva(self, rva):
        """Return the quad-word value at the given RVA.
        
        Returns None if the value can't be read, i.e. the RVA can't be mapped
        to a file offset.
        """
        
        try:
            return self.get_qword_from_data(self.get_data(rva)[:8], 0)
        except PEFormatError:
            return None
            

    def get_qword_from_offset(self, offset):
        """Return the quad-word value at the given file offset. (little endian)"""
        
        if offset+8 > len(self.__data__):
            return None
            
        return self.get_qword_from_data(self.__data__[offset:offset+8], 0)


    def set_qword_at_rva(self, rva, qword):
        """Set the quad-word value at the file offset corresponding to the given RVA."""
        return self.set_bytes_at_rva(rva, self.get_data_from_qword(qword))


    def set_qword_at_offset(self, offset, qword):
        """Set the quad-word value at the given file offset."""
        return self.set_bytes_at_offset(offset, self.get_data_from_qword(qword))



    ##
    # Set bytes
    ##


    def set_bytes_at_rva(self, rva, data):
        """Overwrite, with the given string, the bytes at the file offset corresponding to the given RVA.
        
        Return True if successful, False otherwise. It can fail if the
        offset is outside the file's boundaries.
        """

        offset = self.get_physical_by_rva(rva)
        if not offset:
            raise False
            
        return self.set_bytes_at_offset(offset, data)

        
    def set_bytes_at_offset(self, offset, data):
        """Overwrite the bytes at the given file offset with the given string.
        
        Return True if successful, False otherwise. It can fail if the
        offset is outside the file's boundaries.
        """
    
        if not isinstance(data, str):
            raise TypeError('data should be of type: str')
        
        if offset >= 0 and offset < len(self.__data__):
            self.__data__ = ( self.__data__[:offset] + 
                data +
                self.__data__[offset+len(data):] )
        else:
            return False
            
        # Refresh the section's data with the modified information
        #
        for section in self.sections:
            section_data_start = section.PointerToRawData
            section_data_end = section_data_start+section.SizeOfRawData
            section.data = self.__data__[section_data_start:section_data_end]

        return True
        


    def relocate_image(self, new_ImageBase):
        """Apply the relocation information to the image using the provided new image base.
        
        This method will apply the relocation information to the image. Given the new base,
        all the relocations will be processed and both the raw data and the section's data
        will be fixed accordingly.
        The resulting image can be retrieved as well through the method:
        
            get_memory_mapped_image()
            
        In order to get something that would more closely match what could be found in memory
        once the Windows loader finished its work.
        """
        
        relocation_difference = new_ImageBase - self.OPTIONAL_HEADER.ImageBase
        
        
        for reloc in self.DIRECTORY_ENTRY_BASERELOC:

            virtual_address = reloc.struct.VirtualAddress
            size_of_block = reloc.struct.SizeOfBlock
            
            # We iterate with an index because if the relocation is of type
            # IMAGE_REL_BASED_HIGHADJ we need to also process the next entry
            # at once and skip it for the next interation
            # 
            entry_idx = 0
            while entry_idx<len(reloc.entries):
            
                entry = reloc.entries[entry_idx]
                entry_idx += 1
                                
                if entry.type == RELOCATION_TYPE['IMAGE_REL_BASED_ABSOLUTE']:
                    # Nothing to do for this type of relocation
                    pass
                    
                elif entry.type == RELOCATION_TYPE['IMAGE_REL_BASED_HIGH']:
                    # Fix the high 16bits of a relocation
                    #
                    # Add high 16bits of relocation_difference to the
                    # 16bit value at RVA=entry.rva
                    
                    self.set_word_at_rva(
                        entry.rva,
                        ( self.get_word_at_rva(entry.rva) + relocation_difference>>16)&0xffff )
                  
                elif entry.type == RELOCATION_TYPE['IMAGE_REL_BASED_LOW']:
                    # Fix the low 16bits of a relocation
                    #
                    # Add low 16 bits of relocation_difference to the 16bit value
                    # at RVA=entry.rva
                    
                    self.set_word_at_rva(
                        entry.rva,
                        ( self.get_word_at_rva(entry.rva) + relocation_difference)&0xffff)
                    
                elif entry.type == RELOCATION_TYPE['IMAGE_REL_BASED_HIGHLOW']:
                    # Handle all high and low parts of a 32bit relocation
                    #
                    # Add relocation_difference to the value at RVA=entry.rva
                    
                    self.set_dword_at_rva(
                        entry.rva,
                        self.get_dword_at_rva(entry.rva)+relocation_difference)
                  
                elif entry.type == RELOCATION_TYPE['IMAGE_REL_BASED_HIGHADJ']:
                    # Fix the high 16bits of a relocation and adjust
                    #
                    # Add high 16bits of relocation_difference to the 32bit value
                    # composed from the (16bit value at RVA=entry.rva)<<16 plus 
                    # the 16bit value at the next relocation entry.
                    #
                    
                    # If the next entry is beyond the array's limits,
                    # abort... the table is corrupt
                    #
                    if entry_idx == len(reloc.entries):
                        break
                    
                    next_entry = reloc.entries[entry_idx]
                    entry_idx += 1
                    self.set_word_at_rva( entry.rva, 
                        ((self.get_word_at_rva(entry.rva)<<16) + next_entry.rva +
                        relocation_difference & 0xffff0000) >> 16 )
                
                elif entry.type == RELOCATION_TYPE['IMAGE_REL_BASED_DIR64']:
                    # Apply the difference to the 64bit value at the offset
                    # RVA=entry.rva
                    
                    self.set_qword_at_rva(
                        entry.rva,
                        self.get_qword_at_rva(entry.rva) + relocation_difference)


    def verify_checksum(self):
    
        return self.OPTIONAL_HEADER.CheckSum == self.generate_checksum()
        
    
    def generate_checksum(self):
    
        # Get the offset to the CheckSum field in the OptionalHeader
        #
        checksum_offset = self.OPTIONAL_HEADER.__file_offset__ + 0x40 # 64
        
        checksum = 0
        
        for i in range( len(self.__data__) / 4 ):
        
            # Skip the checksum field
            #
            if i == checksum_offset / 4:
                continue

            dword = struct.unpack('L', self.__data__[ i*4 : i*4+4 ])[0]
            checksum = (checksum & 0xffffffff) + dword + (checksum>>32)
            if checksum > 2**32:
                checksum = (checksum & 0xffffffff) + (checksum >> 32)
        
        checksum = (checksum & 0xffff) + (checksum >> 16)
        checksum = (checksum) + (checksum >> 16)
        checksum = checksum & 0xffff
        
        return checksum + len(self.__data__)

登录后可以享受更多权益