#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include "libdis.h"
#include <inttypes.h>
#ifdef _MSC_VER
#define snprintf _snprintf
#define inline __inline
#endif
/*
* Bounded concatenation macros.
*
* STRNCAT( buf, str, len ) appends str to the NUL-terminated string in buf.
* len must be a modifiable integer variable holding the space still
* available in buf; nothing happens when it is zero. At most len - 1
* characters are appended. When str is at least len - 1 characters long,
* buf is terminated at the limit and len is set to 0, which turns every
* later STRNCAT on the same len into a no-op. Otherwise len is reduced by
* strlen(str).
*
* The arguments are expanded more than once (str twice, buf and len several
* times), so they must not have side effects.
*
* STRNCATF (defined below) formats exactly one argument and appends the
* result in the same way.
*/
#define STRNCAT( buf, str, len ) do { \
int _i = strlen(str), _blen = strlen(buf), _len = len - 1; \
if ( len ) { \
strncat( buf, str, _len ); \
if ( _len <= _i ) { \
buf[_blen+_len] = '\0'; \
len = 0; \
} else { \
len -= _i; \
} \
} \
} while( 0 )
/*
* STRNCATF( buf, fmt, data, len ): format the single argument data with the
* printf-style format fmt into a temporary buffer of MAX_OP_STRING bytes
* (snprintf limits the write to that size), then append the temporary to buf
* with STRNCAT, which also updates len.
*/
#define STRNCATF( buf, fmt, data, len ) do { \
char _tmp[MAX_OP_STRING]; \
\
snprintf( _tmp, sizeof _tmp, fmt, data ); \
STRNCAT( buf, _tmp, len ); \
} while( 0 )
/*
* PRINT_DISPLACEMENT( ea ): append the displacement of the effective address
* ea, in hexadecimal, to the variable named buf in the invoking scope and
* update the variable named len found there. Nothing is appended when either
* ea->disp_size or ea->disp is zero. When ea->disp_sign is set, the negated
* displacement is printed behind a leading '-'.
*/
#define PRINT_DISPLACEMENT( ea ) do { \
if ( ea->disp_size && ea->disp ) { \
if ( ea->disp_sign ) { \
STRNCATF( buf, "-0x%" PRIX32, -ea->disp, len ); \
} else { \
STRNCATF( buf, "0x%" PRIX32, ea->disp, len ); \
} \
} \
} while( 0 )
/*
 * Printable names of the instruction prefixes. Slot 0 is the empty string
 * (no prefix). Every other slot ends in a space so that it can be written
 * directly in front of a mnemonic.
 */
static const char *prefix_strings[] = {
    "",               /* 0: no prefix */
    "repz ",          /* 1 */
    "repnz ",         /* 2 */
    "lock ",          /* 3 */
    "branch delay "   /* 4: unused in x86 */
};
/*
 * Append the name of every prefix whose bit (1, 2, 4 or 8) is set in prefix
 * to buf. len is the space available in buf.
 *
 * Returns the amount by which the STRNCAT bookkeeping reduced len, so the
 * caller can subtract it from its own remaining-space counter.
 */
static int format_insn_prefix_str( enum x86_insn_prefix prefix, char *buf,
                                   int len ) {
    const int available = len;
    int bit;

    /* bit n of the mask selects prefix_strings[n + 1] */
    for ( bit = 0; bit < 4; bit++ ) {
        if ( prefix & (1 << bit) ) {
            STRNCAT( buf, prefix_strings[bit + 1], len );
        }
    }

    return available - len;
}
/*
 * Render the immediate value stored in op as text in str, writing at most
 * len bytes through snprintf.
 *
 * Operands flagged op_signed are printed in decimal. All others are printed
 * as zero-padded hexadecimal with a "0x" prefix. Any datatype other than
 * byte, word or qword is printed as a 32-bit value.
 */
static void get_operand_data_str( x86_op_t *op, char *str, int len ){
    const int is_signed = ( op->flags & op_signed ) != 0;

    switch ( op->datatype ) {
        case op_byte:
            if ( is_signed ) {
                snprintf( str, len, "%" PRId8, op->data.sbyte );
            } else {
                snprintf( str, len, "0x%02" PRIX8, op->data.byte );
            }
            break;
        case op_word:
            if ( is_signed ) {
                snprintf( str, len, "%" PRId16, op->data.sword );
            } else {
                snprintf( str, len, "0x%04" PRIX16, op->data.word );
            }
            break;
        case op_qword:
            /* both the signed and the unsigned form read the sqword member */
            if ( is_signed ) {
                snprintf( str, len, "%" PRId64, op->data.sqword );
            } else {
                snprintf( str, len, "0x%08" PRIX64, op->data.sqword );
            }
            break;
        default:
            if ( is_signed ) {
                snprintf( str, len, "%" PRId32, op->data.sdword );
            } else {
                snprintf( str, len, "0x%08" PRIX32, op->data.dword );
            }
            break;
    }
}
/*
 * Write the names of all register-type bits set in regtype to str, separated
 * by single spaces. str is cleared (len bytes) first, so an empty string
 * results when no known bit is set.
 */
static void get_operand_regtype_str( int regtype, char *str, int len )
{
    static struct {
        const char *name;
        int value;
    } operand_regtypes[] = {
        {"reg_gen"    , 0x00001},
        {"reg_in"     , 0x00002},
        {"reg_out"    , 0x00004},
        {"reg_local"  , 0x00008},
        {"reg_fpu"    , 0x00010},
        {"reg_seg"    , 0x00020},
        {"reg_simd"   , 0x00040},
        {"reg_sys"    , 0x00080},
        {"reg_sp"     , 0x00100},
        {"reg_fp"     , 0x00200},
        {"reg_pc"     , 0x00400},
        {"reg_retaddr", 0x00800},
        {"reg_cond"   , 0x01000},
        {"reg_zero"   , 0x02000},
        {"reg_ret"    , 0x04000},
        {"reg_src"    , 0x10000},
        {"reg_dest"   , 0x20000},
        {"reg_count"  , 0x40000},
        {NULL, 0},  /* terminator */
    };
    int idx;

    memset( str, 0, len );

    /* walk the table until the NULL-named terminator */
    for ( idx = 0; operand_regtypes[idx].name != NULL; idx++ ) {
        if ( regtype & operand_regtypes[idx].value ) {
            /* something was already written: separate with a space */
            if ( str[0] != '\0' ) {
                STRNCAT( str, " ", len );
            }
            STRNCAT( str, operand_regtypes[idx].name, len );
        }
    }
}
/*
 * Append the textual form of the effective address ea to buf, using the
 * requested syntax:
 *   att_syntax  disp(%base,%index,scale), or a bare displacement when there
 *               is no base, index or scale
 *   xml_syntax  <base>, <index>, <scale> and <displacement> elements
 *   raw_syntax  disp(base,index,scale)
 *   otherwise   [base+index*scale+disp]
 *
 * buf must already hold a NUL-terminated string and len is the space still
 * available in it.
 *
 * Returns the amount of that space consumed (all of len when the output was
 * truncated). Every caller subtracts the return value from its own
 * remaining-space counter, so returning the total length of buf, as this
 * function used to, over-decremented that counter by whatever buf already
 * held and could drive it negative.
 */
static int format_expr( x86_ea_t *ea, char *buf, int len,
                        enum x86_asm_format format ) {
    char str[MAX_OP_STRING];
    int len_orig = len;

    if ( format == att_syntax ) {
        if (ea->base.name[0] || ea->index.name[0] || ea->scale) {
            PRINT_DISPLACEMENT(ea);
            STRNCAT( buf, "(", len );

            if ( ea->base.name[0]) {
                STRNCATF( buf, "%%%s", ea->base.name, len );
            }
            if ( ea->index.name[0]) {
                STRNCATF( buf, ",%%%s", ea->index.name, len );
                if ( ea->scale > 1 ) {
                    STRNCATF( buf, ",%d", ea->scale, len );
                }
            }
            /* handle the syntactic exception */
            if ( ! ea->base.name[0] &&
                 ! ea->index.name[0]   ) {
                STRNCATF( buf, ",%d", ea->scale, len );
            }

            STRNCAT( buf, ")", len );
        } else {
            STRNCATF( buf, "0x%" PRIX32, ea->disp, len );
        }

    } else if ( format == xml_syntax ){

        if ( ea->base.name[0]) {
            STRNCAT (buf, "\t\t\t<base>\n", len);

            get_operand_regtype_str (ea->base.type, str,
                                     sizeof str);
            STRNCAT (buf, "\t\t\t\t<register ", len);
            STRNCATF (buf, "name=\"%s\" ", ea->base.name, len);
            STRNCATF (buf, "type=\"%s\" ", str, len);
            STRNCATF (buf, "size=%d/>\n", ea->base.size, len);

            STRNCAT (buf, "\t\t\t</base>\n", len);
        }

        if ( ea->index.name[0]) {
            STRNCAT (buf, "\t\t\t<index>\n", len);

            get_operand_regtype_str (ea->index.type, str,
                                     sizeof str);

            STRNCAT (buf, "\t\t\t\t<register ", len);
            STRNCATF (buf, "name=\"%s\" ", ea->index.name, len);
            STRNCATF (buf, "type=\"%s\" ", str, len);
            STRNCATF (buf, "size=%d/>\n", ea->index.size, len);

            STRNCAT (buf, "\t\t\t</index>\n", len);
        }

        /* scale is always emitted */
        STRNCAT (buf, "\t\t\t<scale>\n", len);
        STRNCAT (buf, "\t\t\t\t<immediate ", len);
        STRNCATF (buf, "value=\"%d\"/>\n", ea->scale, len);
        STRNCAT (buf, "\t\t\t</scale>\n", len);

        if ( ea->disp_size ) {

            STRNCAT (buf, "\t\t\t<displacement>\n", len);

            /* unsigned displacements wider than a byte are addresses */
            if ( ea->disp_size > 1 && ! ea->disp_sign ) {
                STRNCAT (buf, "\t\t\t\t<address ", len);
                STRNCATF (buf, "value=\"0x%" PRIX32 "\"/>\n", ea->disp,
                          len);
            } else {
                STRNCAT (buf, "\t\t\t\t<immediate ", len);
                STRNCATF (buf, "value=%" PRId32 "/>\n", ea->disp, len);
            }

            STRNCAT (buf, "\t\t\t</displacement>\n", len);
        }

    } else if ( format == raw_syntax ) {

        PRINT_DISPLACEMENT(ea);
        STRNCAT( buf, "(", len );

        STRNCATF( buf, "%s,", ea->base.name, len );
        STRNCATF( buf, "%s,", ea->index.name, len );
        STRNCATF( buf, "%d", ea->scale, len );
        STRNCAT( buf, ")", len );

    } else {

        STRNCAT( buf, "[", len );

        if ( ea->base.name[0] ) {
            STRNCAT( buf, ea->base.name, len );
            if ( ea->index.name[0] ||
                 (ea->disp_size && ! ea->disp_sign) ) {
                STRNCAT( buf, "+", len );
            }
        }
        if ( ea->index.name[0] ) {
            STRNCAT( buf, ea->index.name, len );
            if ( ea->scale > 1 )
            {
                STRNCATF( buf, "*%" PRId32, ea->scale, len );
            }
            if ( ea->disp_size && ! ea->disp_sign )
            {
                STRNCAT( buf, "+", len );
            }
        }

        if ( ea->disp_size || (! ea->index.name[0] &&
                               ! ea->base.name[0] ) )
        {
            PRINT_DISPLACEMENT(ea);
        }

        STRNCAT( buf, "]", len );
    }

    /* space consumed, not the total length of buf */
    return( len_orig - len );
}
/*
 * Append the segment-override register of op to buf, if it has one.
 *
 * Only op_offset and op_expression operands are considered, and only when
 * the 0xF00 bits of op->flags name one of es, cs, ss, ds, fs or gs.
 * The text is "<segment value=.../>" for xml_syntax, "%reg:" for att_syntax
 * and "reg:" for every other syntax.
 *
 * Returns the amount of the len bytes of available space that was consumed,
 * or 0 when nothing was appended.
 */
static int format_seg( x86_op_t *op, char *buf, int len,
                       enum x86_asm_format format ) {
    const int start_len = len;
    const char *seg_name;

    if ( op == NULL || buf == NULL || len == 0 || ! op->flags ) {
        return 0;
    }
    if ( op->type != op_offset && op->type != op_expression ) {
        return 0;
    }

    /* no override, or an unrecognised one: nothing to print */
    switch ( op->flags & 0xF00 ) {
        case op_es_seg: seg_name = "es"; break;
        case op_cs_seg: seg_name = "cs"; break;
        case op_ss_seg: seg_name = "ss"; break;
        case op_ds_seg: seg_name = "ds"; break;
        case op_fs_seg: seg_name = "fs"; break;
        case op_gs_seg: seg_name = "gs"; break;
        default:
            return 0;
    }

    if ( format == xml_syntax ) {
        STRNCAT( buf, "\t\t\t<segment ", len );
        STRNCATF( buf, "value=\"%s\"/>\n", seg_name, len );
    } else if ( format == att_syntax ) {
        STRNCATF( buf, "%%%s:", seg_name, len );
    } else {
        STRNCATF( buf, "%s:", seg_name, len );
    }

    return start_len - len;
}
/*
 * Return a constant string naming the datatype of op.
 *
 * Operands flagged op_signed get an "s"-prefixed integer name; any signed
 * datatype other than byte, word, qword or dqword is reported as "sdword".
 * Unsigned operands fall back to "dword" for datatypes not listed.
 *
 * The names are returned directly from each case rather than through an
 * index into a string table: the previous table lookup had the fpu_env and
 * fpu_state entries crossed, so op_fpustate16/32 were reported as
 * "fpu_env16/32" and op_fpuenv16/32 as "fpu_state16/32".
 */
static const char *get_operand_datatype_str( x86_op_t *op ){

    /* handle signed values first */
    if ( op->flags & op_signed ) {
        switch (op->datatype) {
            case op_byte:   return "sbyte";
            case op_word:   return "sword";
            case op_qword:  return "sqword";
            case op_dqword: return "sdqword";
            default:        return "sdword";
        }
    }

    switch (op->datatype) {
        case op_byte:       return "byte";
        case op_word:       return "word";
        case op_qword:      return "qword";
        case op_dqword:     return "dqword";
        case op_sreal:      return "sreal";
        case op_dreal:      return "dreal";
        case op_extreal:    return "extreal";
        case op_bcd:        return "bcd";
        case op_ssimd:      return "ssimd";
        case op_dsimd:      return "dsimd";
        case op_sssimd:     return "sssimd";
        case op_sdsimd:     return "sdsimd";
        case op_descr32:    return "descr32";
        case op_descr16:    return "descr16";
        case op_pdescr32:   return "pdescr32";
        case op_pdescr16:   return "pdescr16";
        case op_bounds16:   return "bounds16";
        case op_bounds32:   return "bounds32";
        case op_fpuenv16:   return "fpu_env16";
        case op_fpuenv32:   return "fpu_env32";
        case op_fpustate16: return "fpu_state16";
        case op_fpustate32: return "fpu_state32";
        case op_fpregset:   return "fp_reg_set";
        default:            return "dword";
    }
}
/*
 * Append the name of every status-flag condition set in flags to buf.
 * Each name carries its own trailing space, so the result is a
 * space-separated list. len is the space available in buf.
 *
 * Returns the amount of that space consumed.
 */
static int format_insn_eflags_str( enum x86_flag_status flags, char *buf,
                                   int len) {
    static struct {
        const char *name;
        int value;
    } insn_flags[] = {
        { "carry_set ",                 0x0001 },
        { "zero_set ",                  0x0002 },
        { "oflow_set ",                 0x0004 },
        { "dir_set ",                   0x0008 },
        { "sign_set ",                  0x0010 },
        { "parity_set ",                0x0020 },
        { "carry_or_zero_set ",         0x0040 },
        { "zero_set_or_sign_ne_oflow ", 0x0080 },
        { "carry_clear ",               0x0100 },
        { "zero_clear ",                0x0200 },
        { "oflow_clear ",               0x0400 },
        { "dir_clear ",                 0x0800 },
        { "sign_clear ",                0x1000 },
        { "parity_clear ",              0x2000 },
        { "sign_eq_oflow ",             0x4000 },
        { "sign_ne_oflow ",             0x8000 },
        { NULL,                         0x0000 },  /* terminator */
    };
    const int start_len = len;
    int idx = 0;

    while ( insn_flags[idx].name != NULL ) {
        if ( flags & insn_flags[idx].value ) {
            STRNCAT( buf, insn_flags[idx].name, len );
        }
        idx++;
    }

    return start_len - len;
}
/*
 * Return the printable name of instruction group gp, using gp as an index
 * into the table below. Unnamed slots and out-of-range values yield "".
 */
static const char *get_insn_group_str( enum x86_insn_group gp ) {

    static const char *types[] = {
        "",             // 0
        "controlflow",  // 1
        "arithmetic",   // 2
        "logic",        // 3
        "stack",        // 4
        "comparison",   // 5
        "move",         // 6
        "string",       // 7
        "bit_manip",    // 8
        "flag_manip",   // 9
        "fpu",          // 10
        "",             // 11
        "",             // 12
        "interrupt",    // 13
        "system",       // 14
        "other",        // 15
    };

    /* ">=": an index equal to the element count is already out of bounds */
    if ( gp >= sizeof (types)/sizeof(types[0]) )
        return "";

    return types[gp];
}
/*
 * Return the printable name of instruction type `type`, found by a linear
 * search of the table below, or "" when the value is not listed.
 */
static const char *get_insn_type_str( enum x86_insn_type type ) {

    static struct {
        const char *name;
        int value;
    } types[] = {
        /* insn_controlflow */
        { "jmp", 0x1001 },
        { "jcc", 0x1002 },
        { "call", 0x1003 },
        { "callcc", 0x1004 },
        { "return", 0x1005 },
        { "loop", 0x1006 },
        /* insn_arithmetic */
        { "add", 0x2001 },
        { "sub", 0x2002 },
        { "mul", 0x2003 },
        { "div", 0x2004 },
        { "inc", 0x2005 },
        { "dec", 0x2006 },
        { "shl", 0x2007 },
        { "shr", 0x2008 },
        { "rol", 0x2009 },
        { "ror", 0x200A },
        /* insn_logic */
        { "and", 0x3001 },
        { "or", 0x3002 },
        { "xor", 0x3003 },
        { "not", 0x3004 },
        { "neg", 0x3005 },
        /* insn_stack */
        { "push", 0x4001 },
        { "pop", 0x4002 },
        { "pushregs", 0x4003 },
        { "popregs", 0x4004 },
        { "pushflags", 0x4005 },
        { "popflags", 0x4006 },
        { "enter", 0x4007 },
        { "leave", 0x4008 },
        /* insn_comparison */
        { "test", 0x5001 },
        { "cmp", 0x5002 },
        /* insn_move */
        { "mov", 0x6001 },      /* move */
        { "movcc", 0x6002 },    /* conditional move */
        { "xchg", 0x6003 },     /* exchange */
        { "xchgcc", 0x6004 },   /* conditional exchange */
        /* insn_string */
        { "strcmp", 0x7001 },
        { "strload", 0x7002 },
        { "strmov", 0x7003 },
        { "strstore", 0x7004 },
        { "translate", 0x7005 },    /* xlat */
        /* insn_bit_manip */
        { "bittest", 0x8001 },
        { "bitset", 0x8002 },
        { "bitclear", 0x8003 },
        /* insn_flag_manip */
        { "clear_carry", 0x9001 },
        { "clear_zero", 0x9002 },
        { "clear_oflow", 0x9003 },
        { "clear_dir", 0x9004 },
        { "clear_sign", 0x9005 },
        { "clear_parity", 0x9006 },
        { "set_carry", 0x9007 },
        { "set_zero", 0x9008 },
        { "set_oflow", 0x9009 },
        { "set_dir", 0x900A },
        { "set_sign", 0x900B },
        { "set_parity", 0x900C },
        { "tog_carry", 0x9010 },
        { "tog_zero", 0x9020 },
        { "tog_oflow", 0x9030 },
        { "tog_dir", 0x9040 },
        { "tog_sign", 0x9050 },
        { "tog_parity", 0x9060 },
        /* insn_fpu */
        { "fmov", 0xA001 },
        { "fmovcc", 0xA002 },
        { "fneg", 0xA003 },
        { "fabs", 0xA004 },
        { "fadd", 0xA005 },
        { "fsub", 0xA006 },
        { "fmul", 0xA007 },
        { "fdiv", 0xA008 },
        { "fsqrt", 0xA009 },
        { "fcmp", 0xA00A },
        { "fcos", 0xA00C },
        { "fldpi", 0xA00D },
        { "fldz", 0xA00E },
        { "ftan", 0xA00F },
        { "fsine", 0xA010 },
        { "fsys", 0xA020 },
        /* insn_interrupt */
        { "int", 0xD001 },
        { "intcc", 0xD002 },    /* not present in x86 ISA */
        { "iret", 0xD003 },
        { "bound", 0xD004 },
        { "debug", 0xD005 },
        { "trace", 0xD006 },
        { "invalid_op", 0xD007 },
        { "oflow", 0xD008 },
        /* insn_system */
        { "halt", 0xE001 },
        { "in", 0xE002 },       /* input from port/bus */
        { "out", 0xE003 },      /* output to port/bus */
        { "cpuid", 0xE004 },
        /* insn_other */
        { "nop", 0xF001 },
        { "bcdconv", 0xF002 },  /* convert to or from BCD */
        { "szconv", 0xF003 },   /* change size of operand */
        { NULL, 0 },            /* terminator */
    };
    const char *found = "";
    int idx = 0;

    /* the first matching entry wins; stop at the NULL-named terminator */
    while ( types[idx].name != NULL ) {
        if ( types[idx].value == type ) {
            found = types[idx].name;
            break;
        }
        idx++;
    }

    return found;
}
/*
 * Return the printable name of the CPU model `cpu`. Values inside the
 * table below are looked up by index; 16, 32 and 48 are matched
 * individually, and anything else yields "".
 */
static const char *get_insn_cpu_str( enum x86_insn_cpu cpu ) {
    static const char *intel[] = {
        "",             // 0
        "8086",         // 1
        "80286",        // 2
        "80386",        // 3
        "80387",        // 4
        "80486",        // 5
        "Pentium",      // 6
        "Pentium Pro",  // 7
        "Pentium 2",    // 8
        "Pentium 3",    // 9
        "Pentium 4"     // 10
    };

    if ( cpu >= sizeof(intel)/sizeof(intel[0]) ) {
        /* outside the table: only three further values are named */
        switch ( (int) cpu ) {
            case 16: return "K6";
            case 32: return "K7";
            case 48: return "Athlon";
            default: return "";
        }
    }

    return intel[cpu];
}
/*
 * Return the printable name of the instruction-set subset isa, using isa as
 * an index into the table below.
 *
 * Out-of-range values and the unnamed slot 0 both yield "" so that the
 * result can always be handed to a "%s" conversion; returning NULL for
 * slot 0 made that undefined behaviour.
 */
static const char *get_insn_isa_str( enum x86_insn_isa isa ) {
    static const char *subset[] = {
        NULL,               // 0
        "General Purpose",  // 1
        "Floating Point",   // 2
        "FPU Management",   // 3
        "MMX",              // 4
        "SSE",              // 5
        "SSE2",             // 6
        "SSE3",             // 7
        "3DNow!",           // 8
        "System"            // 9
    };

    /* ">=": an index equal to the element count is already out of bounds */
    if ( isa >= sizeof (subset)/sizeof(subset[0]) ) {
        return "";
    }

    return subset[isa] ? subset[isa] : "";
}
/*
 * Append operand op of instruction insn to buf in AT&T syntax: registers
 * get a '%' prefix, immediates a '$' prefix, relative branch targets are
 * resolved against insn->addr + insn->size, and memory operands of jmp/call
 * instructions are preceded by '*'.
 *
 * len is the space available in buf. Returns strlen(buf).
 */
static int format_operand_att( x86_op_t *op, x86_insn_t *insn, char *buf,
                               int len){

    char str[MAX_OP_STRING];

    memset( str, 0, sizeof str );

    switch ( op->type ) {
        case op_register:
            STRNCATF( buf, "%%%s", op->data.reg.name, len );
            break;

        case op_immediate:
            get_operand_data_str( op, str, sizeof str );
            STRNCATF( buf, "$%s", str, len );
            break;

        case op_relative_near:
            STRNCATF( buf, "0x%08X",
                      (unsigned int)(op->data.sbyte +
                      insn->addr + insn->size), len );
            break;

        case op_relative_far:
            if ( op->datatype != op_word ) {
                STRNCATF( buf, "0x%08X",
                          (unsigned int)(op->data.sdword +
                          insn->addr + insn->size), len );
            } else {
                STRNCATF( buf, "0x%08X",
                          (unsigned int)(op->data.sword +
                          insn->addr + insn->size), len );
            }
            break;

        case op_absolute:
            /* ATT uses the syntax $section, $offset */
            STRNCATF( buf, "$0x%04" PRIX16 ", ",
                      op->data.absolute.segment, len );
            if ( op->datatype != op_descr16 ) {
                STRNCATF( buf, "$0x%08" PRIX32,
                          op->data.absolute.offset.off32, len );
            } else {
                STRNCATF( buf, "$0x%04" PRIX16,
                          op->data.absolute.offset.off16, len );
            }
            break;

        case op_offset:
        case op_expression:
            /* both memory forms share the '*' marker and segment prefix */
            if ( insn->type == insn_jmp || insn->type == insn_call ) {
                STRNCAT( buf, "*", len );
            }
            len -= format_seg( op, buf, len, att_syntax );

            if ( op->type == op_offset ) {
                STRNCATF( buf, "0x%08" PRIX32, op->data.sdword, len );
            } else {
                len -= format_expr( &op->data.expression, buf, len,
                                    att_syntax );
            }
            break;

        case op_unused:
        case op_unknown:
        default:
            /* nothing is appended for these */
            break;
    }

    return ( strlen( buf ) );
}
/*
 * Append operand op of instruction insn to buf in the native/intel style:
 * bare register names, relative branch targets resolved against
 * insn->addr + insn->size, and memory offsets wrapped in brackets.
 *
 * len is the space available in buf. Returns strlen(buf).
 */
static int format_operand_native( x86_op_t *op, x86_insn_t *insn, char *buf,
                                  int len){

    char str[MAX_OP_STRING];

    switch (op->type) {
        case op_register:
            STRNCAT( buf, op->data.reg.name, len );
            break;

        case op_immediate:
            get_operand_data_str( op, str, sizeof str );
            STRNCAT( buf, str, len );
            break;

        case op_relative_near:
            STRNCATF( buf, "0x%08" PRIX32,
                      (unsigned int)(op->data.sbyte +
                      insn->addr + insn->size), len );
            break;

        case op_relative_far:
            if ( op->datatype != op_word ) {
                STRNCATF( buf, "0x%08" PRIX32, op->data.sdword +
                          insn->addr + insn->size, len );
            } else {
                STRNCATF( buf, "0x%08" PRIX32,
                          (unsigned int)(op->data.sword +
                          insn->addr + insn->size), len );
            }
            break;

        case op_absolute:
            STRNCATF( buf, "$0x%04" PRIX16 ":",
                      op->data.absolute.segment, len );
            if ( op->datatype != op_descr16 ) {
                STRNCATF( buf, "0x%08" PRIX32,
                          op->data.absolute.offset.off32, len );
            } else {
                STRNCATF( buf, "0x%04" PRIX16,
                          op->data.absolute.offset.off16, len );
            }
            break;

        case op_offset:
        case op_expression:
            /* both memory forms start with the optional segment prefix */
            len -= format_seg( op, buf, len, native_syntax );

            if ( op->type == op_offset ) {
                STRNCATF( buf, "[0x%08" PRIX32 "]", op->data.sdword, len );
            } else {
                len -= format_expr( &op->data.expression, buf, len,
                                    native_syntax );
            }
            break;

        case op_unused:
        case op_unknown:
        default:
            /* nothing is appended for these */
            break;
    }

    return( strlen( buf ) );
}
/*
 * Append operand op of instruction insn to buf as an XML fragment. The
 * element emitted depends on op->type: <register>, <immediate>,
 * <relative_offset>, <absolute_address>, <address_expression> or
 * <segment_offset>. Relative branch targets are resolved against
 * insn->addr + insn->size.
 *
 * len is the space available in buf. Returns strlen(buf).
 */
static int format_operand_xml( x86_op_t *op, x86_insn_t *insn, char *buf,
                               int len){

    char str[MAX_OP_STRING] = "\0";

    switch (op->type) {
        case op_register:

            get_operand_regtype_str( op->data.reg.type, str,
                                     sizeof str );

            STRNCAT( buf, "\t\t<register ", len );
            STRNCATF( buf, "name=\"%s\" ", op->data.reg.name, len );
            STRNCATF( buf, "type=\"%s\" ", str, len );
            STRNCATF( buf, "size=%d/>\n", op->data.reg.size, len );
            break;

        case op_immediate:

            get_operand_data_str( op, str, sizeof str );

            STRNCAT( buf, "\t\t<immediate ", len );
            STRNCATF( buf, "type=\"%s\" ",
                      get_operand_datatype_str (op), len );
            STRNCATF( buf, "value=\"%s\"/>\n", str, len );
            break;

        case op_relative_near:
            STRNCAT( buf, "\t\t<relative_offset ", len );

            STRNCATF( buf, "value=\"0x%08" PRIX32 "\"/>\n",
                      (unsigned int)(op->data.sbyte +
                      insn->addr + insn->size), len );
            break;

        case op_relative_far:
            STRNCAT( buf, "\t\t<relative_offset ", len );

            if (op->datatype == op_word) {
                STRNCATF( buf, "value=\"0x%08" PRIX32 "\"/>\n",
                          (unsigned int)(op->data.sword +
                          insn->addr + insn->size), len);
            } else {
                STRNCATF( buf, "value=\"0x%08" PRIX32 "\"/>\n",
                          op->data.sdword + insn->addr + insn->size,
                          len );
            }
            break;

        case op_absolute:

            /* the trailing space separates segment= from offset= */
            STRNCATF( buf,
                      "\t\t<absolute_address segment=\"0x%04" PRIX16 "\" ",
                      op->data.absolute.segment, len );

            if (op->datatype == op_descr16) {
                STRNCATF( buf, "offset=\"0x%04" PRIX16 "\">",
                          op->data.absolute.offset.off16, len );
            } else {
                STRNCATF( buf, "offset=\"0x%08" PRIX32 "\">",
                          op->data.absolute.offset.off32, len );
            }

            STRNCAT( buf, "\t\t</absolute_address>\n", len );
            break;

        case op_expression:

            STRNCAT( buf, "\t\t<address_expression>\n", len );

            len -= format_seg( op, buf, len, xml_syntax );
            len -= format_expr( &op->data.expression, buf, len,
                                xml_syntax );

            STRNCAT( buf, "\t\t</address_expression>\n", len );
            break;

        case op_offset:

            STRNCAT( buf, "\t\t<segment_offset>\n", len );

            len -= format_seg( op, buf, len, xml_syntax );

            STRNCAT( buf, "\t\t\t<address ", len);
            STRNCATF( buf, "value=\"0x%08" PRIX32 "\"/>\n",
                      op->data.sdword, len );
            STRNCAT( buf, "\t\t</segment_offset>\n", len );
            break;

        case op_unused:
        case op_unknown:
        default:
            /* nothing is appended for these */
            break;
    }

    return( strlen( buf ) );
}
/*
 * Append operand op of instruction insn to buf in the raw, '|'-delimited
 * format: an operand-kind tag, the datatype name, then the operand value.
 * Registers are written as NAME:TYPE:SIZE. Relative operands print the
 * stored offset, not the resolved branch target.
 *
 * len is the space available in buf. Returns strlen(buf).
 */
static int format_operand_raw( x86_op_t *op, x86_insn_t *insn, char *buf,
                               int len){

    char str[MAX_OP_RAW_STRING];
    const char *datatype = get_operand_datatype_str(op);

    switch (op->type) {
        case op_register:

            get_operand_regtype_str( op->data.reg.type, str,
                                     sizeof str );

            STRNCAT( buf, "reg|", len );
            STRNCATF( buf, "%s|", datatype, len );
            STRNCATF( buf, "%s:", op->data.reg.name, len );
            STRNCATF( buf, "%s:", str, len );
            STRNCATF( buf, "%d|", op->data.reg.size, len );
            break;

        case op_immediate:

            get_operand_data_str( op, str, sizeof str );

            STRNCAT( buf, "immediate|", len );
            STRNCATF( buf, "%s|", datatype, len );
            STRNCATF( buf, "%s|", str, len );
            break;

        case op_relative_near:
            /* NOTE: in raw format, we print the
             * relative offset, not the actual
             * address of the jump target */

            STRNCAT( buf, "relative|", len );
            STRNCATF( buf, "%s|", datatype, len );
            STRNCATF( buf, "%" PRId8 "|", op->data.sbyte, len );
            break;

        case op_relative_far:

            STRNCAT( buf, "relative|", len );
            STRNCATF( buf, "%s|", datatype, len );

            if (op->datatype == op_word) {
                STRNCATF( buf, "%" PRId16 "|", op->data.sword, len);
            } else {
                STRNCATF( buf, "%" PRId32 "|", op->data.sdword, len );
            }
            break;

        case op_absolute:

            STRNCAT( buf, "absolute_address|", len );
            STRNCATF( buf, "%s|", datatype, len );

            STRNCATF( buf, "$0x%04" PRIX16 ":", op->data.absolute.segment,
                      len );
            if (op->datatype == op_descr16) {
                STRNCATF( buf, "0x%04" PRIX16 "|",
                          op->data.absolute.offset.off16, len );
            } else {
                STRNCATF( buf, "0x%08" PRIX32 "|",
                          op->data.absolute.offset.off32, len );
            }
            break;

        case op_expression:

            STRNCAT( buf, "address_expression|", len );
            STRNCATF( buf, "%s|", datatype, len );

            len -= format_seg( op, buf, len, native_syntax );
            len -= format_expr( &op->data.expression, buf, len,
                                raw_syntax );

            STRNCAT( buf, "|", len );
            break;

        case op_offset:

            STRNCAT( buf, "segment_offset|", len );
            STRNCATF( buf, "%s|", datatype, len );

            /* plain "reg:" prefix, as for op_expression; xml_syntax here
             * used to inject a <segment .../> element into the raw field */
            len -= format_seg( op, buf, len, native_syntax );

            STRNCATF( buf, "%08" PRIX32 "|", op->data.sdword, len );
            break;

        case op_unused:
        case op_unknown:
        default:
            /* nothing is appended for these */
            break;
    }

    return( strlen( buf ) );
}
/*
 * Format operand op into buf (capacity len bytes) using the requested
 * syntax. buf is zeroed before anything is written.
 *
 * att, xml and raw syntaxes have dedicated formatters; every other value,
 * including native_syntax and intel_syntax, uses the native formatter.
 *
 * Returns the value returned by the selected formatter, or 0 when op or buf
 * is NULL or len is smaller than 1.
 */
int x86_format_operand( x86_op_t *op, char *buf, int len,
                        enum x86_asm_format format ){
    x86_insn_t *owner;

    if ( op == NULL || buf == NULL || len < 1 ) {
        return 0;
    }

    /* insn is stored in x86_op_t since .21-pre3 */
    owner = (x86_insn_t *) op->insn;

    memset( buf, 0, len );

    if ( format == att_syntax ) {
        return format_operand_att( op, owner, buf, len );
    }
    if ( format == xml_syntax ) {
        return format_operand_xml( op, owner, buf, len );
    }
    if ( format == raw_syntax ) {
        return format_operand_raw( op, owner, buf, len );
    }

    return format_operand_native( op, owner, buf, len );
}
/*
 * Operand classification helpers. op is a pointer to an operand; it is
 * expanded three times, so it must not have side effects. The argument is
 * parenthesised so that any pointer-valued expression can be passed.
 */

/* true for operand types that can appear as the target of an immediate jump */
#define is_imm_jmp(op)   ((op)->type == op_absolute   || \
                          (op)->type == op_immediate  || \
                          (op)->type == op_offset)

/* true for operand types that refer to memory */
#define is_memory_op(op) ((op)->type == op_absolute   || \
                          (op)->type == op_expression || \
                          (op)->type == op_offset)
/*
 * Write the AT&T form of insn's mnemonic into buf (capacity len bytes),
 * which is zeroed first.
 *
 * jmp/call instructions get an "l" prefix unless their first operand is an
 * immediate-jump operand of byte size. Other instructions get a size suffix
 * (b, w, l or q) taken from their first or second explicit memory operand,
 * provided the instruction is in one of the groups listed below and is not
 * marked insn_note_nosuffix.
 *
 * Returns strlen(buf), or 0 when insn or buf is NULL or len is 0.
 */
static int format_att_mnemonic( x86_insn_t *insn, char *buf, int len) {
    int opsize = 0;
    const char *suffix;

    if ( insn == NULL || buf == NULL || len == 0 ) {
        return 0;
    }

    memset( buf, 0, len );

    /* do long jump/call prefix */
    if ( insn->type == insn_jmp || insn->type == insn_call ) {
        if (! is_imm_jmp( x86_operand_1st(insn) ) ||
             (x86_operand_1st(insn))->datatype != op_byte ) {
            /* far jump/call, use "l" prefix */
            STRNCAT( buf, "l", len );
        }
        STRNCAT( buf, insn->mnemonic, len );
        return ( strlen( buf ) );
    }

    /* do mnemonic */
    STRNCAT( buf, insn->mnemonic, len );

    /* find the memory operand, if any, that determines the suffix */
    if ( !(insn->note & insn_note_nosuffix) &&
         ( insn->group == insn_arithmetic ||
           insn->group == insn_logic      ||
           insn->group == insn_move       ||
           insn->group == insn_stack      ||
           insn->group == insn_string     ||
           insn->group == insn_comparison ||
           insn->type  == insn_in         ||
           insn->type  == insn_out ) ) {
        if ( x86_operand_count( insn, op_explicit ) > 0 &&
             is_memory_op( x86_operand_1st(insn) ) ){
            opsize = x86_operand_size( x86_operand_1st( insn ) );
        } else if ( x86_operand_count( insn, op_explicit ) > 1 &&
                    is_memory_op( x86_operand_2nd(insn) ) ){
            opsize = x86_operand_size( x86_operand_2nd( insn ) );
        }
    }

    switch ( opsize ) {
        case 1:  suffix = "b"; break;
        case 2:  suffix = "w"; break;
        case 4:  suffix = "l"; break;
        case 8:  suffix = "q"; break;
        default: suffix = "";  break;
    }

    STRNCAT( buf, suffix, len );
    return ( strlen( buf ) );
}
/*
 * Write insn's prefix string followed by its mnemonic into buf (capacity
 * len bytes), which is zeroed first. For att_syntax the mnemonic is the
 * AT&T form produced by format_att_mnemonic; for every other syntax it is
 * insn->mnemonic.
 *
 * Returns strlen(buf), or 0 when insn or buf is NULL or len is smaller
 * than 1 (the same argument check x86_format_operand performs; without it a
 * non-positive len reached memset).
 */
int x86_format_mnemonic(x86_insn_t *insn, char *buf, int len,
                        enum x86_asm_format format){
    char str[MAX_OP_STRING];

    if ( ! insn || ! buf || len < 1 ) {
        return(0);
    }

    memset( buf, 0, len );
    STRNCAT( buf, insn->prefix_string, len );
    if ( format == att_syntax ) {
        format_att_mnemonic( insn, str, sizeof str );
        STRNCAT( buf, str, len );
    } else {
        STRNCAT( buf, insn->mnemonic, len );
    }

    return( strlen( buf ) );
}
/* Output cursor handed to format_op_raw: destination and space left in it. */
struct op_string { char *buf; size_t len; };

/*
 * Operand-iteration callback: append op in raw format to the buffer
 * described by arg (a struct op_string *).
 *
 * The remaining space recorded in the cursor is reduced by what was
 * appended, so that each further operand is bounded by the space actually
 * left rather than by the space that was available before the first one.
 */
static void format_op_raw( x86_op_t *op, x86_insn_t *insn, void *arg ) {
    struct op_string * opstr = (struct op_string *) arg;
    size_t before = strlen( opstr->buf );
    size_t added;

    format_operand_raw( op, insn, opstr->buf, (int) opstr->len );

    added = strlen( opstr->buf ) - before;
    opstr->len = ( added < opstr->len ) ? opstr->len - added : 0;
}
/*
 * Append the notes attached to insn, followed by '|', to buf. The notes
 * recognised are "Ring0 ", "SMM " and "Serialize "; they are collected in a
 * local scratch buffer first. len is the space available in buf.
 *
 * Returns the amount of that space consumed.
 */
static int format_insn_note(x86_insn_t *insn, char *buf, int len){

    char note[32] = {0};
    int note_len = sizeof note;
    const int start_len = len;

    if ( insn->note & insn_note_ring0 ) {
        STRNCATF( note, "%s", "Ring0 ", note_len );
    }

    if ( insn->note & insn_note_smm ) {
        STRNCATF( note, "%s", "SMM ", note_len );
    }

    if ( insn->note & insn_note_serial ) {
        STRNCATF( note, "%s", "Serialize ", note_len );
    }

    /* the field delimiter is written even when no note applies */
    STRNCATF( buf, "%s|", note, len );

    return start_len - len;
}
/*
* Append instruction insn to buf as one '|'-delimited raw record. len is the
* space available in buf; it is decremented as fields are appended, either
* by the STRNCAT/STRNCATF macros directly or by subtracting the value
* returned by a helper. Returns strlen(buf).
*/
static int format_raw_insn( x86_insn_t *insn, char *buf, int len ){
struct op_string opstr = { buf, len };
int i;
/* RAW style:
* ADDRESS|OFFSET|SIZE|BYTES|
* PREFIX|PREFIX_STRING|GROUP|TYPE|NOTES|
* MNEMONIC|CPU|ISA|FLAGS_SET|FLAGS_TESTED|
* STACK_MOD|STACK_MOD_VAL
* [|OP_TYPE|OP_DATATYPE|OP_ACCESS|OP_FLAGS|OP]*
*
* Register values are encoded as:
* NAME:TYPE:SIZE
*
* Effective addresses are encoded as:
* disp(base_reg,index_reg,scale)
*
* NOTE(review): the statements below emit the notes field after
* MNEMONIC|CPU|ISA, not before MNEMONIC as listed above -- confirm
* which order consumers expect.
*/
STRNCATF( buf, "0x%08" PRIX32 "|", insn->addr , len );
STRNCATF( buf, "0x%08" PRIX32 "|", insn->offset, len );
STRNCATF( buf, "%d|" , insn->size , len );
/* print bytes */
for ( i = 0; i < insn->size; i++ ) {
STRNCATF( buf, "%02X ", insn->bytes[i], len );
}
STRNCAT( buf, "|", len );
/* prefix names, then the prefix string between delimiters */
len -= format_insn_prefix_str( insn->prefix, buf, len );
STRNCATF( buf, "|%s|", insn->prefix_string , len );
STRNCATF( buf, "%s|", get_insn_group_str( insn->group ), len );
STRNCATF( buf, "%s|", get_insn_type_str( insn->type ) , len );
STRNCATF( buf, "%s|", insn->mnemonic , len );
STRNCATF( buf, "%s|", get_insn_cpu_str( insn->cpu ) , len );
STRNCATF( buf, "%s|", get_insn_isa_str( insn->isa ) , len );
/* insn note */
len -= format_insn_note( insn, buf, len );
/* flags set, then flags tested, each closed by a delimiter */
len -= format_insn_eflags_str( insn->flags_set, buf, len );
STRNCAT( buf, "|", len );
len -= format_insn_eflags_str( insn->flags_tested, buf, len );
STRNCAT( buf, "|", len );
STRNCATF( buf, "%d|", insn->stack_mod, len );
STRNCATF( buf, "%" PRId32 "|", insn->stack_mod_val, len );
/* hand the space that is left to the per-operand callback */
opstr.len = len;
x86_operand_foreach( insn, format_op_raw, &opstr, op_any );
return( strlen (buf) );
}
/*
* Append instruction insn to buf as an <x86_insn> XML element holding the
* address, prefix, mnemonic, set/tested flags and up to three operands.
* len is the space available in buf; it is decremented as text is appended.
* Returns strlen(buf).
*/
static int format_xml_insn( x86_insn_t *insn, char *buf, int len ) {
char str[MAX_OP_XML_STRING];
int i;
STRNCAT( buf, "<x86_insn>\n", len );
/* <address>: rva, offset, size and the instruction bytes in hex */
STRNCATF( buf, "\t<address rva=\"0x%08" PRIX32 "\" ", insn->addr, len );
STRNCATF( buf, "offset=\"0x%08" PRIX32 "\" ", insn->offset, len );
STRNCATF( buf, "size=%d bytes=\"", insn->size, len );
for ( i = 0; i < insn->size; i++ ) {
STRNCATF( buf, "%02X ", insn->bytes[i], len );
}
STRNCAT( buf, "\"/>\n", len );
/* <prefix>: prefix names in type=, the prefix string in string= */
STRNCAT( buf, "\t<prefix type=\"", len );
len -= format_insn_prefix_str( insn->prefix, buf, len );
STRNCATF( buf, "\" string=\"%s\"/>\n", insn->prefix_string, len );
STRNCATF( buf, "\t<mnemonic group=\"%s\" ",
get_insn_group_str (insn->group), len );
STRNCATF( buf, "type=\"%s\" ", get_insn_type_str (insn->type), len );
STRNCATF( buf, "string=\"%s\"/>\n", insn->mnemonic, len );
/* all flag names of one kind go into a single name= attribute */
STRNCAT( buf, "\t<flags type=set>\n", len );
STRNCAT( buf, "\t\t<flag name=\"", len );
len -= format_insn_eflags_str( insn->flags_set, buf, len );
STRNCAT( buf, "\"/>\n\t</flags>\n", len );
STRNCAT( buf, "\t<flags type=tested>\n", len );
STRNCAT( buf, "\t\t<flag name=\"", len );
len -= format_insn_eflags_str( insn->flags_tested, buf, len );
STRNCAT( buf, "\"/>\n\t</flags>\n", len );
/* each operand is formatted into str first, then wrapped in <operand> */
if ( x86_operand_1st( insn ) ) {
x86_format_operand( x86_operand_1st(insn), str,
sizeof str, xml_syntax);
STRNCAT( buf, "\t<operand name=dest>\n", len );
STRNCAT( buf, str, len );
STRNCAT( buf, "\t</operand>\n", len );
}
if ( x86_operand_2nd( insn ) ) {
x86_format_operand( x86_operand_2nd( insn ), str,
sizeof str, xml_syntax);
STRNCAT( buf, "\t<operand name=src>\n", len );
STRNCAT( buf, str, len );
STRNCAT( buf, "\t</operand>\n", len );
}
if ( x86_operand_3rd( insn ) ) {
x86_format_operand( x86_operand_3rd(insn), str,
sizeof str, xml_syntax);
STRNCAT( buf, "\t<operand name=imm>\n", len );
STRNCAT( buf, str, len );
STRNCAT( buf, "\t</operand>\n", len );
}
STRNCAT( buf, "</x86_insn>\n", len );
return strlen (buf);
}
/*
 * Write a one-line description of the output layout used by `format` into
 * buf (capacity len bytes).
 *
 * buf is set to the empty string first, so unknown_syntax and any value
 * outside the enumeration produce "" instead of leaving buf untouched.
 *
 * Returns strlen(buf), or 0 when buf is NULL or len is smaller than 1
 * (nothing is written in that case).
 */
int x86_format_header( char *buf, int len, enum x86_asm_format format ) {

    if ( ! buf || len < 1 ) {
        return(0);
    }
    buf[0] = '\0';

    switch (format) {
        case att_syntax:
            snprintf( buf, len, "MNEMONIC\tSRC, DEST, IMM" );
            break;
        case intel_syntax:
            snprintf( buf, len, "MNEMONIC\tDEST, SRC, IMM" );
            break;
        case native_syntax:
            snprintf( buf, len, "ADDRESS\tBYTES\tMNEMONIC\t"
                                "DEST\tSRC\tIMM" );
            break;
        case raw_syntax:
            snprintf( buf, len, "ADDRESS|OFFSET|SIZE|BYTES|"
                   "PREFIX|PREFIX_STRING|GROUP|TYPE|NOTES|"
                   "MNEMONIC|CPU|ISA|FLAGS_SET|FLAGS_TESTED|"
                   "STACK_MOD|STACK_MOD_VAL"
                   "[|OP_TYPE|OP_DATATYPE|OP_ACCESS|OP_FLAGS|OP]*"
                    );
            break;
        case xml_syntax:
            snprintf( buf, len,
                      "<x86_insn>"
                          "<address rva= offset= size= bytes=/>"
                          "<prefix type= string=/>"
                          "<mnemonic group= type= string= "
                          "cpu= isa= note= />"
                          "<flags type=set>"
                              "<flag name=>"
                          "</flags>"
                          "<stack_mod val= >"
                          "<flags type=tested>"
                              "<flag name=>"
                          "</flags>"
                          "<operand name=>"
                              "<register name= type= size=/>"
                              "<immediate type= value=/>"
                              "<relative_offset value=/>"
                              "<absolute_address value=>"
                                  "<segment value=/>"
                              "</absolute_address>"
                              "<address_expression>"
                                  "<segment value=/>"
                                  "<base>"
                                      "<register name= type= size=/>"
                                  "</base>"
                                  "<index>"
                                      "<register name= type= size=/>"
                                  "</index>"
                                  "<scale>"
                                      "<immediate value=/>"
                                  "</scale>"
                                  "<displacement>"
                                      "<immediate value=/>"
                                      "<address value=/>"
                                  "</displacement>"
                              "</address_expression>"
                              "<segment_offset>"
                                  "<address value=/>"
                              "</segment_offset>"
                          "</operand>"
                      "</x86_insn>"
                    );
            break;
        case unknown_syntax:
        default:
            /* buf already holds the empty string */
            break;
    }

    return( strlen(buf) );
}
/*
 * Format the whole instruction insn into buf (capacity len bytes) using the
 * requested syntax. buf is zeroed first.
 *
 *   intel_syntax  prefix mnemonic<TAB>dest, src, imm
 *   att_syntax    prefix mnemonic<TAB>imm, src, dest  (src and dest are not
 *                 swapped for instructions marked insn_note_nonswap)
 *   raw_syntax    see format_raw_insn
 *   xml_syntax    see format_xml_insn
 *   otherwise     address<TAB>bytes<TAB>prefix mnemonic<TAB>dest<TAB>src<TAB>imm
 *
 * Operands flagged op_implied are left out of the intel and att forms.
 *
 * Returns strlen(buf), or 0 when insn or buf is NULL or len is smaller
 * than 1 (the same argument check x86_format_operand performs).
 */
int x86_format_insn( x86_insn_t *insn, char *buf, int len,
                     enum x86_asm_format format ){
    char str[MAX_OP_STRING];
    x86_op_t *src, *dst;
    int i;

    if ( ! insn || ! buf || len < 1 ) {
        return(0);
    }

    memset(buf, 0, len);
    if ( format == intel_syntax ) {
        /* INTEL STYLE: mnemonic dest, src, imm */
        STRNCAT( buf, insn->prefix_string, len );
        STRNCAT( buf, insn->mnemonic, len );
        STRNCAT( buf, "\t", len );

        /* dest */
        if ( (dst = x86_operand_1st( insn )) && !(dst->flags & op_implied) ) {
            x86_format_operand( dst, str, MAX_OP_STRING, format);
            STRNCAT( buf, str, len );
        }

        /* src */
        if ( (src = x86_operand_2nd( insn )) ) {
            /* separator only when a dest was actually printed; dst is
             * checked for NULL here just as the att branch does */
            if ( dst && !(dst->flags & op_implied) ) {
                STRNCAT( buf, ", ", len );
            }
            x86_format_operand( src, str, MAX_OP_STRING, format);
            STRNCAT( buf, str, len );
        }

        /* imm */
        if ( x86_operand_3rd( insn )) {
            STRNCAT( buf, ", ", len );
            x86_format_operand( x86_operand_3rd( insn ),
                                str, MAX_OP_STRING, format);
            STRNCAT( buf, str, len );
        }

    } else if ( format == att_syntax ) {
        /* ATT STYLE: mnemonic src, dest, imm */
        STRNCAT( buf, insn->prefix_string, len );
        format_att_mnemonic(insn, str, MAX_OP_STRING);
        STRNCATF( buf, "%s\t", str, len);

        /* not sure which is correct? sometimes GNU as requires
         * an imm as the first operand, sometimes as the third... */
        /* imm */
        if ( x86_operand_3rd( insn ) ) {
            x86_format_operand(x86_operand_3rd( insn ),
                               str, MAX_OP_STRING, format);
            STRNCAT( buf, str, len );
            /* there is always 'dest' operand if there is 'src' */
            STRNCAT( buf, ", ", len );
        }

        if ( (insn->note & insn_note_nonswap ) == 0 ) {
            /* regular AT&T style swap */
            src = x86_operand_2nd( insn );
            dst = x86_operand_1st( insn );
        }
        else {
            /* special-case instructions */
            src = x86_operand_1st( insn );
            dst = x86_operand_2nd( insn );
        }

        /* src */
        if ( src ) {
            x86_format_operand(src, str, MAX_OP_STRING, format);
            STRNCAT( buf, str, len );
            /* there is always 'dest' operand if there is 'src' */
            if ( dst && !(dst->flags & op_implied) ) {
                STRNCAT( buf, ", ", len );
            }
        }

        /* dest */
        if ( dst && !(dst->flags & op_implied) ) {
            x86_format_operand( dst, str, MAX_OP_STRING, format);
            STRNCAT( buf, str, len );
        }

    } else if ( format == raw_syntax ) {
        format_raw_insn( insn, buf, len );
    } else if ( format == xml_syntax ) {
        format_xml_insn( insn, buf, len );
    } else { /* default to native */
        /* NATIVE style: RVA\tBYTES\tMNEMONIC\tOPERANDS */
        /* print address */
        STRNCATF( buf, "%08" PRIX32 "\t", insn->addr, len );

        /* print bytes */
        for ( i = 0; i < insn->size; i++ ) {
            STRNCATF( buf, "%02X ", insn->bytes[i], len );
        }

        STRNCAT( buf, "\t", len );

        /* print mnemonic */
        STRNCAT( buf, insn->prefix_string, len );
        STRNCAT( buf, insn->mnemonic, len );
        STRNCAT( buf, "\t", len );

        /* print operands */
        /* dest */
        if ( x86_operand_1st( insn )  ) {
            x86_format_operand( x86_operand_1st( insn ),
                                str, MAX_OP_STRING, format);
            STRNCATF( buf, "%s\t", str, len );
        }

        /* src */
        if ( x86_operand_2nd( insn ) ) {
            x86_format_operand(x86_operand_2nd( insn ),
                               str, MAX_OP_STRING, format);
            STRNCATF( buf, "%s\t", str, len );
        }

        /* imm */
        if ( x86_operand_3rd( insn )) {
            x86_format_operand( x86_operand_3rd( insn ),
                                str, MAX_OP_STRING, format);
            STRNCAT( buf, str, len );
        }
    }

    return( strlen( buf ) );
}