/**
 * @file bfd_support.cpp
 * BFD muck we have to deal with.
 *
 * @remark Copyright 2005 OProfile authors
 * @remark Read the file COPYING
 *
 * @author John Levon
 */

#include "bfd_support.h"

#include "op_bfd.h"
#include "op_fileio.h"
#include "op_config.h"
#include "string_manip.h"
#include "file_manip.h"
#include "cverb.h"
#include "locate_images.h"

#include <cstdlib>
#include <cstring>
#include <cassert>
#include <iostream>
#include <fstream>
#include <sstream>
#include <string>
#include <cstring>
#include <cstdlib>

using namespace std;

extern verbose vbfd;

namespace {


void check_format(string const & file, bfd ** ibfd)
{
	if (!bfd_check_format_matches(*ibfd, bfd_object, NULL)) {
		cverb << vbfd << "BFD format failure for " << file << endl;
		bfd_close(*ibfd);
		*ibfd = NULL;
	}
}


bool separate_debug_file_exists(string & name, unsigned long const crc, 
                                extra_images const & extra)
{
	unsigned long file_crc = 0;
	// The size of 2 * 1024 elements for the buffer is arbitrary.
	char buffer[2 * 1024];

	image_error img_ok;
	string const image_path = extra.find_image_path(name, img_ok, true);

	if (img_ok != image_ok)
		return false;

	name = image_path;

	ifstream file(image_path.c_str());
	if (!file)
		return false;

	cverb << vbfd << "found " << name;
	while (file) {
		file.read(buffer, sizeof(buffer));
		file_crc = calc_crc32(file_crc, 
				      reinterpret_cast<unsigned char *>(&buffer[0]),
				      file.gcount());
	}
	cverb << vbfd << " with crc32 = " << hex << file_crc << endl;
	return crc == file_crc;
}


bool get_debug_link_info(bfd * ibfd, string & filename, unsigned long & crc32)
{
	asection * sect;

	cverb << vbfd << "fetching .gnu_debuglink section" << endl;
	sect = bfd_get_section_by_name(ibfd, ".gnu_debuglink");
	
	if (sect == NULL)
		return false;
	
	bfd_size_type debuglink_size = bfd_section_size(ibfd, sect);  
	char contents[debuglink_size];
	cverb << vbfd
	      << ".gnu_debuglink section has size " << debuglink_size << endl;
	
	if (!bfd_get_section_contents(ibfd, sect, 
				 reinterpret_cast<unsigned char *>(contents), 
				 static_cast<file_ptr>(0), debuglink_size)) {
		bfd_perror("bfd_get_section_contents:get_debug:");
		exit(2);
	}
	
	/* CRC value is stored after the filename, aligned up to 4 bytes. */
	size_t filename_len = strlen(contents);
	size_t crc_offset = filename_len + 1;
	crc_offset = (crc_offset + 3) & ~3;
	
	crc32 = bfd_get_32(ibfd, 
			       reinterpret_cast<bfd_byte *>(contents + crc_offset));
	filename = string(contents, filename_len);
	cverb << vbfd << ".gnu_debuglink filename is " << filename << endl;
	return true;
}


/**
 * With Objective C, we'll get strings like:
 *
 * _i_GSUnicodeString__rangeOfCharacterSetFromSet_options_range
 *
 * for the symbol name, and:
 * -[GSUnicodeString rangeOfCharacterFromSet:options:range:]
 *
 * for the function name, so we have to do some looser matching
 * than for other languages (unfortunately, it's not possible
 * to demangle Objective C symbols).
 */
bool objc_match(string const & sym, string const & method)
{
	if (method.length() < 3)
		return false;

	string mangled;

	if (is_prefix(method, "-[")) {
		mangled += "_i_";
	} else if (is_prefix(method, "+[")) {
		mangled += "_c_";
	} else {
		return false;
	}

	string::const_iterator it = method.begin() + 2;
	string::const_iterator const end = method.end();

	bool found_paren = false;

	for (; it != end; ++it) {
		switch (*it) {
		case ' ':
			mangled += '_';
			if (!found_paren)
				mangled += '_';
			break;
		case ':':
			mangled += '_';
			break;
		case ')':
		case ']':
			break;
		case '(':
			found_paren = true;
			mangled += '_';
			break;
		default:
			mangled += *it;	
		}
	}

	return sym == mangled;
}


/*
 * With a binary image where some objects are missing debug
 * info, we can end up attributing to a completely different
 * function (#484660): bfd_nearest_line() will happily move from one
 * symbol to the nearest one it can find with debug information.
 * To mitigate this problem, we check that the symbol name
 * matches the returned function name.
 *
 * However, this check fails in some cases it shouldn't:
 * Objective C, and C++ static inline functions (as discussed in
 * GCC bugzilla #11774). So, we have a looser check that
 * accepts merely a substring, plus some magic for Objective C.
 *
 * If even the loose check fails, then we give up.
 */
bool is_correct_function(string const & function, string const & name)
{
	if (name == function)
		return true;

	if (objc_match(name, function))
		return true;

	// warn the user if we had to use the loose check
	if (name.find(function) != string::npos) {
		static bool warned = false;
		if (!warned) {
			cerr << "warning: some functions compiled without "
			     << "debug information may have incorrect source "
			     << "line attributions" << endl;
				warned = true;
		}
		cverb << vbfd << "is_correct_function(" << function << ", "
		      << name << ") fuzzy match." << endl;
		return true;
	}

	return false;
}


/*
 * binutils 2.12 and below have a small bug where functions without a
 * debug entry at the prologue start do not give a useful line number
 * from bfd_find_nearest_line(). This can happen with certain gcc
 * versions such as 2.95.
 *
 * We work around this problem by scanning forward for a vma with valid
 * linenr info, if we can't get a valid line number.  Problem uncovered
 * by Norbert Kaufmann. The work-around decreases, on the tincas
 * application, the number of failure to retrieve linenr info from 835
 * to 173. Most of the remaining are c++ inline functions mainly from
 * the STL library. Fix #529622
 */
void fixup_linenr(bfd * abfd, asection * section, asymbol ** syms,
		  string const & name, bfd_vma pc,
                  char const ** filename, unsigned int * line)
{
	char const * cfilename;
	char const * function;
	unsigned int linenr;

	// FIXME: looking at debug info for all gcc version shows than
	// the same problems can -perhaps- occur for epilog code: find a
	// samples files with samples in epilog and try opreport -l -g
	// on it, check it also with opannotate.

	// first restrict the search on a sensible range of vma, 16 is
	// an intuitive value based on epilog code look
	size_t max_search = 16;
	size_t section_size = bfd_section_size(abfd, section);
	if (pc + max_search > section_size)
		max_search = section_size - pc;

	for (size_t i = 1; i < max_search; ++i) {
		bool ret = bfd_find_nearest_line(abfd, section, syms, pc + i,
						 &cfilename, &function,
						 &linenr);

		if (ret && cfilename && function && linenr != 0
		    && is_correct_function(function, name)) {
			*filename = cfilename;
			*line = linenr;
			return;
		}
	}
}


} // namespace anon


bfd * open_bfd(string const & file)
{
	/* bfd keeps its own reference to the filename char *,
	 * so it must have a lifetime longer than the ibfd */
	bfd * ibfd = bfd_openr(file.c_str(), NULL);
	if (!ibfd) {
		cverb << vbfd << "bfd_openr failed for " << file << endl;
		return NULL;
	}

	check_format(file, &ibfd);

	return ibfd;
}


bfd * fdopen_bfd(string const & file, int fd)
{
	/* bfd keeps its own reference to the filename char *,
	 * so it must have a lifetime longer than the ibfd */
	bfd * ibfd = bfd_fdopenr(file.c_str(), NULL, fd);
	if (!ibfd) {
		cverb << vbfd << "bfd_openr failed for " << file << endl;
		return NULL;
	}

	check_format(file, &ibfd);

	return ibfd;
}


bool find_separate_debug_file(bfd * ibfd, string const & filepath_in, 
                              string & debug_filename, extra_images const & extra)
{
	string filepath(filepath_in);
	string basename;
	unsigned long crc32;
	
	if (!get_debug_link_info(ibfd, basename, crc32))
		return false;

	// Work out the image file's directory prefix
	string filedir = op_dirname(filepath);
	// Make sure it starts with /
	if (filedir.size() > 0 && filedir.at(filedir.size() - 1) != '/')
		filedir += '/';

	string first_try(filedir + ".debug/" + basename);
	string second_try(DEBUGDIR + filedir + basename);
	string third_try(filedir + basename);

	cverb << vbfd << "looking for debugging file " << basename 
	      << " with crc32 = " << hex << crc32 << endl;

	if (separate_debug_file_exists(first_try, crc32, extra)) 
		debug_filename = first_try; 
	else if (separate_debug_file_exists(second_try, crc32, extra))
		debug_filename = second_try;
	else if (separate_debug_file_exists(third_try, crc32, extra))
		debug_filename = third_try;
	else
		return false;
	
	return true;
}


bool interesting_symbol(asymbol * sym)
{
	// #717720 some binutils are miscompiled by gcc 2.95, one of the
	// typical symptom can be catched here.
	if (!sym->section) {
		ostringstream os;
		os << "Your version of binutils seems to have a bug.\n"
		   << "Read http://oprofile.sf.net/faq/#binutilsbug\n";
		throw op_runtime_error(os.str());
	}

	if (!(sym->section->flags & SEC_CODE))
		return false;

	// returning true for fix up in op_bfd_symbol()
	if (!sym->name || sym->name[0] == '\0')
		return true;
	/* ARM assembler internal mapping symbols aren't interesting */
	if ((strcmp("$a", sym->name) == 0) ||
	    (strcmp("$t", sym->name) == 0) ||
	    (strcmp("$d", sym->name) == 0))
		return false;

	// C++ exception stuff
	if (sym->name[0] == '.' && sym->name[1] == 'L')
		return false;

	/* This case cannot be moved to boring_symbol(),
	 * because that's only used for duplicate VMAs,
	 * and sometimes this symbol appears at an address
	 * different from all other symbols.
	 */
	if (!strcmp("gcc2_compiled.", sym->name))
		return false;

        if (sym->flags & BSF_SECTION_SYM)
                return false;

	if (!(sym->section->flags & SEC_LOAD))
		return false;

	return true;
}


bool boring_symbol(op_bfd_symbol const & first, op_bfd_symbol const & second)
{
	if (first.name() == "Letext")
		return true;
	else if (second.name() == "Letext")
		return false;

	if (first.name().substr(0, 2) == "??")
		return true;
	else if (second.name().substr(0, 2) == "??")
		return false;

	if (first.hidden() && !second.hidden())
		return true;
	else if (!first.hidden() && second.hidden())
		return false;

	if (first.name()[0] == '_' && second.name()[0] != '_')
		return true;
	else if (first.name()[0] != '_' && second.name()[0] == '_')
		return false;

	if (first.weak() && !second.weak())
		return true;
	else if (!first.weak() && second.weak())
		return false;

	return false;
}


bool bfd_info::has_debug_info() const
{
	if (!valid())
		return false;

	for (asection const * sect = abfd->sections; sect; sect = sect->next) {
		if (sect->flags & SEC_DEBUGGING)
			return true;
	}

	return false;
}


bfd_info::~bfd_info()
{
	free(synth_syms);
	close();
}


void bfd_info::close()
{
	if (abfd)
		bfd_close(abfd);
}

/**
 * This function is only called when processing symbols retrieved from a
 * debuginfo file that is separate from the actual runtime binary image.
 * Separate debuginfo files may be needed in two different cases:
 *   1) the real image is completely stripped, where there is no symbol
	information at all
 *   2) the real image has debuginfo stripped, and the user is requesting "-g"
 *   (src file/line num info)
 * After all symbols are gathered up, there will be some filtering/removal of
 * unnecessary symbols.  In particular, the bfd_info::interesting_symbol()
 * function filters out symbols whose section's flag value does not include
 * SEC_LOAD.  This filtering is required, so it must be retained.  However,
 * we run into a problem with symbols from debuginfo files, since the
 * section flag does NOT include SEC_LOAD.  To solve this problem, the
 * translate_debuginfo_syms function maps the debuginfo symbol's sections to
 * that of their corresponding real image.
*/
void bfd_info::translate_debuginfo_syms(asymbol ** dbg_syms, long nr_dbg_syms)
{
	unsigned int img_sect_cnt = 0;
	bfd * image_bfd = image_bfd_info->abfd;
	multimap<string, bfd_section *> image_sections;

	for (bfd_section * sect = image_bfd->sections;
	     sect && img_sect_cnt < image_bfd->section_count;
	     sect = sect->next) {
		// A comment section marks the end of the needed sections
		if (strstr(sect->name, ".comment") == sect->name)
			break;
		image_sections.insert(pair<string, bfd_section *>(sect->name, sect));
		img_sect_cnt++;
	}

	asymbol * sym = dbg_syms[0];
	string prev_sect_name = "";
	bfd_section * matched_section = NULL;
	for (int i = 0; i < nr_dbg_syms; sym = dbg_syms[++i]) {
		bool section_switch;

		if (strcmp(prev_sect_name.c_str(), sym->section->name)) {
			section_switch = true;
			prev_sect_name = sym->section->name;
		} else {
			section_switch = false;
		}
		if (sym->section->owner && sym->section->owner == abfd) {
			if (section_switch ) {
				matched_section = NULL;
				multimap<string, bfd_section *>::iterator it;
				pair<multimap<string, bfd_section *>::iterator,
				     multimap<string, bfd_section *>::iterator> range;

				range = image_sections.equal_range(sym->section->name);
				for (it = range.first; it != range.second; it++) {
					if ((*it).second->vma == sym->section->vma) {
						matched_section = (*it).second;
						break;
					}
				}
			}
			if (matched_section) {
				sym->section = matched_section;
				sym->the_bfd = image_bfd;
			}
		}
	}
}

#if SYNTHESIZE_SYMBOLS
bool bfd_info::get_synth_symbols()
{
	extern const bfd_target bfd_elf64_powerpc_vec;
	extern const bfd_target bfd_elf64_powerpcle_vec;
	bool is_elf64_powerpc_target = (abfd->xvec == &bfd_elf64_powerpc_vec)
		|| (abfd->xvec == &bfd_elf64_powerpcle_vec);

	if (!is_elf64_powerpc_target)
		return false;

	void * buf;
	uint tmp;
	long nr_mini_syms = bfd_read_minisymbols(abfd, 0, &buf, &tmp);
	if (nr_mini_syms < 1)
		return false;

	asymbol ** mini_syms = (asymbol **)buf;
	buf = NULL;
	bfd * synth_bfd;

	/* For ppc64, a debuginfo file by itself does not hold enough symbol
	 * information for us to properly attribute samples to symbols.  If
	 * the image file's bfd has no symbols (as in a super-stripped library),
	 * then we need to do the extra processing in translate_debuginfo_syms.
	 */
	if (image_bfd_info && image_bfd_info->nr_syms == 0) {
		translate_debuginfo_syms(mini_syms, nr_mini_syms);
		synth_bfd = image_bfd_info->abfd;
	} else
		synth_bfd = abfd;
	
	long nr_synth_syms = bfd_get_synthetic_symtab(synth_bfd,
	                                              nr_mini_syms,
	                                              mini_syms, 0,
	                                              NULL, &synth_syms);

	if (nr_synth_syms < 0) {
		free(mini_syms);
		return false;
	}

	cverb << vbfd << "mini_syms: " << dec << nr_mini_syms << hex << endl;
	cverb << vbfd << "synth_syms: " << dec << nr_synth_syms << hex << endl;

	nr_syms = nr_mini_syms + nr_synth_syms;
	syms.reset(new asymbol *[nr_syms + 1]);

	for (size_t i = 0; i < (size_t)nr_mini_syms; ++i)
		syms[i] = mini_syms[i];


	for (size_t i = 0; i < (size_t)nr_synth_syms; ++i)
		syms[nr_mini_syms + i] = synth_syms + i;
	

	free(mini_syms);

	// bfd_canonicalize_symtab does this, so shall we
	syms[nr_syms] = NULL;

	return true;
}
#else
bool bfd_info::get_synth_symbols()
{
	return false;
}
#endif /* SYNTHESIZE_SYMBOLS */


void bfd_info::get_symbols()
{
	if (!abfd)
		return;

	cverb << vbfd << "bfd_info::get_symbols() for "
	      << bfd_get_filename(abfd) << endl;

	if (get_synth_symbols())
		return;

	if (bfd_get_file_flags(abfd) & HAS_SYMS)
		nr_syms = bfd_get_symtab_upper_bound(abfd);

	cverb << vbfd << "bfd_get_symtab_upper_bound: " << dec
	      << nr_syms << hex << endl;

	nr_syms /= sizeof(asymbol *);

	if (nr_syms < 1)
		return;

	syms.reset(new asymbol *[nr_syms]);

	nr_syms = bfd_canonicalize_symtab(abfd, syms.get());

	if (image_bfd_info)
		translate_debuginfo_syms(syms.get(), nr_syms);

	cverb << vbfd << "bfd_canonicalize_symtab: " << dec
	      << nr_syms << hex << endl;
}


linenr_info const
find_nearest_line(bfd_info const & b, op_bfd_symbol const & sym,
                  bfd_vma offset, bool anon_obj)
{
	char const * function = "";
	char const * cfilename = "";
	unsigned int linenr = 0;
	linenr_info info;
	bfd * abfd;
	asymbol ** syms;
	asection * section;
	bfd_vma pc;
	bool ret;

	if (!b.valid())
		goto fail;

	// take care about artificial symbol
	if (!sym.symbol())
		goto fail;

	abfd = b.abfd;
	syms = b.syms.get();
	if (!syms)
		goto fail;
	section = sym.symbol()->section;
	if (anon_obj)
		pc = offset - sym.symbol()->section->vma;
	else
		pc = (sym.value() + offset) - sym.filepos();

	if ((bfd_get_section_flags(abfd, section) & SEC_ALLOC) == 0)
		goto fail;

	if (pc >= bfd_section_size(abfd, section))
		goto fail;

	ret = bfd_find_nearest_line(abfd, section, syms, pc, &cfilename,
	                                 &function, &linenr);

	if (!ret || !cfilename || !function)
		goto fail;

	/*
	 * is_correct_function does not handle the case of static inlines,
	 * but if the linenr is non-zero in the inline case, it is the correct
	 * line number.
	 */
	if (linenr == 0 && !is_correct_function(function, sym.name()))
		goto fail;

	if (linenr == 0) {
		fixup_linenr(abfd, section, syms, sym.name(), pc, &cfilename,
		             &linenr);
	}

	info.found = true;
	info.filename = cfilename;
	info.line = linenr;
	return info;

fail:
	info.found = false;
	// some stl lacks string::clear()
	info.filename.erase(info.filename.begin(), info.filename.end());
	info.line = 0;
	return info;
}