/*
* This file is part of ltrace.
* Copyright (C) 2012,2013,2014 Petr Machata, Red Hat Inc.
* Copyright (C) 2004,2008,2009 Juan Cespedes
* Copyright (C) 2006 Paul Gilliam
*
* This program is free software; you can redistribute it and/or
* modify it under the terms of the GNU General Public License as
* published by the Free Software Foundation; either version 2 of the
* License, or (at your option) any later version.
*
* This program is distributed in the hope that it will be useful, but
* WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program; if not, write to the Free Software
* Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA
* 02110-1301 USA
*/
#include <gelf.h>
#include <sys/ptrace.h>
#include <errno.h>
#include <inttypes.h>
#include <assert.h>
#include <stdbool.h>
#include <string.h>
#include "proc.h"
#include "common.h"
#include "insn.h"
#include "library.h"
#include "breakpoint.h"
#include "linux-gnu/trace.h"
#include "backend.h"
/* There are two PLT types on 32-bit PPC: old-style BSS PLT, and
 * new-style "secure" PLT. We can tell one from the other by the
 * flags on the .plt section. If it's +X (executable), it's BSS PLT,
 * otherwise it's secure.
 *
 * BSS PLT works the same way as on most architectures: the .plt
 * section contains trampolines, and we put breakpoints on those. If
 * the binary is not prelinked, .plt contains zeroes, and the dynamic
 * linker fills in the initial set of trampolines, which means that
 * we need to delay enabling breakpoints until after the binary entry
 * point is hit. Additionally, after the first call, the dynamic
 * linker updates .plt with a branch to the resolved address. That
 * means that on the first hit, we must do something similar to the
 * PPC64 gambit described below.
*
* With secure PLT, the .plt section doesn't contain instructions but
* addresses. The real PLT table is stored in .text. Addresses of
* those PLT entries can be computed, and apart from the fact that
* they are in .text, they are ordinary PLT entries.
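 *
 * For illustration, here is the address computation this file itself
 * uses below (in arch_elf_init and arch_plt_sym_val); with N PLT
 * relocations, the PLT entries sit immediately before the glink VMA,
 * each PPC_PLT_STUB_SIZE (16) bytes long:
 *
 *   plt_stub_vma = glink_vma - 16 * N;
 *   entry_addr(ndx) = plt_stub_vma + 16 * ndx;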
*
 * 64-bit PPC is more involved. The program linker creates for each
 * library call a _stub_ symbol named xxxxxxxx.plt_call.<callee>
 * (where xxxxxxxx is a hexadecimal number). That stub does the call
 * dispatch: it loads the address of the function to call from the
 * .plt section, and branches. PLT entries themselves are essentially
 * a curried call to the resolver. When the symbol is resolved, the
 * resolver updates the value stored in .plt, and the next time
 * around, the stub calls the library function directly. So we make
 * at most one trip (none if the binary is prelinked) through each
 * PLT entry, which makes the PLT entry itself useless as a
 * breakpoint site.
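 *
 * Roughly, a plt_call stub looks like the following sketch (the
 * exact code is toolchain-dependent; OFF stands for the offset of
 * this call's .plt slot relative to the TOC pointer):
 *
 *   <xxxxxxxx.plt_call.callee>:
 *       std   r2,40(r1)     # save caller's TOC pointer
 *       ld    r11,OFF(r2)   # load target address from .plt
 *       mtctr r11
 *       ld    r2,OFF+8(r2)  # load callee's TOC pointer
 *       bctr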
*
* Note the three confusing terms: stubs (that play the role of PLT
* entries), PLT entries, .plt section.
*
 * We first check the symbol tables and see if we happen to have the
 * stub symbols available. If we do, we just put breakpoints on
 * those, and treat them as usual breakpoints. The only tricky part
 * is realizing that there can be more than one breakpoint per
 * symbol.
*
 * The case where we don't have the stub symbols available is harder.
* The following scheme uses two kinds of PLT breakpoints: unresolved
* and resolved (to some address). When the process starts (or when
* we attach), we distribute unresolved PLT breakpoints to the PLT
* entries (not stubs). Then we look in .plt, and for each entry
 * whose value differs from the corresponding PLT entry address,
* we assume it was already resolved, and convert the breakpoint to
* resolved. We also rewrite the resolved value in .plt back to the
* PLT address.
*
 * When a resolved PLT breakpoint is hit (which happens because we
 * rewrite .plt back to the original unresolved addresses), we move
 * the instruction pointer to the corresponding resolved address and
 * continue the process as if nothing happened.
*
 * When an unresolved PLT entry is called for the first time, we need
 * to catch the new value that the resolver will write to the .plt
 * slot. We also need to prevent another thread from racing through
 * and taking the branch without ltrace noticing. So when an
 * unresolved PLT breakpoint hits, we have to stop all threads. We
 * then single-step through the resolver, until the .plt slot
 * changes. When it does, we treat it the same way as above: convert
 * the PLT breakpoint to resolved, and rewrite the .plt value back to
 * the PLT address. We then start all threads again.
*
 * As an optimization, we remember the address in the dynamic linker
 * where the .plt slot gets updated, and put a breakpoint there (one
 * instruction past the store). The next time around (when the next
 * PLT entry is to be resolved), instead of single-stepping through
 * half the dynamic linker, we just let the thread run and hit this
 * breakpoint. When it hits, we know the PLT entry was resolved.
*
 * Another twist comes from tracing slots corresponding to
 * R_PPC64_JMP_IREL relocations. These have no dedicated PLT entry.
 * The calls are done directly from stubs, and the .plt entry
 * (actually an .iplt entry, these live in a special section) is
 * resolved in advance, before the binary starts. Because there's no
 * PLT entry, we put the PLT breakpoints directly on the IFUNC
 * resolver code, and then want them to behave like ordinary PLT
 * slots, including catching the point where they get resolved, so
 * that we can unresolve them. So for the first call (which is the
 * actual resolver call), we pretend that this breakpoint is
 * artificial and has no associated symbol, and turn it on fully only
 * after the first hit. Ideally we would trace that first call as
 * well, but then the stepper, which tries to catch the point where
 * the slot is resolved, would hit the return breakpoint, and that's
 * not currently handled well.
*
 * On PPC32 with secure PLT, the address of an IFUNC symbol in the
 * main binary actually isn't that of the resolver, but of a PLT
 * slot. We therefore have to locate the corresponding PLT
 * relocation (which is of type R_PPC_IRELATIVE) and request that it
 * be traced. The addend of that relocation is the address of the
 * resolver, and we request tracing of the xyz.IFUNC symbol there.
*
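 * To summarize, a PLT symbol's arch.type (the enum itself lives in
 * the arch-specific header) moves through the following states in
 * the code below:
 *
 *   PPC_DEFAULT            plain symbol, nothing PLT-specific
 *   PPC64_PLT_STUB         breakpoint on a PPC64 stub symbol
 *   PPC_PLT_UNRESOLVED     breakpoint on a PLT entry whose .plt
 *                          slot is not resolved yet
 *   PPC_PLT_IRELATIVE      like unresolved, but the slot belongs
 *                          to an IRELATIVE relocation
 *   PPC_PLT_NEED_UNRESOLVE .plt slot found already resolved; it is
 *                          rewritten back when the breakpoint is
 *                          installed
 *   PPC_PLT_RESOLVED       resolved address known; hits are simply
 *                          passed through
 *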
 * XXX TODO If we have hardware watchpoints, we might put a read
 * watch on the .plt slot, and discover the offenders this way. I
 * don't know the details, but I assume at most a handful (like, one
 * or two, if available at all) of addresses may be watched at a
 * time, and thus this would be used as an amendment of the above
 * rather than a full-on solution to PLT tracing on PPC.
*/
#define PPC_PLT_STUB_SIZE 16
#define PPC64_PLT_STUB_SIZE 8 /* XXX */
static inline int
host_powerpc64()
{
#ifdef __powerpc64__
return 1;
#else
return 0;
#endif
}
static void
mark_as_resolved(struct library_symbol *libsym, GElf_Addr value)
{
libsym->arch.type = PPC_PLT_RESOLVED;
libsym->arch.resolved_value = value;
}
static void
ppc32_delayed_symbol(struct library_symbol *libsym)
{
/* arch_dynlink_done is called on attach as well. In that
* case some slots will have been resolved already.
	 * An unresolved PLT entry looks like this:
*
* <sleep@plt>: li r11,0
* <sleep@plt+4>: b "resolve"
*
* "resolve" is another address in PLTGOT (the same block that
	 * all the PLT slots are in). When resolved, it looks
	 * either like this:
*
* <sleep@plt>: b 0xfea88d0 <sleep>
*
* Which is easy to detect. It can also look this way:
*
* <sleep@plt>: li r11,0
* <sleep@plt+4>: b "dispatch"
*
* The "dispatch" address lies in PLTGOT as well. In current
* GNU toolchain, "dispatch" address is the same as PLTGOT
* address. We rely on this to figure out whether the address
* is resolved or not. */
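	/* RESOLVED_VALUE holds the two instruction words that
	 * proc_read_64 read from the PLT entry; PPC32 is big-endian,
	 * so the first instruction is in the high half. */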
uint32_t insn1 = libsym->arch.resolved_value >> 32;
uint32_t insn2 = (uint32_t) libsym->arch.resolved_value;
if ((insn1 & BRANCH_MASK) == B_INSN
|| ((insn2 & BRANCH_MASK) == B_INSN
/* XXX double cast */
&& (ppc_branch_dest(libsym->enter_addr + 4, insn2)
== (arch_addr_t) (long) libsym->lib->arch.pltgot_addr)))
{
mark_as_resolved(libsym, libsym->arch.resolved_value);
}
}
void
arch_dynlink_done(struct process *proc)
{
/* We may need to activate delayed symbols. */
struct library_symbol *libsym = NULL;
while ((libsym = proc_each_symbol(proc, libsym,
library_symbol_delayed_cb, NULL))) {
if (proc_read_64(proc, libsym->enter_addr,
&libsym->arch.resolved_value) < 0) {
fprintf(stderr,
"couldn't read PLT value for %s(%p): %s\n",
libsym->name, libsym->enter_addr,
strerror(errno));
return;
}
if (proc->e_machine == EM_PPC)
ppc32_delayed_symbol(libsym);
if (proc_activate_delayed_symbol(proc, libsym) < 0)
return;
if (proc->e_machine == EM_PPC)
/* XXX double cast */
libsym->arch.plt_slot_addr
= (GElf_Addr) (uintptr_t) libsym->enter_addr;
}
}
static bool
reloc_is_irelative(int machine, GElf_Rela *rela)
{
bool irelative = false;
if (machine == EM_PPC64) {
#ifdef R_PPC64_JMP_IREL
irelative = GELF_R_TYPE(rela->r_info) == R_PPC64_JMP_IREL;
#endif
} else {
assert(machine == EM_PPC);
#ifdef R_PPC_IRELATIVE
irelative = GELF_R_TYPE(rela->r_info) == R_PPC_IRELATIVE;
#endif
}
return irelative;
}
GElf_Addr
arch_plt_sym_val(struct ltelf *lte, size_t ndx, GElf_Rela *rela)
{
if (lte->ehdr.e_machine == EM_PPC && lte->arch.secure_plt) {
assert(lte->arch.plt_stub_vma != 0);
return lte->arch.plt_stub_vma + PPC_PLT_STUB_SIZE * ndx;
} else if (lte->ehdr.e_machine == EM_PPC) {
return rela->r_offset;
/* Beyond this point, we are on PPC64, but don't have stub
* symbols. */
} else if (reloc_is_irelative(lte->ehdr.e_machine, rela)) {
/* Put JMP_IREL breakpoint to resolver, since there's
* no dedicated PLT entry. */
assert(rela->r_addend != 0);
/* XXX double cast */
arch_addr_t res_addr = (arch_addr_t) (uintptr_t) rela->r_addend;
if (arch_translate_address(lte, res_addr, &res_addr) < 0) {
fprintf(stderr, "Couldn't OPD-translate IRELATIVE "
"resolver address.\n");
return 0;
}
/* XXX double cast */
return (GElf_Addr) (uintptr_t) res_addr;
} else {
		/* We put breakpoints on PLT entries the same way as
		 * the PPC32 secure PLT case does. */
assert(lte->arch.plt_stub_vma != 0);
return lte->arch.plt_stub_vma + PPC64_PLT_STUB_SIZE * ndx;
}
}
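/* For reference: on PPC64 (ELFv1), the "address" of a function
 * points into .opd, at a three-doubleword function descriptor:
 *
 *   [0] entry point of the code
 *   [1] TOC pointer (r2) for the callee
 *   [2] environment pointer (r11)
 *
 * Translating an address therefore means reading the first
 * doubleword of the descriptor. */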
/* This entry point is called when ltelf is not available
* anymore--during runtime. At that point we don't have to concern
* ourselves with bias, as the values in OPD have been resolved
* already. */
int
arch_translate_address_dyn(struct process *proc,
arch_addr_t addr, arch_addr_t *ret)
{
if (proc->e_machine == EM_PPC64) {
uint64_t value;
if (proc_read_64(proc, addr, &value) < 0) {
fprintf(stderr,
"dynamic .opd translation of %p: %s\n",
addr, strerror(errno));
return -1;
}
/* XXX The double cast should be removed when
* arch_addr_t becomes integral type. */
*ret = (arch_addr_t)(uintptr_t)value;
return 0;
}
*ret = addr;
return 0;
}
int
arch_translate_address(struct ltelf *lte,
arch_addr_t addr, arch_addr_t *ret)
{
if (lte->ehdr.e_machine == EM_PPC64) {
/* XXX The double cast should be removed when
* arch_addr_t becomes integral type. */
GElf_Xword offset
= (GElf_Addr)(uintptr_t)addr - lte->arch.opd_base;
uint64_t value;
if (elf_read_u64(lte->arch.opd_data, offset, &value) < 0) {
fprintf(stderr, "static .opd translation of %p: %s\n",
addr, elf_errmsg(-1));
return -1;
}
*ret = (arch_addr_t)(uintptr_t)(value + lte->bias);
return 0;
}
*ret = addr;
return 0;
}
static int
load_opd_data(struct ltelf *lte, struct library *lib)
{
Elf_Scn *sec;
GElf_Shdr shdr;
if (elf_get_section_named(lte, ".opd", &sec, &shdr) < 0
|| sec == NULL) {
fail:
fprintf(stderr, "couldn't find .opd data\n");
return -1;
}
lte->arch.opd_data = elf_rawdata(sec, NULL);
if (lte->arch.opd_data == NULL)
goto fail;
lte->arch.opd_base = shdr.sh_addr + lte->bias;
lte->arch.opd_size = shdr.sh_size;
return 0;
}
void *
sym2addr(struct process *proc, struct library_symbol *sym)
{
return sym->enter_addr;
}
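/* For secure PLT, the glink VMA is published either in the second
 * word of the PPCGOT block (located via DT_PPC_GOT), or, failing
 * that, in the first word of the .plt section. This helper tries
 * both, in that order. */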
static GElf_Addr
get_glink_vma(struct ltelf *lte, GElf_Addr ppcgot, Elf_Data *plt_data)
{
Elf_Scn *ppcgot_sec = NULL;
GElf_Shdr ppcgot_shdr;
if (ppcgot != 0
&& (elf_get_section_covering(lte, ppcgot,
&ppcgot_sec, &ppcgot_shdr) < 0
|| ppcgot_sec == NULL))
fprintf(stderr,
"DT_PPC_GOT=%#"PRIx64", but no such section found\n",
ppcgot);
if (ppcgot_sec != NULL) {
Elf_Data *data = elf_loaddata(ppcgot_sec, &ppcgot_shdr);
		if (data == NULL || data->d_size < 8) {
fprintf(stderr, "couldn't read GOT data\n");
} else {
// where PPCGOT begins in .got
size_t offset = ppcgot - ppcgot_shdr.sh_addr;
assert(offset % 4 == 0);
uint32_t glink_vma;
if (elf_read_u32(data, offset + 4, &glink_vma) < 0) {
fprintf(stderr, "couldn't read glink VMA"
" address at %zd@GOT\n", offset);
return 0;
}
if (glink_vma != 0) {
debug(1, "PPC GOT glink_vma address: %#" PRIx32,
glink_vma);
return (GElf_Addr)glink_vma;
}
}
}
if (plt_data != NULL) {
uint32_t glink_vma;
if (elf_read_u32(plt_data, 0, &glink_vma) < 0) {
fprintf(stderr, "couldn't read glink VMA address\n");
return 0;
}
debug(1, ".plt glink_vma address: %#" PRIx32, glink_vma);
return (GElf_Addr)glink_vma;
}
return 0;
}
static int
nonzero_data(Elf_Data *data)
{
/* We are not supposed to get here if there's no PLT. */
assert(data != NULL);
unsigned char *buf = data->d_buf;
if (buf == NULL)
return 0;
size_t i;
for (i = 0; i < data->d_size; ++i)
if (buf[i] != 0)
return 1;
return 0;
}
static enum callback_status
reloc_copy_if_irelative(GElf_Rela *rela, void *data)
{
struct ltelf *lte = data;
return CBS_STOP_IF(reloc_is_irelative(lte->ehdr.e_machine, rela)
&& VECT_PUSHBACK(<e->plt_relocs, rela) < 0);
}
int
arch_elf_init(struct ltelf *lte, struct library *lib)
{
if (lte->ehdr.e_machine == EM_PPC64
&& load_opd_data(lte, lib) < 0)
return -1;
lte->arch.secure_plt = !(lte->plt_flags & SHF_EXECINSTR);
	/* For PPC32 BSS PLT, it is important whether the binary was
	 * prelinked. If the .plt section is NODATA, or if it
	 * contains zeroes, then this library is not prelinked, and
	 * we need to delay breakpoints. */
if (lte->ehdr.e_machine == EM_PPC && !lte->arch.secure_plt)
lib->arch.bss_plt_prelinked = nonzero_data(lte->plt_data);
else
/* For cases where it's irrelevant, initialize the
* value to something conspicuous. */
lib->arch.bss_plt_prelinked = -1;
/* On PPC64 and PPC32 secure, IRELATIVE relocations actually
* relocate .iplt section, and as such are stored in .rela.dyn
* (where all non-PLT relocations are stored) instead of
* .rela.plt. Add these to lte->plt_relocs. */
GElf_Addr rela, relasz;
Elf_Scn *rela_sec;
GElf_Shdr rela_shdr;
if ((lte->ehdr.e_machine == EM_PPC64 || lte->arch.secure_plt)
&& elf_load_dynamic_entry(lte, DT_RELA, &rela) == 0
&& elf_load_dynamic_entry(lte, DT_RELASZ, &relasz) == 0
&& elf_get_section_covering(lte, rela, &rela_sec, &rela_shdr) == 0
&& rela_sec != NULL) {
struct vect v;
VECT_INIT(&v, GElf_Rela);
int ret = elf_read_relocs(lte, rela_sec, &rela_shdr, &v);
if (ret >= 0
&& VECT_EACH(&v, GElf_Rela, NULL,
reloc_copy_if_irelative, lte) != NULL)
ret = -1;
VECT_DESTROY(&v, GElf_Rela, NULL, NULL);
if (ret < 0)
return ret;
}
if (lte->ehdr.e_machine == EM_PPC && lte->arch.secure_plt) {
GElf_Addr ppcgot;
if (elf_load_dynamic_entry(lte, DT_PPC_GOT, &ppcgot) < 0) {
fprintf(stderr, "couldn't find DT_PPC_GOT\n");
return -1;
}
GElf_Addr glink_vma = get_glink_vma(lte, ppcgot, lte->plt_data);
size_t count = vect_size(<e->plt_relocs);
lte->arch.plt_stub_vma = glink_vma
- (GElf_Addr) count * PPC_PLT_STUB_SIZE;
debug(1, "stub_vma is %#" PRIx64, lte->arch.plt_stub_vma);
} else if (lte->ehdr.e_machine == EM_PPC64) {
GElf_Addr glink_vma;
if (elf_load_dynamic_entry(lte, DT_PPC64_GLINK,
&glink_vma) < 0) {
fprintf(stderr, "couldn't find DT_PPC64_GLINK\n");
return -1;
}
/* The first glink stub starts at offset 32. */
lte->arch.plt_stub_vma = glink_vma + 32;
} else {
/* By exhaustion--PPC32 BSS. */
if (elf_load_dynamic_entry(lte, DT_PLTGOT,
&lib->arch.pltgot_addr) < 0) {
fprintf(stderr, "couldn't find DT_PLTGOT\n");
return -1;
}
}
/* On PPC64, look for stub symbols in symbol table. These are
* called: xxxxxxxx.plt_call.callee_name@version+addend. */
if (lte->ehdr.e_machine == EM_PPC64
&& lte->symtab != NULL && lte->strtab != NULL) {
/* N.B. We can't simply skip the symbols that we fail
* to read or malloc. There may be more than one stub
* per symbol name, and if we failed in one but
* succeeded in another, the PLT enabling code would
* have no way to tell that something is missing. We
* could work around that, of course, but it doesn't
* seem worth the trouble. So if anything fails, we
* just pretend that we don't have stub symbols at
* all, as if the binary is stripped. */
size_t i;
for (i = 0; i < lte->symtab_count; ++i) {
GElf_Sym sym;
if (gelf_getsym(lte->symtab, i, &sym) == NULL) {
struct library_symbol *sym, *next;
fail:
for (sym = lte->arch.stubs; sym != NULL; ) {
next = sym->next;
library_symbol_destroy(sym);
free(sym);
sym = next;
}
lte->arch.stubs = NULL;
break;
}
const char *name = lte->strtab + sym.st_name;
#define STUBN ".plt_call."
if ((name = strstr(name, STUBN)) == NULL)
continue;
name += sizeof(STUBN) - 1;
#undef STUBN
size_t len;
const char *ver = strchr(name, '@');
if (ver != NULL) {
len = ver - name;
} else {
/* If there is "+" at all, check that
* the symbol name ends in "+0". */
const char *add = strrchr(name, '+');
if (add != NULL) {
assert(strcmp(add, "+0") == 0);
len = add - name;
} else {
len = strlen(name);
}
}
char *sym_name = strndup(name, len);
struct library_symbol *libsym = malloc(sizeof(*libsym));
if (sym_name == NULL || libsym == NULL) {
fail2:
free(sym_name);
free(libsym);
goto fail;
}
/* XXX The double cast should be removed when
* arch_addr_t becomes integral type. */
arch_addr_t addr = (arch_addr_t)
(uintptr_t)sym.st_value + lte->bias;
if (library_symbol_init(libsym, addr, sym_name, 1,
LS_TOPLT_EXEC) < 0)
goto fail2;
libsym->arch.type = PPC64_PLT_STUB;
libsym->next = lte->arch.stubs;
lte->arch.stubs = libsym;
}
}
return 0;
}
static int
read_plt_slot_value(struct process *proc, GElf_Addr addr, GElf_Addr *valp)
{
	/* On PPC64, we read from .plt, which contains 8-byte
	 * addresses. On PPC32, we also read from .plt, but it
	 * contains 4-byte instructions; the PLT entry is two
	 * instructions, and either can change. */
uint64_t l;
/* XXX double cast. */
if (proc_read_64(proc, (arch_addr_t)(uintptr_t)addr, &l) < 0) {
fprintf(stderr, "ptrace .plt slot value @%#" PRIx64": %s\n",
addr, strerror(errno));
return -1;
}
*valp = (GElf_Addr)l;
return 0;
}
static int
unresolve_plt_slot(struct process *proc, GElf_Addr addr, GElf_Addr value)
{
	/* We only modify plt_entry[0], which holds the resolved
	 * address of the routine. We keep the TOC and environment
	 * pointers intact. Hence the only adjustment that we need
	 * to make is to the IP. */
if (ptrace(PTRACE_POKETEXT, proc->pid, addr, value) < 0) {
fprintf(stderr, "failed to unresolve .plt slot: %s\n",
strerror(errno));
return -1;
}
return 0;
}
enum plt_status
arch_elf_add_func_entry(struct process *proc, struct ltelf *lte,
const GElf_Sym *sym,
arch_addr_t addr, const char *name,
struct library_symbol **ret)
{
if (lte->ehdr.e_machine != EM_PPC || lte->ehdr.e_type == ET_DYN)
return PLT_DEFAULT;
bool ifunc = false;
#ifdef STT_GNU_IFUNC
ifunc = GELF_ST_TYPE(sym->st_info) == STT_GNU_IFUNC;
#endif
if (! ifunc)
return PLT_DEFAULT;
size_t len = vect_size(<e->plt_relocs);
size_t i;
for (i = 0; i < len; ++i) {
GElf_Rela *rela = VECT_ELEMENT(<e->plt_relocs, GElf_Rela, i);
if (sym->st_value == arch_plt_sym_val(lte, i, rela)) {
char *tmp_name = linux_append_IFUNC_to_name(name);
struct library_symbol *libsym = malloc(sizeof *libsym);
/* XXX double cast. */
arch_addr_t resolver_addr
= (arch_addr_t) (uintptr_t) rela->r_addend;
if (tmp_name == NULL || libsym == NULL
|| library_symbol_init(libsym, resolver_addr,
tmp_name, 1,
LS_TOPLT_EXEC) < 0) {
fail:
free(tmp_name);
free(libsym);
return PLT_FAIL;
}
if (elf_add_plt_entry(proc, lte, name, rela,
i, ret) < 0) {
library_symbol_destroy(libsym);
goto fail;
}
libsym->proto = linux_IFUNC_prototype();
libsym->next = *ret;
*ret = libsym;
return PLT_OK;
}
}
*ret = NULL;
return PLT_OK;
}
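/* Data stashed on a symbol whose .plt slot was found already
 * resolved, so that ppc_plt_bp_install can unresolve it later. The
 * SELF pointer serves as a canary: it is asserted on before use to
 * catch stale or clobbered data. */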
struct ppc_unresolve_data {
struct ppc_unresolve_data *self; /* A canary. */
GElf_Addr plt_entry_addr;
GElf_Addr plt_slot_addr;
GElf_Addr plt_slot_value;
bool is_irelative;
};
enum plt_status
arch_elf_add_plt_entry(struct process *proc, struct ltelf *lte,
const char *a_name, GElf_Rela *rela, size_t ndx,
struct library_symbol **ret)
{
bool is_irelative = reloc_is_irelative(lte->ehdr.e_machine, rela);
char *name;
if (! is_irelative) {
name = strdup(a_name);
} else {
GElf_Addr addr = lte->ehdr.e_machine == EM_PPC64
? (GElf_Addr) rela->r_addend
: arch_plt_sym_val(lte, ndx, rela);
name = linux_elf_find_irelative_name(lte, addr);
}
if (name == NULL) {
fail:
free(name);
return PLT_FAIL;
}
struct library_symbol *chain = NULL;
if (lte->ehdr.e_machine == EM_PPC) {
if (default_elf_add_plt_entry(proc, lte, name, rela, ndx,
&chain) < 0)
goto fail;
if (! lte->arch.secure_plt) {
/* On PPC32 with BSS PLT, delay the symbol
* until dynamic linker is done. */
assert(!chain->delayed);
chain->delayed = 1;
}
ok:
*ret = chain;
free(name);
return PLT_OK;
}
/* PPC64. If we have stubs, we return a chain of breakpoint
* sites, one for each stub that corresponds to this PLT
* entry. */
struct library_symbol **symp;
for (symp = <e->arch.stubs; *symp != NULL; ) {
struct library_symbol *sym = *symp;
if (strcmp(sym->name, name) != 0) {
symp = &(*symp)->next;
continue;
}
/* Re-chain the symbol from stubs to CHAIN. */
*symp = sym->next;
sym->next = chain;
chain = sym;
}
if (chain != NULL)
goto ok;
	/* We don't have stub symbols. Find the corresponding .plt
	 * slot, and check whether it contains the corresponding PLT
	 * address (or 0 if the dynamic linker hasn't run yet).
	 * N.B. we don't want to read this from the ELF file, but
	 * from the process image. That makes a difference if we are
	 * attaching to a running process. */
GElf_Addr plt_entry_addr = arch_plt_sym_val(lte, ndx, rela);
GElf_Addr plt_slot_addr = rela->r_offset;
	assert(plt_slot_addr >= lte->plt_addr
	       && plt_slot_addr < lte->plt_addr + lte->plt_size);
GElf_Addr plt_slot_value;
if (read_plt_slot_value(proc, plt_slot_addr, &plt_slot_value) < 0)
goto fail;
struct library_symbol *libsym = malloc(sizeof(*libsym));
if (libsym == NULL) {
fprintf(stderr, "allocation for .plt slot: %s\n",
strerror(errno));
fail2:
free(libsym);
goto fail;
}
/* XXX The double cast should be removed when
* arch_addr_t becomes integral type. */
if (library_symbol_init(libsym,
(arch_addr_t) (uintptr_t) plt_entry_addr,
name, 1, LS_TOPLT_EXEC) < 0)
goto fail2;
libsym->arch.plt_slot_addr = plt_slot_addr;
if (! is_irelative
&& (plt_slot_value == plt_entry_addr || plt_slot_value == 0)) {
libsym->arch.type = PPC_PLT_UNRESOLVED;
libsym->arch.resolved_value = plt_entry_addr;
} else {
		/* Mark the symbol for later unresolving. We must not
		 * do this right away, as this is called by the ltrace
		 * core for all symbols, which are only filtered
		 * later. We only unresolve the symbol just before
		 * its breakpoint is enabled. */
libsym->arch.type = PPC_PLT_NEED_UNRESOLVE;
libsym->arch.data = malloc(sizeof *libsym->arch.data);
if (libsym->arch.data == NULL)
goto fail2;
libsym->arch.data->self = libsym->arch.data;
libsym->arch.data->plt_entry_addr = plt_entry_addr;
libsym->arch.data->plt_slot_addr = plt_slot_addr;
libsym->arch.data->plt_slot_value = plt_slot_value;
libsym->arch.data->is_irelative = is_irelative;
}
*ret = libsym;
return PLT_OK;
}
void
arch_elf_destroy(struct ltelf *lte)
{
struct library_symbol *sym;
for (sym = lte->arch.stubs; sym != NULL; ) {
struct library_symbol *next = sym->next;
library_symbol_destroy(sym);
free(sym);
sym = next;
}
}
static void
dl_plt_update_bp_on_hit(struct breakpoint *bp, struct process *proc)
{
debug(DEBUG_PROCESS, "pid=%d dl_plt_update_bp_on_hit %s(%p)",
proc->pid, breakpoint_name(bp), bp->addr);
struct process_stopping_handler *self = proc->arch.handler;
assert(self != NULL);
struct library_symbol *libsym = self->breakpoint_being_enabled->libsym;
GElf_Addr value;
if (read_plt_slot_value(proc, libsym->arch.plt_slot_addr, &value) < 0)
return;
/* On PPC64, we rewrite the slot value. */
if (proc->e_machine == EM_PPC64)
unresolve_plt_slot(proc, libsym->arch.plt_slot_addr,
libsym->arch.resolved_value);
/* We mark the breakpoint as resolved on both arches. */
mark_as_resolved(libsym, value);
	/* cb_on_all_stopped checks whether HANDLER is set to NULL as
	 * a way to tell that this was run. It's an error if it
	 * wasn't. */
proc->arch.handler = NULL;
breakpoint_turn_off(bp, proc);
}
static void
cb_on_all_stopped(struct process_stopping_handler *self)
{
/* Put that in for dl_plt_update_bp_on_hit to see. */
assert(self->task_enabling_breakpoint->arch.handler == NULL);
self->task_enabling_breakpoint->arch.handler = self;
linux_ptrace_disable_and_continue(self);
}
static enum callback_status
cb_keep_stepping_p(struct process_stopping_handler *self)
{
struct process *proc = self->task_enabling_breakpoint;
struct library_symbol *libsym = self->breakpoint_being_enabled->libsym;
GElf_Addr value;
if (read_plt_slot_value(proc, libsym->arch.plt_slot_addr, &value) < 0)
return CBS_FAIL;
/* In UNRESOLVED state, the RESOLVED_VALUE in fact contains
* the PLT entry value. */
if (value == libsym->arch.resolved_value)
return CBS_CONT;
debug(DEBUG_PROCESS, "pid=%d PLT got resolved to value %#"PRIx64,
proc->pid, value);
/* The .plt slot got resolved! We can migrate the breakpoint
* to RESOLVED and stop single-stepping. */
if (proc->e_machine == EM_PPC64
&& unresolve_plt_slot(proc, libsym->arch.plt_slot_addr,
libsym->arch.resolved_value) < 0)
return CBS_FAIL;
	/* Resolving on PPC64 consists of overwriting a doubleword in
	 * .plt. That doubleword is then read back by a stub, and
	 * jumped on. Hopefully we can assume that the doubleword
	 * update is done in a single place only, as it contains a
	 * final address. We still need to look around for any sync
	 * instruction, but essentially it is safe to optimize away
	 * the single-stepping next time and install a post-update
	 * breakpoint.
	 *
	 * The situation on PPC32 BSS is more complicated. The
	 * dynamic linker here updates potentially several
	 * instructions (XXX currently we assume two) and the rules
	 * are more complicated. Sometimes it's enough to adjust just
	 * one of the addresses--the logic for generating optimal
	 * dispatch depends on the relative addresses of the .plt
	 * entry and the jump destination. We can't assume that the
	 * same instruction block does the update every time. So on
	 * PPC32, we turn the optimization off and just step through
	 * it each time. */
if (proc->e_machine == EM_PPC)
goto done;
/* Install breakpoint to the address where the change takes
* place. If we fail, then that just means that we'll have to
* singlestep the next time around as well. */
struct process *leader = proc->leader;
if (leader == NULL || leader->arch.dl_plt_update_bp != NULL)
goto done;
	/* We need to install the breakpoint at the next instruction.
	 * ADDR points to a store instruction, so moving the
	 * breakpoint one instruction forward is safe. */
arch_addr_t addr = get_instruction_pointer(proc) + 4;
leader->arch.dl_plt_update_bp = insert_breakpoint_at(proc, addr, NULL);
if (leader->arch.dl_plt_update_bp == NULL)
goto done;
static struct bp_callbacks dl_plt_update_cbs = {
.on_hit = dl_plt_update_bp_on_hit,
};
leader->arch.dl_plt_update_bp->cbs = &dl_plt_update_cbs;
/* Turn it off for now. We will turn it on again when we hit
* the PLT entry that needs this. */
breakpoint_turn_off(leader->arch.dl_plt_update_bp, proc);
done:
mark_as_resolved(libsym, value);
return CBS_STOP;
}
static void
jump_to_entry_point(struct process *proc, struct breakpoint *bp)
{
/* XXX The double cast should be removed when
* arch_addr_t becomes integral type. */
arch_addr_t rv = (arch_addr_t)
(uintptr_t)bp->libsym->arch.resolved_value;
set_instruction_pointer(proc, rv);
}
static void
ppc_plt_bp_continue(struct breakpoint *bp, struct process *proc)
{
	/* If this is the first call through an IREL breakpoint,
	 * enable the symbol so that it doesn't look like an
	 * artificial breakpoint anymore. */
if (bp->libsym == NULL) {
assert(bp->arch.irel_libsym != NULL);
bp->libsym = bp->arch.irel_libsym;
bp->arch.irel_libsym = NULL;
}
switch (bp->libsym->arch.type) {
struct process *leader;
void (*on_all_stopped)(struct process_stopping_handler *);
enum callback_status (*keep_stepping_p)
(struct process_stopping_handler *);
case PPC_DEFAULT:
assert(proc->e_machine == EM_PPC);
assert(bp->libsym != NULL);
assert(bp->libsym->lib->arch.bss_plt_prelinked == 0);
/* Fall through. */
case PPC_PLT_IRELATIVE:
case PPC_PLT_UNRESOLVED:
on_all_stopped = NULL;
keep_stepping_p = NULL;
leader = proc->leader;
if (leader != NULL && leader->arch.dl_plt_update_bp != NULL
&& breakpoint_turn_on(leader->arch.dl_plt_update_bp,
proc) >= 0)
on_all_stopped = cb_on_all_stopped;
else
keep_stepping_p = cb_keep_stepping_p;
if (process_install_stopping_handler
(proc, bp, on_all_stopped, keep_stepping_p, NULL) < 0) {
fprintf(stderr, "ppc_plt_bp_continue: "
"couldn't install event handler\n");
continue_after_breakpoint(proc, bp);
}
return;
case PPC_PLT_RESOLVED:
if (proc->e_machine == EM_PPC) {
continue_after_breakpoint(proc, bp);
return;
}
jump_to_entry_point(proc, bp);
continue_process(proc->pid);
return;
case PPC64_PLT_STUB:
case PPC_PLT_NEED_UNRESOLVE:
/* These should never hit here. */
break;
}
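	/* Not reached: all arch.type values are handled above. */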
assert(bp->libsym->arch.type != bp->libsym->arch.type);
abort();
}
/* When a process is in a PLT stub, it may have already read the data
 * in .plt that we changed. If we detach now, it will jump to the
 * PLT entry and continue to the dynamic linker, where it will
 * SIGSEGV, because in prelinked binaries the zeroth .plt slot is not
 * filled in, and the dynamic linker needs that data. Moreover, the
 * process may actually have hit the breakpoint already. This
 * function tries to detect both cases and do any fix-ups necessary
 * to mend the situation. */
static enum callback_status
detach_task_cb(struct process *task, void *data)
{
struct breakpoint *bp = data;
if (get_instruction_pointer(task) == bp->addr) {
debug(DEBUG_PROCESS, "%d at %p, which is PLT slot",
task->pid, bp->addr);
jump_to_entry_point(task, bp);
return CBS_CONT;
}
/* XXX There's still a window of several instructions where we
* might catch the task inside a stub such that it has already
* read destination address from .plt, but hasn't jumped yet,
* thus avoiding the breakpoint. */
return CBS_CONT;
}
static void
ppc_plt_bp_retract(struct breakpoint *bp, struct process *proc)
{
	/* On PPC64, we rewrite .plt with PLT entry addresses. This
	 * needs to be undone. Unfortunately, the program may have
	 * made decisions based on that value. */
if (proc->e_machine == EM_PPC64
&& bp->libsym != NULL
&& bp->libsym->arch.type == PPC_PLT_RESOLVED) {
each_task(proc->leader, NULL, detach_task_cb, bp);
unresolve_plt_slot(proc, bp->libsym->arch.plt_slot_addr,
bp->libsym->arch.resolved_value);
}
}
static void
ppc_plt_bp_install(struct breakpoint *bp, struct process *proc)
{
/* This should not be an artificial breakpoint. */
struct library_symbol *libsym = bp->libsym;
if (libsym == NULL)
libsym = bp->arch.irel_libsym;
assert(libsym != NULL);
if (libsym->arch.type == PPC_PLT_NEED_UNRESOLVE) {
		/* Unresolve the .plt slot. If the binary was
		 * prelinked, this makes the code invalid, because in
		 * the case of a prelinked binary, the dynamic linker
		 * doesn't update .plt[0] and .plt[1] with addresses
		 * of the resolver. But we don't care, we will never
		 * need to enter the resolver. That just means that
		 * we have to un-un-resolve this back before we
		 * detach. */
struct ppc_unresolve_data *data = libsym->arch.data;
libsym->arch.data = NULL;
assert(data->self == data);
GElf_Addr plt_slot_addr = data->plt_slot_addr;
GElf_Addr plt_slot_value = data->plt_slot_value;
GElf_Addr plt_entry_addr = data->plt_entry_addr;
if (unresolve_plt_slot(proc, plt_slot_addr,
plt_entry_addr) == 0) {
if (! data->is_irelative) {
mark_as_resolved(libsym, plt_slot_value);
} else {
libsym->arch.type = PPC_PLT_IRELATIVE;
libsym->arch.resolved_value = plt_entry_addr;
}
} else {
fprintf(stderr, "Couldn't unresolve %s@%p. Not tracing"
" this symbol.\n",
breakpoint_name(bp), bp->addr);
proc_remove_breakpoint(proc, bp);
}
free(data);
}
}
int
arch_library_init(struct library *lib)
{
return 0;
}
void
arch_library_destroy(struct library *lib)
{
}
int
arch_library_clone(struct library *retp, struct library *lib)
{
return 0;
}
int
arch_library_symbol_init(struct library_symbol *libsym)
{
/* We set type explicitly in the code above, where we have the
* necessary context. This is for calls from ltrace-elf.c and
* such. */
libsym->arch.type = PPC_DEFAULT;
return 0;
}
void
arch_library_symbol_destroy(struct library_symbol *libsym)
{
if (libsym->arch.type == PPC_PLT_NEED_UNRESOLVE) {
assert(libsym->arch.data->self == libsym->arch.data);
free(libsym->arch.data);
libsym->arch.data = NULL;
}
}
int
arch_library_symbol_clone(struct library_symbol *retp,
struct library_symbol *libsym)
{
retp->arch = libsym->arch;
return 0;
}
/* For some symbol types, we need to set up custom callbacks. XXX we
* don't need PROC here, we can store the data in BP if it is of
* interest to us. */
int
arch_breakpoint_init(struct process *proc, struct breakpoint *bp)
{
bp->arch.irel_libsym = NULL;
/* Artificial and entry-point breakpoints are plain. */
if (bp->libsym == NULL || bp->libsym->plt_type != LS_TOPLT_EXEC)
return 0;
/* On PPC, secure PLT and prelinked BSS PLT are plain. */
if (proc->e_machine == EM_PPC
&& bp->libsym->lib->arch.bss_plt_prelinked != 0)
return 0;
/* On PPC64, stub PLT breakpoints are plain. */
if (proc->e_machine == EM_PPC64
&& bp->libsym->arch.type == PPC64_PLT_STUB)
return 0;
static struct bp_callbacks cbs = {
.on_continue = ppc_plt_bp_continue,
.on_retract = ppc_plt_bp_retract,
.on_install = ppc_plt_bp_install,
};
breakpoint_set_callbacks(bp, &cbs);
/* For JMP_IREL breakpoints, make the breakpoint look
* artificial by hiding the symbol. */
if (bp->libsym->arch.type == PPC_PLT_IRELATIVE) {
bp->arch.irel_libsym = bp->libsym;
bp->libsym = NULL;
}
return 0;
}
void
arch_breakpoint_destroy(struct breakpoint *bp)
{
}
int
arch_breakpoint_clone(struct breakpoint *retp, struct breakpoint *sbp)
{
retp->arch = sbp->arch;
return 0;
}
int
arch_process_init(struct process *proc)
{
proc->arch.dl_plt_update_bp = NULL;
proc->arch.handler = NULL;
return 0;
}
void
arch_process_destroy(struct process *proc)
{
}
int
arch_process_clone(struct process *retp, struct process *proc)
{
retp->arch = proc->arch;
if (retp->arch.dl_plt_update_bp != NULL) {
/* Point it to the corresponding breakpoint in RETP.
* It must be there, this part of PROC has already
* been cloned to RETP. */
retp->arch.dl_plt_update_bp
= address2bpstruct(retp,
retp->arch.dl_plt_update_bp->addr);
assert(retp->arch.dl_plt_update_bp != NULL);
}
return 0;
}
int
arch_process_exec(struct process *proc)
{
return arch_process_init(proc);
}