/*
 * Copyright (C) 2018 The Android Open Source Project
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *      http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

#include "src/profiling/memory/unwinding.h"

#include <sys/types.h>
#include <unistd.h>

#include <unwindstack/MachineArm.h>
#include <unwindstack/MachineArm64.h>
#include <unwindstack/MachineMips.h>
#include <unwindstack/MachineMips64.h>
#include <unwindstack/MachineX86.h>
#include <unwindstack/MachineX86_64.h>
#include <unwindstack/Maps.h>
#include <unwindstack/Memory.h>
#include <unwindstack/Regs.h>
#include <unwindstack/RegsArm.h>
#include <unwindstack/RegsArm64.h>
#include <unwindstack/RegsMips.h>
#include <unwindstack/RegsMips64.h>
#include <unwindstack/RegsX86.h>
#include <unwindstack/RegsX86_64.h>
#include <unwindstack/Unwinder.h>
#include <unwindstack/UserArm.h>
#include <unwindstack/UserArm64.h>
#include <unwindstack/UserMips.h>
#include <unwindstack/UserMips64.h>
#include <unwindstack/UserX86.h>
#include <unwindstack/UserX86_64.h>

#include <procinfo/process_map.h>

#include "perfetto/base/file_utils.h"
#include "perfetto/base/logging.h"
#include "perfetto/base/scoped_file.h"
#include "perfetto/base/string_utils.h"
#include "perfetto/base/task_runner.h"
#include "perfetto/base/thread_task_runner.h"
#include "src/profiling/memory/wire_protocol.h"

namespace perfetto {
namespace profiling {
namespace {

constexpr size_t kMaxFrames = 1000;

// We assume average ~300us per unwind. If we handle up to 1000 unwinds, this
// makes sure other tasks get to be run at least every 300ms if the unwinding
// saturates this thread.
constexpr size_t kUnwindBatchSize = 1000;

#pragma GCC diagnostic push
// We do not care about deterministic destructor order.
#pragma GCC diagnostic ignored "-Wglobal-constructors"
#pragma GCC diagnostic ignored "-Wexit-time-destructors"
static std::vector<std::string> kSkipMaps{"heapprofd_client.so"};
#pragma GCC diagnostic pop

std::unique_ptr<unwindstack::Regs> CreateFromRawData(unwindstack::ArchEnum arch,
                                                     void* raw_data) {
  std::unique_ptr<unwindstack::Regs> ret;
  // unwindstack::RegsX::Read returns a raw ptr which we are expected to free.
  switch (arch) {
    case unwindstack::ARCH_X86:
      ret.reset(unwindstack::RegsX86::Read(raw_data));
      break;
    case unwindstack::ARCH_X86_64:
      ret.reset(unwindstack::RegsX86_64::Read(raw_data));
      break;
    case unwindstack::ARCH_ARM:
      ret.reset(unwindstack::RegsArm::Read(raw_data));
      break;
    case unwindstack::ARCH_ARM64:
      ret.reset(unwindstack::RegsArm64::Read(raw_data));
      break;
    case unwindstack::ARCH_MIPS:
      ret.reset(unwindstack::RegsMips::Read(raw_data));
      break;
    case unwindstack::ARCH_MIPS64:
      ret.reset(unwindstack::RegsMips64::Read(raw_data));
      break;
    case unwindstack::ARCH_UNKNOWN:
      ret.reset(nullptr);
      break;
  }
  return ret;
}

// Behaves as a pread64, emulating it if not already exposed by the standard
// library. Safe to use on 32bit platforms for addresses with the top bit set.
// Clobbers the |fd| seek position if emulating.
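// On 32-bit targets, plain pread(2) takes a 32-bit off_t (unless built with
// _FILE_OFFSET_BITS=64), which cannot represent such addresses; pread64 and
// lseek64 take a 64-bit off64_t instead.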
ssize_t ReadAtOffsetClobberSeekPos(int fd,
                                   void* buf,
                                   size_t count,
                                   uint64_t addr) {
#ifdef __BIONIC__
  return pread64(fd, buf, count, static_cast<off64_t>(addr));
#else
  if (lseek64(fd, static_cast<off64_t>(addr), SEEK_SET) == -1)
    return -1;
  return read(fd, buf, count);
#endif
}

}  // namespace

StackOverlayMemory::StackOverlayMemory(std::shared_ptr<unwindstack::Memory> mem,
                                       uint64_t sp,
                                       uint8_t* stack,
                                       size_t size)
    : mem_(std::move(mem)), sp_(sp), stack_end_(sp + size), stack_(stack) {}

size_t StackOverlayMemory::Read(uint64_t addr, void* dst, size_t size) {
  // Serve reads that fall entirely within the copied stack range from the
  // local copy; fall through to the remote process memory otherwise.
  if (addr >= sp_ && addr + size <= stack_end_ && addr + size > sp_) {
    size_t offset = static_cast<size_t>(addr - sp_);
    memcpy(dst, stack_ + offset, size);
    return size;
  }

  return mem_->Read(addr, dst, size);
}

FDMemory::FDMemory(base::ScopedFile mem_fd) : mem_fd_(std::move(mem_fd)) {}

size_t FDMemory::Read(uint64_t addr, void* dst, size_t size) {
  ssize_t rd = ReadAtOffsetClobberSeekPos(*mem_fd_, dst, size, addr);
  if (rd == -1) {
    PERFETTO_DPLOG("read of %zu at offset %" PRIu64, size, addr);
    return 0;
  }
  return static_cast<size_t>(rd);
}

FileDescriptorMaps::FileDescriptorMaps(base::ScopedFile fd)
    : fd_(std::move(fd)) {}

bool FileDescriptorMaps::Parse() {
  // If the process has already exited, lseek or ReadFileDescriptor will fail.
  if (lseek(*fd_, 0, SEEK_SET) == -1)
    return false;

  std::string content;
  if (!base::ReadFileDescriptor(*fd_, &content))
    return false;
  return android::procinfo::ReadMapFileContent(
      &content[0], [&](uint64_t start, uint64_t end, uint16_t flags,
                       uint64_t pgoff, ino_t, const char* name) {
        // Mark a device map in /dev/ and not in /dev/ashmem/ specially.
        if (strncmp(name, "/dev/", 5) == 0 &&
            strncmp(name + 5, "ashmem/", 7) != 0) {
          flags |= unwindstack::MAPS_FLAGS_DEVICE_MAP;
        }
        unwindstack::MapInfo* prev_map =
            maps_.empty() ? nullptr : maps_.back().get();
        maps_.emplace_back(
            new unwindstack::MapInfo(prev_map, start, end, pgoff, flags, name));
      });
}

void FileDescriptorMaps::Reset() {
  maps_.clear();
}

bool DoUnwind(WireMessage* msg, UnwindingMetadata* metadata, AllocRecord* out) {
  AllocMetadata* alloc_metadata = msg->alloc_header;
  std::unique_ptr<unwindstack::Regs> regs(
      CreateFromRawData(alloc_metadata->arch, alloc_metadata->register_data));
  if (regs == nullptr) {
    PERFETTO_DLOG("Unable to construct unwindstack::Regs");
    unwindstack::FrameData frame_data{};
    frame_data.function_name = "ERROR READING REGISTERS";
    frame_data.map_name = "ERROR";

    out->frames.emplace_back(frame_data, "");
    out->error = true;
    return false;
  }
  uint8_t* stack = reinterpret_cast<uint8_t*>(msg->payload);
  std::shared_ptr<unwindstack::Memory> mems =
      std::make_shared<StackOverlayMemory>(metadata->fd_mem,
                                           alloc_metadata->stack_pointer, stack,
                                           msg->payload_size);

  unwindstack::Unwinder unwinder(kMaxFrames, &metadata->maps, regs.get(), mems);
#if PERFETTO_BUILDFLAG(PERFETTO_ANDROID_BUILD)
  unwinder.SetJitDebug(metadata->jit_debug.get(), regs->Arch());
  unwinder.SetDexFiles(metadata->dex_files.get(), regs->Arch());
#endif
  // Suppress incorrect "variable may be uninitialized" error for the if
  // condition after this loop. error_code = LastErrorCode gets run at least
  // once.
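  // Unwind, reparsing the maps and retrying once if the unwinder reports
  // ERROR_INVALID_MAP: the target process may have mapped new libraries
  // (e.g. via dlopen) since /proc/<pid>/maps was last parsed.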
  uint8_t error_code = 0;
  for (int attempt = 0; attempt < 2; ++attempt) {
    if (attempt > 0) {
      PERFETTO_DLOG("Reparsing maps");
      metadata->ReparseMaps();
      out->reparsed_map = true;
#if PERFETTO_BUILDFLAG(PERFETTO_ANDROID_BUILD)
      unwinder.SetJitDebug(metadata->jit_debug.get(), regs->Arch());
      unwinder.SetDexFiles(metadata->dex_files.get(), regs->Arch());
#endif
    }
    unwinder.Unwind(&kSkipMaps, nullptr);
    error_code = unwinder.LastErrorCode();
    if (error_code != unwindstack::ERROR_INVALID_MAP)
      break;
  }
  std::vector<unwindstack::FrameData> frames = unwinder.ConsumeFrames();
  for (unwindstack::FrameData& fd : frames) {
    std::string build_id;
    if (fd.map_name != "") {
      unwindstack::MapInfo* map_info = metadata->maps.Find(fd.pc);
      if (map_info)
        build_id = map_info->GetBuildID();
    }

    out->frames.emplace_back(std::move(fd), std::move(build_id));
  }

  if (error_code != 0) {
    PERFETTO_DLOG("Unwinding error %" PRIu8, error_code);
    unwindstack::FrameData frame_data{};
    frame_data.function_name = "ERROR " + std::to_string(error_code);
    frame_data.map_name = "ERROR";

    out->frames.emplace_back(frame_data, "");
    out->error = true;
  }
  return true;
}

void UnwindingWorker::OnDisconnect(base::UnixSocket* self) {
  // TODO(fmayer): Maybe try to drain shmem one last time.
  auto it = client_data_.find(self->peer_pid());
  if (it == client_data_.end()) {
    PERFETTO_DFATAL("Disconnected unexpected socket.");
    return;
  }
  ClientData& client_data = it->second;
  SharedRingBuffer& shmem = client_data.shmem;

  // Currently, these stats are used to determine whether the application
  // disconnected due to an error condition (i.e. buffer overflow) or
  // voluntarily. Because a buffer overflow leads to an immediate disconnect,
  // we do not need these stats when heapprofd tears down the tracing session.
  //
  // TODO(fmayer): We should make it so that normal disconnects also go
  // through this code path, so we can write other stats to the result. This
  // will also allow us to free the bookkeeping data earlier for processes
  // that exit during the session. See TODO in
  // HeapprofdProducer::HandleSocketDisconnected.
  SharedRingBuffer::Stats stats = {};
  {
    auto lock = shmem.AcquireLock(ScopedSpinlock::Mode::Try);
    if (lock.locked())
      stats = shmem.GetStats(lock);
    else
      PERFETTO_ELOG("Failed to lock shmem to get stats.");
  }
  DataSourceInstanceID ds_id = client_data.data_source_instance_id;

  pid_t peer_pid = self->peer_pid();
  client_data_.erase(it);
  // The erase invalidates the self pointer.
  self = nullptr;
  delegate_->PostSocketDisconnected(ds_id, peer_pid, stats);
}

void UnwindingWorker::OnDataAvailable(base::UnixSocket* self) {
  // Drain buffer to clear the notification.
  char recv_buf[kUnwindBatchSize];
  self->Receive(recv_buf, sizeof(recv_buf));
  HandleUnwindBatch(self->peer_pid());
}

void UnwindingWorker::HandleUnwindBatch(pid_t peer_pid) {
  auto it = client_data_.find(peer_pid);
  if (it == client_data_.end()) {
    // This can happen if the client disconnected before the buffer was fully
    // handled.
    PERFETTO_DLOG("Unexpected data.");
    return;
  }

  ClientData& client_data = it->second;
  SharedRingBuffer& shmem = client_data.shmem;
  SharedRingBuffer::Buffer buf;

  size_t i;
  bool repost_task = false;
  for (i = 0; i < kUnwindBatchSize; ++i) {
    uint64_t reparses_before = client_data.metadata.reparses;
    // TODO(fmayer): Allow spinlock acquisition to fail and repost the task if
    // it did.
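    // An invalid buffer means the shared ring buffer has been drained; end
    // this batch early.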
    buf = shmem.BeginRead();
    if (!buf)
      break;
    HandleBuffer(buf, &client_data.metadata,
                 client_data.data_source_instance_id,
                 client_data.sock->peer_pid(), delegate_);
    shmem.EndRead(std::move(buf));
    // Reparsing takes time, so process the rest in a new batch to avoid
    // timing out.
    // TODO(fmayer): Do not special-case blocking mode.
    if (client_data.client_config.block_client &&
        reparses_before < client_data.metadata.reparses) {
      repost_task = true;
      break;
    }
  }
  // Always repost if we have gone through the whole batch.
  if (i == kUnwindBatchSize)
    repost_task = true;

  if (repost_task) {
    thread_task_runner_.get()->PostTask(
        [this, peer_pid] { HandleUnwindBatch(peer_pid); });
  }
}

// static
void UnwindingWorker::HandleBuffer(const SharedRingBuffer::Buffer& buf,
                                   UnwindingMetadata* unwinding_metadata,
                                   DataSourceInstanceID data_source_instance_id,
                                   pid_t peer_pid,
                                   Delegate* delegate) {
  WireMessage msg;
  // TODO(fmayer): standardise on char* or uint8_t*.
  // char* has stronger guarantees regarding aliasing.
  // see https://timsong-cpp.github.io/cppwp/n3337/basic.lval#10.8
  if (!ReceiveWireMessage(reinterpret_cast<char*>(buf.data), buf.size, &msg)) {
    PERFETTO_DFATAL("Failed to receive wire message.");
    return;
  }

  if (msg.record_type == RecordType::Malloc) {
    AllocRecord rec;
    rec.alloc_metadata = *msg.alloc_header;
    rec.pid = peer_pid;
    rec.data_source_instance_id = data_source_instance_id;
    auto start_time_us = base::GetWallTimeNs() / 1000;
    DoUnwind(&msg, unwinding_metadata, &rec);
    rec.unwinding_time_us = static_cast<uint64_t>(
        ((base::GetWallTimeNs() / 1000) - start_time_us).count());
    delegate->PostAllocRecord(std::move(rec));
  } else if (msg.record_type == RecordType::Free) {
    FreeRecord rec;
    rec.pid = peer_pid;
    rec.data_source_instance_id = data_source_instance_id;
    // We need to copy this, so we can return the memory to the shmem buffer.
    memcpy(&rec.free_batch, msg.free_header, sizeof(*msg.free_header));
    delegate->PostFreeRecord(std::move(rec));
  } else {
    PERFETTO_DFATAL("Invalid record type.");
  }
}

void UnwindingWorker::PostHandoffSocket(HandoffData handoff_data) {
  // Even with C++14, this cannot be moved, as std::function has to be
  // copyable, which HandoffData is not.
  HandoffData* raw_data = new HandoffData(std::move(handoff_data));
  // We do not need to use a WeakPtr here because the task runner will not
  // outlive its UnwindingWorker.
  thread_task_runner_.get()->PostTask([this, raw_data] {
    HandoffData data = std::move(*raw_data);
    delete raw_data;
    HandleHandoffSocket(std::move(data));
  });
}

void UnwindingWorker::HandleHandoffSocket(HandoffData handoff_data) {
  auto sock = base::UnixSocket::AdoptConnected(
      handoff_data.sock.ReleaseFd(), this, this->thread_task_runner_.get(),
      base::SockType::kStream);
  pid_t peer_pid = sock->peer_pid();

  UnwindingMetadata metadata(peer_pid,
                             std::move(handoff_data.fds[kHandshakeMaps]),
                             std::move(handoff_data.fds[kHandshakeMem]));
  ClientData client_data{
      handoff_data.data_source_instance_id,
      std::move(sock),
      std::move(metadata),
      std::move(handoff_data.shmem),
      std::move(handoff_data.client_config),
  };
  client_data_.emplace(peer_pid, std::move(client_data));
}

void UnwindingWorker::PostDisconnectSocket(pid_t pid) {
  // We do not need to use a WeakPtr here because the task runner will not
  // outlive its UnwindingWorker.
  thread_task_runner_.get()->PostTask(
      [this, pid] { HandleDisconnectSocket(pid); });
}

void UnwindingWorker::HandleDisconnectSocket(pid_t pid) {
  client_data_.erase(pid);
}

UnwindingWorker::Delegate::~Delegate() = default;

}  // namespace profiling
}  // namespace perfetto