C++程序  |  1425行  |  50.03 KB

/*
 * Copyright (C) 2015 The Android Open Source Project
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *      http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

#include <inttypes.h>
#include <libgen.h>
#include <signal.h>
#include <sys/prctl.h>
#include <sys/utsname.h>
#include <time.h>
#include <unistd.h>
#include <set>
#include <string>
#include <unordered_map>
#include <vector>

#include <android-base/logging.h>
#include <android-base/file.h>
#include <android-base/parsedouble.h>
#include <android-base/parseint.h>
#include <android-base/strings.h>
#include <android-base/test_utils.h>
#if defined(__ANDROID__)
#include <android-base/properties.h>
#endif

#include "CallChainJoiner.h"
#include "command.h"
#include "environment.h"
#include "event_selection_set.h"
#include "event_type.h"
#include "IOEventLoop.h"
#include "OfflineUnwinder.h"
#include "perf_clock.h"
#include "read_apk.h"
#include "read_elf.h"
#include "record.h"
#include "record_file.h"
#include "thread_tree.h"
#include "tracing.h"
#include "utils.h"
#include "workload.h"

using namespace simpleperf;

static std::string default_measured_event_type = "cpu-cycles";

static std::unordered_map<std::string, uint64_t> branch_sampling_type_map = {
    {"u", PERF_SAMPLE_BRANCH_USER},
    {"k", PERF_SAMPLE_BRANCH_KERNEL},
    {"any", PERF_SAMPLE_BRANCH_ANY},
    {"any_call", PERF_SAMPLE_BRANCH_ANY_CALL},
    {"any_ret", PERF_SAMPLE_BRANCH_ANY_RETURN},
    {"ind_call", PERF_SAMPLE_BRANCH_IND_CALL},
};

static std::unordered_map<std::string, int> clockid_map = {
    {"realtime", CLOCK_REALTIME},
    {"monotonic", CLOCK_MONOTONIC},
    {"monotonic_raw", CLOCK_MONOTONIC_RAW},
    {"boottime", CLOCK_BOOTTIME},
};

// The max size of records dumped by kernel is 65535, and dump stack size
// should be a multiply of 8, so MAX_DUMP_STACK_SIZE is 65528.
constexpr uint32_t MAX_DUMP_STACK_SIZE = 65528;

// The max allowed pages in mapped buffer is decided by rlimit(RLIMIT_MEMLOCK).
// Here 1024 is a desired value for pages in mapped buffer. If mapped
// successfully, the buffer size = 1024 * 4K (page size) = 4M.
constexpr size_t DESIRED_PAGES_IN_MAPPED_BUFFER = 1024;

// Cache size used by CallChainJoiner to cache call chains in memory.
constexpr size_t DEFAULT_CALL_CHAIN_JOINER_CACHE_SIZE = 8 * 1024 * 1024;

class RecordCommand : public Command {
 public:
  RecordCommand()
      : Command(
            "record", "record sampling info in perf.data",
            // clang-format off
"Usage: simpleperf record [options] [--] [command [command-args]]\n"
"       Gather sampling information of running [command]. And -a/-p/-t option\n"
"       can be used to change target of sampling information.\n"
"       The default options are: -e cpu-cycles -f 4000 -o perf.data.\n"
"Select monitored threads:\n"
"-a     System-wide collection.\n"
#if defined(__ANDROID__)
"--app package_name    Profile the process of an Android application.\n"
"                      On non-rooted devices, the app must be debuggable,\n"
"                      because we use run-as to switch to the app's context.\n"
#endif
"-p pid1,pid2,...       Record events on existing processes. Mutually exclusive\n"
"                       with -a.\n"
"-t tid1,tid2,... Record events on existing threads. Mutually exclusive with -a.\n"
"\n"
"Select monitored event types:\n"
"-e event1[:modifier1],event2[:modifier2],...\n"
"             Select the event list to sample. Use `simpleperf list` to find\n"
"             all possible event names. Modifiers can be added to define how\n"
"             the event should be monitored.\n"
"             Possible modifiers are:\n"
"                u - monitor user space events only\n"
"                k - monitor kernel space events only\n"
"--group event1[:modifier],event2[:modifier2],...\n"
"             Similar to -e option. But events specified in the same --group\n"
"             option are monitored as a group, and scheduled in and out at the\n"
"             same time.\n"
"--trace-offcpu   Generate samples when threads are scheduled off cpu.\n"
"                 Similar to \"-c 1 -e sched:sched_switch\".\n"
"\n"
"Select monitoring options:\n"
"-f freq      Set event sample frequency. It means recording at most [freq]\n"
"             samples every second. For non-tracepoint events, the default\n"
"             option is -f 4000. A -f/-c option affects all event types\n"
"             following it until meeting another -f/-c option. For example,\n"
"             for \"-f 1000 cpu-cycles -c 1 -e sched:sched_switch\", cpu-cycles\n"
"             has sample freq 1000, sched:sched_switch event has sample period 1.\n"
"-c count     Set event sample period. It means recording one sample when\n"
"             [count] events happen. For tracepoint events, the default option\n"
"             is -c 1.\n"
"--call-graph fp | dwarf[,<dump_stack_size>]\n"
"             Enable call graph recording. Use frame pointer or dwarf debug\n"
"             frame as the method to parse call graph in stack.\n"
"             Default is dwarf,65528.\n"
"-g           Same as '--call-graph dwarf'.\n"
"--clockid clock_id      Generate timestamps of samples using selected clock.\n"
"                        Possible values are: realtime, monotonic,\n"
"                        monotonic_raw, boottime, perf. Default is perf.\n"
"--cpu cpu_item1,cpu_item2,...\n"
"             Collect samples only on the selected cpus. cpu_item can be cpu\n"
"             number like 1, or cpu range like 0-3.\n"
"--duration time_in_sec  Monitor for time_in_sec seconds instead of running\n"
"                        [command]. Here time_in_sec may be any positive\n"
"                        floating point number.\n"
"-j branch_filter1,branch_filter2,...\n"
"             Enable taken branch stack sampling. Each sample captures a series\n"
"             of consecutive taken branches.\n"
"             The following filters are defined:\n"
"                any: any type of branch\n"
"                any_call: any function call or system call\n"
"                any_ret: any function return or system call return\n"
"                ind_call: any indirect branch\n"
"                u: only when the branch target is at the user level\n"
"                k: only when the branch target is in the kernel\n"
"             This option requires at least one branch type among any, any_call,\n"
"             any_ret, ind_call.\n"
"-b           Enable taken branch stack sampling. Same as '-j any'.\n"
"-m mmap_pages   Set the size of the buffer used to receiving sample data from\n"
"                the kernel. It should be a power of 2. If not set, the max\n"
"                possible value <= 1024 will be used.\n"
"--no-inherit  Don't record created child threads/processes.\n"
"\n"
"Dwarf unwinding options:\n"
"--no-post-unwind   If `--call-graph dwarf` option is used, then the user's stack\n"
"                   will be recorded in perf.data and unwound after recording.\n"
"                   However, this takes a lot of disk space. Use this option to\n"
"                   unwind while recording.\n"
"--no-unwind   If `--call-graph dwarf` option is used, then the user's stack\n"
"              will be unwound by default. Use this option to disable the\n"
"              unwinding of the user's stack.\n"
"--no-callchain-joiner  If `--call-graph dwarf` option is used, then by default\n"
"                       callchain joiner is used to break the 64k stack limit\n"
"                       and build more complete call graphs. However, the built\n"
"                       call graphs may not be correct in all cases.\n"
"--callchain-joiner-min-matching-nodes count\n"
"               When callchain joiner is used, set the matched nodes needed to join\n"
"               callchains. The count should be >= 1. By default it is 1.\n"
"\n"
"Recording file options:\n"
"--no-dump-kernel-symbols  Don't dump kernel symbols in perf.data. By default\n"
"                          kernel symbols will be dumped when needed.\n"
"--no-dump-symbols       Don't dump symbols in perf.data. By default symbols are\n"
"                        dumped in perf.data, to support reporting in another\n"
"                        environment.\n"
"-o record_file_name    Set record file name, default is perf.data.\n"
"--exit-with-parent            Stop recording when the process starting\n"
"                              simpleperf dies.\n"
"--start_profiling_fd fd_no    After starting profiling, write \"STARTED\" to\n"
"                              <fd_no>, then close <fd_no>.\n"
"--symfs <dir>    Look for files with symbols relative to this directory.\n"
"                 This option is used to provide files with symbol table and\n"
"                 debug information, which are used for unwinding and dumping symbols.\n"
#if 0
// Below options are only used internally and shouldn't be visible to the public.
"--in-app         We are already running in the app's context.\n"
"--tracepoint-events file_name   Read tracepoint events from [file_name] instead of tracefs.\n"
#endif
            // clang-format on
            ),
        system_wide_collection_(false),
        branch_sampling_(0),
        fp_callchain_sampling_(false),
        dwarf_callchain_sampling_(false),
        dump_stack_size_in_dwarf_sampling_(MAX_DUMP_STACK_SIZE),
        unwind_dwarf_callchain_(true),
        post_unwind_(true),
        child_inherit_(true),
        duration_in_sec_(0),
        can_dump_kernel_symbols_(true),
        dump_symbols_(true),
        clockid_("perf"),
        event_selection_set_(false),
        mmap_page_range_(std::make_pair(1, DESIRED_PAGES_IN_MAPPED_BUFFER)),
        record_filename_("perf.data"),
        start_sampling_time_in_ns_(0),
        sample_record_count_(0),
        lost_record_count_(0),
        start_profiling_fd_(-1),
        in_app_context_(false),
        trace_offcpu_(false),
        exclude_kernel_callchain_(false),
        allow_callchain_joiner_(true),
        callchain_joiner_min_matching_nodes_(1u) {
    // If we run `adb shell simpleperf record xxx` and stop profiling by ctrl-c, adb closes
    // sockets connecting simpleperf. After that, simpleperf will receive SIGPIPE when writing
    // to stdout/stderr, which is a problem when we use '--app' option. So ignore SIGPIPE to
    // finish properly.
    signal(SIGPIPE, SIG_IGN);
    app_package_name_ = GetDefaultAppPackageName();
  }

  bool Run(const std::vector<std::string>& args);

 private:
  bool ParseOptions(const std::vector<std::string>& args,
                    std::vector<std::string>* non_option_args);
  bool PrepareRecording(Workload* workload);
  bool DoRecording(Workload* workload);
  bool PostProcessRecording(const std::vector<std::string>& args);
  bool TraceOffCpu();
  bool SetEventSelectionFlags();
  bool CreateAndInitRecordFile();
  std::unique_ptr<RecordFileWriter> CreateRecordFile(
      const std::string& filename);
  bool DumpKernelSymbol();
  bool DumpTracingData();
  bool DumpKernelAndModuleMmaps(const perf_event_attr& attr, uint64_t event_id);
  bool DumpThreadCommAndMmaps(const perf_event_attr& attr, uint64_t event_id);
  bool ProcessRecord(Record* record);
  bool SaveRecordForPostUnwinding(Record* record);
  bool SaveRecordAfterUnwinding(Record* record);
  bool SaveRecordWithoutUnwinding(Record* record);

  void UpdateRecordForEmbeddedElfPath(Record* record);
  bool UnwindRecord(SampleRecord& r);
  bool PostUnwindRecords();
  bool JoinCallChains();
  bool DumpAdditionalFeatures(const std::vector<std::string>& args);
  bool DumpBuildIdFeature();
  bool DumpFileFeature();
  bool DumpMetaInfoFeature();
  void CollectHitFileInfo(const SampleRecord& r);

  std::unique_ptr<SampleSpeed> sample_speed_;
  bool system_wide_collection_;
  uint64_t branch_sampling_;
  bool fp_callchain_sampling_;
  bool dwarf_callchain_sampling_;
  uint32_t dump_stack_size_in_dwarf_sampling_;
  bool unwind_dwarf_callchain_;
  bool post_unwind_;
  std::unique_ptr<OfflineUnwinder> offline_unwinder_;
  bool child_inherit_;
  double duration_in_sec_;
  bool can_dump_kernel_symbols_;
  bool dump_symbols_;
  std::string clockid_;
  std::vector<int> cpus_;
  EventSelectionSet event_selection_set_;

  std::pair<size_t, size_t> mmap_page_range_;

  ThreadTree thread_tree_;
  std::string record_filename_;
  std::unique_ptr<RecordFileWriter> record_file_writer_;

  uint64_t start_sampling_time_in_ns_;  // nanoseconds from machine starting

  uint64_t sample_record_count_;
  uint64_t lost_record_count_;
  int start_profiling_fd_;
  std::string app_package_name_;
  bool in_app_context_;
  bool trace_offcpu_;
  bool exclude_kernel_callchain_;

  // For CallChainJoiner
  bool allow_callchain_joiner_;
  size_t callchain_joiner_min_matching_nodes_;
  std::unique_ptr<CallChainJoiner> callchain_joiner_;
};

bool RecordCommand::Run(const std::vector<std::string>& args) {
  ScopedCurrentArch scoped_arch(GetMachineArch());
  if (!CheckPerfEventLimit()) {
    return false;
  }
  AllowMoreOpenedFiles();

  std::vector<std::string> workload_args;
  if (!ParseOptions(args, &workload_args)) {
    return false;
  }
  ScopedTempFiles scoped_temp_files(android::base::Dirname(record_filename_));
  if (!app_package_name_.empty() && !in_app_context_) {
    // Some users want to profile non debuggable apps on rooted devices. If we use run-as,
    // it will be impossible when using --app. So don't switch to app's context when we are
    // root.
    if (!IsRoot()) {
      return RunInAppContext(app_package_name_, "record", args, workload_args.size(),
                             record_filename_, true);
    }
  }
  std::unique_ptr<Workload> workload;
  if (!workload_args.empty()) {
    workload = Workload::CreateWorkload(workload_args);
    if (workload == nullptr) {
      return false;
    }
  }
  if (!PrepareRecording(workload.get())) {
    return false;
  }
  if (!DoRecording(workload.get())) {
    return false;
  }
  return PostProcessRecording(args);
}

bool RecordCommand::PrepareRecording(Workload* workload) {
  // 1. Prepare in other modules.
  if (!InitPerfClock()) {
    return false;
  }
  PrepareVdsoFile();

  // 2. Add default event type.
  if (event_selection_set_.empty()) {
    size_t group_id;
    if (!event_selection_set_.AddEventType(default_measured_event_type, &group_id)) {
      return false;
    }
    if (sample_speed_) {
      event_selection_set_.SetSampleSpeed(group_id, *sample_speed_);
    }
  }

  // 3. Process options before opening perf event files.
  exclude_kernel_callchain_ = event_selection_set_.ExcludeKernel();
  if (trace_offcpu_ && !TraceOffCpu()) {
    return false;
  }
  if (!SetEventSelectionFlags()) {
    return false;
  }
  if (unwind_dwarf_callchain_) {
    offline_unwinder_.reset(new OfflineUnwinder(false));
  }
  if (unwind_dwarf_callchain_ && allow_callchain_joiner_) {
    callchain_joiner_.reset(new CallChainJoiner(DEFAULT_CALL_CHAIN_JOINER_CACHE_SIZE,
                                                callchain_joiner_min_matching_nodes_,
                                                false));
  }

  // 4. Add monitored targets.
  bool need_to_check_targets = false;
  if (system_wide_collection_) {
    event_selection_set_.AddMonitoredThreads({-1});
  } else if (!event_selection_set_.HasMonitoredTarget()) {
    if (workload != nullptr) {
      event_selection_set_.AddMonitoredProcesses({workload->GetPid()});
      event_selection_set_.SetEnableOnExec(true);
      if (event_selection_set_.HasInplaceSampler()) {
        // Start worker early, because the worker process has to setup inplace-sampler server
        // before we try to connect it.
        if (!workload->Start()) {
          return false;
        }
      }
    } else if (!app_package_name_.empty()) {
      // If app process is not created, wait for it. This allows simpleperf starts before
      // app process. In this way, we can have a better support of app start-up time profiling.
      std::set<pid_t> pids = WaitForAppProcesses(app_package_name_);
      event_selection_set_.AddMonitoredProcesses(pids);
      need_to_check_targets = true;
    } else {
      LOG(ERROR)
          << "No threads to monitor. Try `simpleperf help record` for help";
      return false;
    }
  } else {
    need_to_check_targets = true;
  }

  // 5. Open perf event files and create mapped buffers.
  if (!event_selection_set_.OpenEventFiles(cpus_)) {
    return false;
  }
  if (!event_selection_set_.MmapEventFiles(mmap_page_range_.first,
                                           mmap_page_range_.second)) {
    return false;
  }

  // 6. Create perf.data.
  if (!CreateAndInitRecordFile()) {
    return false;
  }

  // 7. Add read/signal/periodic Events.
  auto callback =
      std::bind(&RecordCommand::ProcessRecord, this, std::placeholders::_1);
  if (!event_selection_set_.PrepareToReadMmapEventData(callback)) {
    return false;
  }
  if (need_to_check_targets && !event_selection_set_.StopWhenNoMoreTargets()) {
    return false;
  }
  IOEventLoop* loop = event_selection_set_.GetIOEventLoop();
  if (!loop->AddSignalEvents({SIGCHLD, SIGINT, SIGTERM},
                             [loop]() { return loop->ExitLoop(); })) {
    return false;
  }

  // Only add an event for SIGHUP if we didn't inherit SIG_IGN (e.g. from nohup).
  if (!SignalIsIgnored(SIGHUP)) {
    if (!loop->AddSignalEvent(SIGHUP, [loop]() { return loop->ExitLoop(); })) {
      return false;
    }
  }

  if (duration_in_sec_ != 0) {
    if (!loop->AddPeriodicEvent(SecondToTimeval(duration_in_sec_),
                                [loop]() { return loop->ExitLoop(); })) {
      return false;
    }
  }
  return true;
}

bool RecordCommand::DoRecording(Workload* workload) {
  // Write records in mapped buffers of perf_event_files to output file while workload is running.
  start_sampling_time_in_ns_ = GetPerfClock();
  LOG(VERBOSE) << "start_sampling_time is " << start_sampling_time_in_ns_ << " ns";
  if (workload != nullptr && !workload->IsStarted() && !workload->Start()) {
    return false;
  }
  if (start_profiling_fd_ != -1) {
    if (!android::base::WriteStringToFd("STARTED", start_profiling_fd_)) {
      PLOG(ERROR) << "failed to write to start_profiling_fd_";
    }
    close(start_profiling_fd_);
  }
  if (!event_selection_set_.GetIOEventLoop()->RunLoop()) {
    return false;
  }
  if (!event_selection_set_.FinishReadMmapEventData()) {
    return false;
  }
  return true;
}

bool RecordCommand::PostProcessRecording(const std::vector<std::string>& args) {
  // 1. Post unwind dwarf callchain.
  if (unwind_dwarf_callchain_ && post_unwind_) {
    if (!PostUnwindRecords()) {
      return false;
    }
  }

  // 2. Optionally join Callchains.
  if (callchain_joiner_) {
    JoinCallChains();
  }

  // 3. Dump additional features, and close record file.
  if (!DumpAdditionalFeatures(args)) {
    return false;
  }
  if (!record_file_writer_->Close()) {
    return false;
  }

  // 4. Show brief record result.
  LOG(INFO) << "Samples recorded: " << sample_record_count_
            << ". Samples lost: " << lost_record_count_ << ".";
  if (sample_record_count_ + lost_record_count_ != 0) {
    double lost_percent = static_cast<double>(lost_record_count_) /
                          (lost_record_count_ + sample_record_count_);
    constexpr double LOST_PERCENT_WARNING_BAR = 0.1;
    if (lost_percent >= LOST_PERCENT_WARNING_BAR) {
      LOG(WARNING) << "Lost " << (lost_percent * 100) << "% of samples, "
                   << "consider increasing mmap_pages(-m), "
                   << "or decreasing sample frequency(-f), "
                   << "or increasing sample period(-c).";
    }
  }
  if (callchain_joiner_) {
    callchain_joiner_->DumpStat();
  }
  return true;
}

bool RecordCommand::ParseOptions(const std::vector<std::string>& args,
                                 std::vector<std::string>* non_option_args) {
  std::vector<size_t> wait_setting_speed_event_groups_;
  size_t i;
  for (i = 0; i < args.size() && !args[i].empty() && args[i][0] == '-'; ++i) {
    if (args[i] == "-a") {
      system_wide_collection_ = true;
    } else if (args[i] == "--app") {
      if (!NextArgumentOrError(args, &i)) {
        return false;
      }
      app_package_name_ = args[i];
    } else if (args[i] == "-b") {
      branch_sampling_ = branch_sampling_type_map["any"];
    } else if (args[i] == "-c" || args[i] == "-f") {
      if (!NextArgumentOrError(args, &i)) {
        return false;
      }
      char* endptr;
      uint64_t value = strtoull(args[i].c_str(), &endptr, 0);
      if (*endptr != '\0' || value == 0) {
        LOG(ERROR) << "Invalid option for " << args[i-1] << ": '" << args[i] << "'";
        return false;
      }
      if (args[i-1] == "-c") {
        sample_speed_.reset(new SampleSpeed(0, value));
      } else {
        sample_speed_.reset(new SampleSpeed(value, 0));
      }
      for (auto group_id : wait_setting_speed_event_groups_) {
        event_selection_set_.SetSampleSpeed(group_id, *sample_speed_);
      }
      wait_setting_speed_event_groups_.clear();

    } else if (args[i] == "--call-graph") {
      if (!NextArgumentOrError(args, &i)) {
        return false;
      }
      std::vector<std::string> strs = android::base::Split(args[i], ",");
      if (strs[0] == "fp") {
        fp_callchain_sampling_ = true;
        dwarf_callchain_sampling_ = false;
      } else if (strs[0] == "dwarf") {
        fp_callchain_sampling_ = false;
        dwarf_callchain_sampling_ = true;
        if (strs.size() > 1) {
          char* endptr;
          uint64_t size = strtoull(strs[1].c_str(), &endptr, 0);
          if (*endptr != '\0' || size > UINT_MAX) {
            LOG(ERROR) << "invalid dump stack size in --call-graph option: "
                       << strs[1];
            return false;
          }
          if ((size & 7) != 0) {
            LOG(ERROR) << "dump stack size " << size
                       << " is not 8-byte aligned.";
            return false;
          }
          if (size >= MAX_DUMP_STACK_SIZE) {
            LOG(ERROR) << "dump stack size " << size
                       << " is bigger than max allowed size "
                       << MAX_DUMP_STACK_SIZE << ".";
            return false;
          }
          dump_stack_size_in_dwarf_sampling_ = static_cast<uint32_t>(size);
        }
      } else {
        LOG(ERROR) << "unexpected argument for --call-graph option: "
                   << args[i];
        return false;
      }
    } else if (args[i] == "--clockid") {
      if (!NextArgumentOrError(args, &i)) {
        return false;
      }
      if (args[i] != "perf") {
        if (!IsSettingClockIdSupported()) {
          LOG(ERROR) << "Setting clockid is not supported by the kernel.";
          return false;
        }
        if (clockid_map.find(args[i]) == clockid_map.end()) {
          LOG(ERROR) << "Invalid clockid: " << args[i];
          return false;
        }
      }
      clockid_ = args[i];
    } else if (args[i] == "--cpu") {
      if (!NextArgumentOrError(args, &i)) {
        return false;
      }
      cpus_ = GetCpusFromString(args[i]);
    } else if (args[i] == "--duration") {
      if (!NextArgumentOrError(args, &i)) {
        return false;
      }
      if (!android::base::ParseDouble(args[i].c_str(), &duration_in_sec_,
                                      1e-9)) {
        LOG(ERROR) << "Invalid duration: " << args[i].c_str();
        return false;
      }
    } else if (args[i] == "-e") {
      if (!NextArgumentOrError(args, &i)) {
        return false;
      }
      std::vector<std::string> event_types = android::base::Split(args[i], ",");
      for (auto& event_type : event_types) {
        size_t group_id;
        if (!event_selection_set_.AddEventType(event_type, &group_id)) {
          return false;
        }
        if (sample_speed_) {
          event_selection_set_.SetSampleSpeed(group_id, *sample_speed_);
        } else {
          wait_setting_speed_event_groups_.push_back(group_id);
        }
      }
    } else if (args[i] == "--exit-with-parent") {
      prctl(PR_SET_PDEATHSIG, SIGHUP, 0, 0, 0);
    } else if (args[i] == "-g") {
      fp_callchain_sampling_ = false;
      dwarf_callchain_sampling_ = true;
    } else if (args[i] == "--group") {
      if (!NextArgumentOrError(args, &i)) {
        return false;
      }
      std::vector<std::string> event_types = android::base::Split(args[i], ",");
      size_t group_id;
      if (!event_selection_set_.AddEventGroup(event_types, &group_id)) {
        return false;
      }
      if (sample_speed_) {
        event_selection_set_.SetSampleSpeed(group_id, *sample_speed_);
      } else {
        wait_setting_speed_event_groups_.push_back(group_id);
      }
    } else if (args[i] == "--in-app") {
      in_app_context_ = true;
    } else if (args[i] == "-j") {
      if (!NextArgumentOrError(args, &i)) {
        return false;
      }
      std::vector<std::string> branch_sampling_types =
          android::base::Split(args[i], ",");
      for (auto& type : branch_sampling_types) {
        auto it = branch_sampling_type_map.find(type);
        if (it == branch_sampling_type_map.end()) {
          LOG(ERROR) << "unrecognized branch sampling filter: " << type;
          return false;
        }
        branch_sampling_ |= it->second;
      }
    } else if (args[i] == "-m") {
      if (!NextArgumentOrError(args, &i)) {
        return false;
      }
      char* endptr;
      uint64_t pages = strtoull(args[i].c_str(), &endptr, 0);
      if (*endptr != '\0' || !IsPowerOfTwo(pages)) {
        LOG(ERROR) << "Invalid mmap_pages: '" << args[i] << "'";
        return false;
      }
      mmap_page_range_.first = mmap_page_range_.second = pages;
    } else if (args[i] == "--no-dump-kernel-symbols") {
      can_dump_kernel_symbols_ = false;
    } else if (args[i] == "--no-dump-symbols") {
      dump_symbols_ = false;
    } else if (args[i] == "--no-inherit") {
      child_inherit_ = false;
    } else if (args[i] == "--no-unwind") {
      unwind_dwarf_callchain_ = false;
    } else if (args[i] == "--no-callchain-joiner") {
      allow_callchain_joiner_ = false;
    } else if (args[i] == "--callchain-joiner-min-matching-nodes") {
      if (!NextArgumentOrError(args, &i)) {
        return false;
      }
      if (!android::base::ParseUint(args[i].c_str(), &callchain_joiner_min_matching_nodes_) ||
          callchain_joiner_min_matching_nodes_ < 1u) {
        LOG(ERROR) << "unexpected argument for " << args[i - 1] << " option";
        return false;
      }
    } else if (args[i] == "-o") {
      if (!NextArgumentOrError(args, &i)) {
        return false;
      }
      record_filename_ = args[i];
    } else if (args[i] == "-p") {
      if (!NextArgumentOrError(args, &i)) {
        return false;
      }
      std::set<pid_t> pids;
      if (!GetValidThreadsFromThreadString(args[i], &pids)) {
        return false;
      }
      event_selection_set_.AddMonitoredProcesses(pids);
    } else if (args[i] == "--no-post-unwind") {
      post_unwind_ = false;
    } else if (args[i] == "--start_profiling_fd") {
      if (!NextArgumentOrError(args, &i)) {
        return false;
      }
      if (!android::base::ParseInt(args[i].c_str(), &start_profiling_fd_, 0)) {
        LOG(ERROR) << "Invalid start_profiling_fd: " << args[i];
        return false;
      }
    } else if (args[i] == "--symfs") {
      if (!NextArgumentOrError(args, &i)) {
        return false;
      }
      if (!Dso::SetSymFsDir(args[i])) {
        return false;
      }
    } else if (args[i] == "-t") {
      if (!NextArgumentOrError(args, &i)) {
        return false;
      }
      std::set<pid_t> tids;
      if (!GetValidThreadsFromThreadString(args[i], &tids)) {
        return false;
      }
      event_selection_set_.AddMonitoredThreads(tids);
    } else if (args[i] == "--trace-offcpu") {
      trace_offcpu_ = true;
    } else if (args[i] == "--tracepoint-events") {
      if (!NextArgumentOrError(args, &i)) {
        return false;
      }
      if (!SetTracepointEventsFilePath(args[i])) {
        return false;
      }
    } else if (args[i] == "--") {
      i++;
      break;
    } else {
      ReportUnknownOption(args, i);
      return false;
    }
  }

  if (!dwarf_callchain_sampling_) {
    if (!unwind_dwarf_callchain_) {
      LOG(ERROR)
          << "--no-unwind is only used with `--call-graph dwarf` option.";
      return false;
    }
    unwind_dwarf_callchain_ = false;
  }
  if (post_unwind_) {
    if (!dwarf_callchain_sampling_ || !unwind_dwarf_callchain_) {
      post_unwind_ = false;
    }
  } else {
    if (!dwarf_callchain_sampling_) {
      LOG(ERROR)
          << "--no-post-unwind is only used with `--call-graph dwarf` option.";
      return false;
    }
    if (!unwind_dwarf_callchain_) {
      LOG(ERROR) << "--no-post-unwind can't be used with `--no-unwind` option.";
      return false;
    }
  }

  if (fp_callchain_sampling_) {
    if (GetBuildArch() == ARCH_ARM) {
      LOG(WARNING) << "`--callgraph fp` option doesn't work well on arm architecture, "
                   << "consider using `-g` option or profiling on aarch64 architecture.";
    }
  }

  if (system_wide_collection_ && event_selection_set_.HasMonitoredTarget()) {
    LOG(ERROR) << "Record system wide and existing processes/threads can't be "
                  "used at the same time.";
    return false;
  }

  if (system_wide_collection_ && !IsRoot()) {
    LOG(ERROR) << "System wide profiling needs root privilege.";
    return false;
  }

  if (dump_symbols_ && can_dump_kernel_symbols_) {
    // No need to dump kernel symbols as we will dump all required symbols.
    can_dump_kernel_symbols_ = false;
  }

  non_option_args->clear();
  for (; i < args.size(); ++i) {
    non_option_args->push_back(args[i]);
  }
  return true;
}

bool RecordCommand::TraceOffCpu() {
  if (FindEventTypeByName("sched:sched_switch") == nullptr) {
    LOG(ERROR) << "Can't trace off cpu because sched:sched_switch event is not available";
    return false;
  }
  for (auto& event_type : event_selection_set_.GetTracepointEvents()) {
    if (event_type->name == "sched:sched_switch") {
      LOG(ERROR) << "Trace offcpu can't be used together with sched:sched_switch event";
      return false;
    }
  }
  if (!IsDumpingRegsForTracepointEventsSupported()) {
    LOG(ERROR) << "Dumping regs for tracepoint events is not supported by the kernel";
    return false;
  }
  return event_selection_set_.AddEventType("sched:sched_switch");
}

bool RecordCommand::SetEventSelectionFlags() {
  event_selection_set_.SampleIdAll();
  if (!event_selection_set_.SetBranchSampling(branch_sampling_)) {
    return false;
  }
  if (fp_callchain_sampling_) {
    event_selection_set_.EnableFpCallChainSampling();
  } else if (dwarf_callchain_sampling_) {
    if (!event_selection_set_.EnableDwarfCallChainSampling(
            dump_stack_size_in_dwarf_sampling_)) {
      return false;
    }
  }
  event_selection_set_.SetInherit(child_inherit_);
  if (clockid_ != "perf") {
    event_selection_set_.SetClockId(clockid_map[clockid_]);
  }
  return true;
}

bool RecordCommand::CreateAndInitRecordFile() {
  record_file_writer_ = CreateRecordFile(record_filename_);
  if (record_file_writer_ == nullptr) {
    return false;
  }
  // Use first perf_event_attr and first event id to dump mmap and comm records.
  EventAttrWithId attr_id = event_selection_set_.GetEventAttrWithId()[0];
  if (!DumpKernelSymbol()) {
    return false;
  }
  if (!DumpTracingData()) {
    return false;
  }
  if (!DumpKernelAndModuleMmaps(*attr_id.attr, attr_id.ids[0])) {
    return false;
  }
  if (!DumpThreadCommAndMmaps(*attr_id.attr, attr_id.ids[0])) {
    return false;
  }
  return true;
}

std::unique_ptr<RecordFileWriter> RecordCommand::CreateRecordFile(
    const std::string& filename) {
  std::unique_ptr<RecordFileWriter> writer =
      RecordFileWriter::CreateInstance(filename);
  if (writer == nullptr) {
    return nullptr;
  }

  if (!writer->WriteAttrSection(event_selection_set_.GetEventAttrWithId())) {
    return nullptr;
  }
  return writer;
}

bool RecordCommand::DumpKernelSymbol() {
  if (can_dump_kernel_symbols_) {
    std::string kallsyms;
    if (event_selection_set_.NeedKernelSymbol() &&
        CheckKernelSymbolAddresses()) {
      if (!android::base::ReadFileToString("/proc/kallsyms", &kallsyms)) {
        PLOG(ERROR) << "failed to read /proc/kallsyms";
        return false;
      }
      KernelSymbolRecord r(kallsyms);
      if (!ProcessRecord(&r)) {
        return false;
      }
    }
  }
  return true;
}

bool RecordCommand::DumpTracingData() {
  std::vector<const EventType*> tracepoint_event_types =
      event_selection_set_.GetTracepointEvents();
  if (tracepoint_event_types.empty() || !CanRecordRawData()) {
    return true;  // No need to dump tracing data, or can't do it.
  }
  std::vector<char> tracing_data;
  if (!GetTracingData(tracepoint_event_types, &tracing_data)) {
    return false;
  }
  TracingDataRecord record(tracing_data);
  if (!ProcessRecord(&record)) {
    return false;
  }
  return true;
}

bool RecordCommand::DumpKernelAndModuleMmaps(const perf_event_attr& attr,
                                             uint64_t event_id) {
  KernelMmap kernel_mmap;
  std::vector<KernelMmap> module_mmaps;
  GetKernelAndModuleMmaps(&kernel_mmap, &module_mmaps);

  MmapRecord mmap_record(attr, true, UINT_MAX, 0, kernel_mmap.start_addr,
                         kernel_mmap.len, 0, kernel_mmap.filepath, event_id);
  if (!ProcessRecord(&mmap_record)) {
    return false;
  }
  for (auto& module_mmap : module_mmaps) {
    MmapRecord mmap_record(attr, true, UINT_MAX, 0, module_mmap.start_addr,
                           module_mmap.len, 0, module_mmap.filepath, event_id);
    if (!ProcessRecord(&mmap_record)) {
      return false;
    }
  }
  return true;
}

bool RecordCommand::DumpThreadCommAndMmaps(const perf_event_attr& attr,
                                           uint64_t event_id) {
  // Decide which processes and threads to dump.
  // For system_wide profiling, dump all threads.
  // For non system wide profiling, build dump_threads.
  bool all_threads = system_wide_collection_;
  std::set<pid_t> dump_threads = event_selection_set_.GetMonitoredThreads();
  for (const auto& pid : event_selection_set_.GetMonitoredProcesses()) {
    std::vector<pid_t> tids = GetThreadsInProcess(pid);
    dump_threads.insert(tids.begin(), tids.end());
  }

  // Collect processes to dump.
  std::vector<pid_t> processes;
  if (all_threads) {
    processes = GetAllProcesses();
  } else {
    std::set<pid_t> process_set;
    for (const auto& tid : dump_threads) {
      pid_t pid;
      if (!GetProcessForThread(tid, &pid)) {
        continue;
      }
      process_set.insert(pid);
    }
    processes.insert(processes.end(), process_set.begin(), process_set.end());
  }

  // Dump each process and its threads.
  for (auto& pid : processes) {
    // Dump mmap records.
    std::vector<ThreadMmap> thread_mmaps;
    if (!GetThreadMmapsInProcess(pid, &thread_mmaps)) {
      // The process may exit before we get its info.
      continue;
    }
    for (const auto& map : thread_mmaps) {
      if (map.executable == 0) {
        continue;  // No need to dump non-executable mmap info.
      }
      MmapRecord record(attr, false, pid, pid, map.start_addr, map.len,
                        map.pgoff, map.name, event_id);
      if (!ProcessRecord(&record)) {
        return false;
      }
    }
    // Dump process name.
    std::string name;
    if (GetThreadName(pid, &name)) {
      CommRecord record(attr, pid, pid, name, event_id, 0);
      if (!ProcessRecord(&record)) {
        return false;
      }
    }
    // Dump thread info.
    std::vector<pid_t> threads = GetThreadsInProcess(pid);
    for (const auto& tid : threads) {
      if (tid == pid) {
        continue;
      }
      if (all_threads || dump_threads.find(tid) != dump_threads.end()) {
        ForkRecord fork_record(attr, pid, tid, pid, pid, event_id);
        if (!ProcessRecord(&fork_record)) {
          return false;
        }
        if (GetThreadName(tid, &name)) {
          CommRecord comm_record(attr, pid, tid, name, event_id, 0);
          if (!ProcessRecord(&comm_record)) {
            return false;
          }
        }
      }
    }
  }
  return true;
}

bool RecordCommand::ProcessRecord(Record* record) {
  if (unwind_dwarf_callchain_) {
    if (post_unwind_) {
      return SaveRecordForPostUnwinding(record);
    }
    return SaveRecordAfterUnwinding(record);
  }
  return SaveRecordWithoutUnwinding(record);
}

bool RecordCommand::SaveRecordForPostUnwinding(Record* record) {
  if (record->type() == PERF_RECORD_SAMPLE) {
    static_cast<SampleRecord*>(record)->RemoveInvalidStackData();
  }
  if (!record_file_writer_->WriteRecord(*record)) {
    LOG(ERROR) << "If there isn't enough space for storing profiling data, consider using "
               << "--no-post-unwind option.";
    return false;
  }
  return true;
}

bool RecordCommand::SaveRecordAfterUnwinding(Record* record) {
  if (record->type() == PERF_RECORD_SAMPLE) {
    auto& r = *static_cast<SampleRecord*>(record);
    // AdjustCallChainGeneratedByKernel() should go before UnwindRecord(). Because we don't want
    // to adjust callchains generated by dwarf unwinder.
    r.AdjustCallChainGeneratedByKernel();
    if (!UnwindRecord(r)) {
      return false;
    }
    // ExcludeKernelCallChain() should go after UnwindRecord() to notice the generated user call
    // chain.
    if (r.InKernel() && exclude_kernel_callchain_ && r.ExcludeKernelCallChain() == 0u) {
      // If current record contains no user callchain, skip it.
      return true;
    }
    sample_record_count_++;
  } else if (record->type() == PERF_RECORD_LOST) {
    lost_record_count_ += static_cast<LostRecord*>(record)->lost;
  } else {
    UpdateRecordForEmbeddedElfPath(record);
    thread_tree_.Update(*record);
  }
  return record_file_writer_->WriteRecord(*record);
}

bool RecordCommand::SaveRecordWithoutUnwinding(Record* record) {
  if (record->type() == PERF_RECORD_SAMPLE) {
    auto& r = *static_cast<SampleRecord*>(record);
    if (fp_callchain_sampling_ || dwarf_callchain_sampling_) {
      r.AdjustCallChainGeneratedByKernel();
    }
    if (r.InKernel() && exclude_kernel_callchain_ && r.ExcludeKernelCallChain() == 0u) {
      // If current record contains no user callchain, skip it.
      return true;
    }
    sample_record_count_++;
  } else if (record->type() == PERF_RECORD_LOST) {
    lost_record_count_ += static_cast<LostRecord*>(record)->lost;
  }
  return record_file_writer_->WriteRecord(*record);
}

template <class RecordType>
void UpdateMmapRecordForEmbeddedElfPath(RecordType* record) {
  RecordType& r = *record;
  if (!r.InKernel() && r.data->pgoff != 0) {
    // For the case of a shared library "foobar.so" embedded
    // inside an APK, we rewrite the original MMAP from
    // ["path.apk" offset=X] to ["path.apk!/foobar.so" offset=W]
    // so as to make the library name explicit. This update is
    // done here (as part of the record operation) as opposed to
    // on the host during the report, since we want to report
    // the correct library name even if the the APK in question
    // is not present on the host. The new offset W is
    // calculated to be with respect to the start of foobar.so,
    // not to the start of path.apk.
    EmbeddedElf* ee =
        ApkInspector::FindElfInApkByOffset(r.filename, r.data->pgoff);
    if (ee != nullptr) {
      // Compute new offset relative to start of elf in APK.
      auto data = *r.data;
      data.pgoff -= ee->entry_offset();
      r.SetDataAndFilename(data, GetUrlInApk(r.filename, ee->entry_name()));
    }
  }
}

void RecordCommand::UpdateRecordForEmbeddedElfPath(Record* record) {
  if (record->type() == PERF_RECORD_MMAP) {
    UpdateMmapRecordForEmbeddedElfPath(static_cast<MmapRecord*>(record));
  } else if (record->type() == PERF_RECORD_MMAP2) {
    UpdateMmapRecordForEmbeddedElfPath(static_cast<Mmap2Record*>(record));
  }
}

bool RecordCommand::UnwindRecord(SampleRecord& r) {
  if ((r.sample_type & PERF_SAMPLE_CALLCHAIN) &&
      (r.sample_type & PERF_SAMPLE_REGS_USER) &&
      (r.regs_user_data.reg_mask != 0) &&
      (r.sample_type & PERF_SAMPLE_STACK_USER) &&
      (r.GetValidStackSize() > 0)) {
    ThreadEntry* thread =
        thread_tree_.FindThreadOrNew(r.tid_data.pid, r.tid_data.tid);
    RegSet regs(r.regs_user_data.abi, r.regs_user_data.reg_mask, r.regs_user_data.regs);
    std::vector<uint64_t> ips;
    std::vector<uint64_t> sps;
    if (!offline_unwinder_->UnwindCallChain(*thread, regs, r.stack_user_data.data,
                                            r.GetValidStackSize(), &ips, &sps)) {
      return false;
    }
    r.ReplaceRegAndStackWithCallChain(ips);
    if (callchain_joiner_) {
      return callchain_joiner_->AddCallChain(r.tid_data.pid, r.tid_data.tid,
                                             CallChainJoiner::ORIGINAL_OFFLINE, ips, sps);
    }
  }
  return true;
}

bool RecordCommand::PostUnwindRecords() {
  // 1. Move records from record_filename_ to a temporary file.
  if (!record_file_writer_->Close()) {
    return false;
  }
  record_file_writer_.reset();
  std::unique_ptr<TemporaryFile> tmp_file = ScopedTempFiles::CreateTempFile();
  if (!Workload::RunCmd({"mv", record_filename_, tmp_file->path})) {
    return false;
  }
  std::unique_ptr<RecordFileReader> reader = RecordFileReader::CreateInstance(tmp_file->path);
  if (!reader) {
    return false;
  }

  // 2. Read records from the temporary file, and write unwound records back to record_filename_.
  record_file_writer_ = CreateRecordFile(record_filename_);
  if (!record_file_writer_) {
    return false;
  }
  sample_record_count_ = 0;
  lost_record_count_ = 0;
  auto callback = [this](std::unique_ptr<Record> record) {
    return SaveRecordAfterUnwinding(record.get());
  };
  return reader->ReadDataSection(callback, false);
}

bool RecordCommand::JoinCallChains() {
  // 1. Prepare joined callchains.
  if (!callchain_joiner_->JoinCallChains()) {
    return false;
  }
  // 2. Move records from record_filename_ to a temporary file.
  if (!record_file_writer_->Close()) {
    return false;
  }
  record_file_writer_.reset();
  std::unique_ptr<TemporaryFile> tmp_file = ScopedTempFiles::CreateTempFile();
  if (!Workload::RunCmd({"mv", record_filename_, tmp_file->path})) {
    return false;
  }

  // 3. Read records from the temporary file, and write record with joined call chains back
  // to record_filename_.
  std::unique_ptr<RecordFileReader> reader = RecordFileReader::CreateInstance(tmp_file->path);
  record_file_writer_ = CreateRecordFile(record_filename_);
  if (!reader || !record_file_writer_) {
    return false;
  }

  auto record_callback = [&](std::unique_ptr<Record> r) {
    if (r->type() != PERF_RECORD_SAMPLE) {
      return record_file_writer_->WriteRecord(*r);
    }
    SampleRecord& sr = *static_cast<SampleRecord*>(r.get());
    if (!sr.HasUserCallChain()) {
      return record_file_writer_->WriteRecord(sr);
    }
    pid_t pid;
    pid_t tid;
    CallChainJoiner::ChainType type;
    std::vector<uint64_t> ips;
    std::vector<uint64_t> sps;
    if (!callchain_joiner_->GetNextCallChain(pid, tid, type, ips, sps)) {
      return false;
    }
    CHECK_EQ(type, CallChainJoiner::JOINED_OFFLINE);
    CHECK_EQ(pid, static_cast<pid_t>(sr.tid_data.pid));
    CHECK_EQ(tid, static_cast<pid_t>(sr.tid_data.tid));
    sr.UpdateUserCallChain(ips);
    return record_file_writer_->WriteRecord(sr);
  };
  return reader->ReadDataSection(record_callback, false);
}

bool RecordCommand::DumpAdditionalFeatures(
    const std::vector<std::string>& args) {
  // Read data section of perf.data to collect hit file information.
  thread_tree_.ClearThreadAndMap();
  if (CheckKernelSymbolAddresses()) {
    Dso::ReadKernelSymbolsFromProc();
  }
  auto callback = [&](const Record* r) {
    thread_tree_.Update(*r);
    if (r->type() == PERF_RECORD_SAMPLE) {
      CollectHitFileInfo(*reinterpret_cast<const SampleRecord*>(r));
    }
  };
  if (!record_file_writer_->ReadDataSection(callback)) {
    return false;
  }

  size_t feature_count = 5;
  if (branch_sampling_) {
    feature_count++;
  }
  if (dump_symbols_) {
    feature_count++;
  }
  if (!record_file_writer_->BeginWriteFeatures(feature_count)) {
    return false;
  }
  if (!DumpBuildIdFeature()) {
    return false;
  }
  if (dump_symbols_ && !DumpFileFeature()) {
    return false;
  }
  utsname uname_buf;
  if (TEMP_FAILURE_RETRY(uname(&uname_buf)) != 0) {
    PLOG(ERROR) << "uname() failed";
    return false;
  }
  if (!record_file_writer_->WriteFeatureString(PerfFileFormat::FEAT_OSRELEASE,
                                               uname_buf.release)) {
    return false;
  }
  if (!record_file_writer_->WriteFeatureString(PerfFileFormat::FEAT_ARCH,
                                               uname_buf.machine)) {
    return false;
  }

  std::string exec_path = android::base::GetExecutablePath();
  if (exec_path.empty()) exec_path = "simpleperf";
  std::vector<std::string> cmdline;
  cmdline.push_back(exec_path);
  cmdline.push_back("record");
  cmdline.insert(cmdline.end(), args.begin(), args.end());
  if (!record_file_writer_->WriteCmdlineFeature(cmdline)) {
    return false;
  }
  if (branch_sampling_ != 0 &&
      !record_file_writer_->WriteBranchStackFeature()) {
    return false;
  }
  if (!DumpMetaInfoFeature()) {
    return false;
  }

  if (!record_file_writer_->EndWriteFeatures()) {
    return false;
  }
  return true;
}

bool RecordCommand::DumpBuildIdFeature() {
  std::vector<BuildIdRecord> build_id_records;
  BuildId build_id;
  std::vector<Dso*> dso_v = thread_tree_.GetAllDsos();
  for (Dso* dso : dso_v) {
    if (!dso->HasDumpId()) {
      continue;
    }
    if (dso->type() == DSO_KERNEL) {
      if (!GetKernelBuildId(&build_id)) {
        continue;
      }
      build_id_records.push_back(
          BuildIdRecord(true, UINT_MAX, build_id, dso->Path()));
    } else if (dso->type() == DSO_KERNEL_MODULE) {
      std::string path = dso->Path();
      std::string module_name = basename(&path[0]);
      if (android::base::EndsWith(module_name, ".ko")) {
        module_name = module_name.substr(0, module_name.size() - 3);
      }
      if (!GetModuleBuildId(module_name, &build_id)) {
        LOG(DEBUG) << "can't read build_id for module " << module_name;
        continue;
      }
      build_id_records.push_back(BuildIdRecord(true, UINT_MAX, build_id, path));
    } else {
      if (dso->Path() == DEFAULT_EXECNAME_FOR_THREAD_MMAP) {
        continue;
      }
      auto tuple = SplitUrlInApk(dso->Path());
      if (std::get<0>(tuple)) {
        ElfStatus result = GetBuildIdFromApkFile(std::get<1>(tuple),
                                                 std::get<2>(tuple), &build_id);
        if (result != ElfStatus::NO_ERROR) {
          LOG(DEBUG) << "can't read build_id from file " << dso->Path() << ": "
                     << result;
          continue;
        }
      } else {
        ElfStatus result = GetBuildIdFromElfFile(dso->Path(), &build_id);
        if (result != ElfStatus::NO_ERROR) {
          LOG(DEBUG) << "can't read build_id from file " << dso->Path() << ": "
                     << result;
          continue;
        }
      }
      build_id_records.push_back(
          BuildIdRecord(false, UINT_MAX, build_id, dso->Path()));
    }
  }
  if (!record_file_writer_->WriteBuildIdFeature(build_id_records)) {
    return false;
  }
  return true;
}

bool RecordCommand::DumpFileFeature() {
  std::vector<Dso*> dso_v = thread_tree_.GetAllDsos();
  return record_file_writer_->WriteFileFeatures(thread_tree_.GetAllDsos());
}

bool RecordCommand::DumpMetaInfoFeature() {
  std::unordered_map<std::string, std::string> info_map;
  info_map["simpleperf_version"] = GetSimpleperfVersion();
  info_map["system_wide_collection"] = system_wide_collection_ ? "true" : "false";
  info_map["trace_offcpu"] = trace_offcpu_ ? "true" : "false";
  // By storing event types information in perf.data, the readers of perf.data have the same
  // understanding of event types, even if they are on another machine.
  info_map["event_type_info"] = ScopedEventTypes::BuildString(event_selection_set_.GetEvents());
#if defined(__ANDROID__)
  info_map["product_props"] = android::base::StringPrintf("%s:%s:%s",
                                  android::base::GetProperty("ro.product.manufacturer", "").c_str(),
                                  android::base::GetProperty("ro.product.model", "").c_str(),
                                  android::base::GetProperty("ro.product.name", "").c_str());
  info_map["android_version"] = android::base::GetProperty("ro.build.version.release", "");
#endif
  info_map["clockid"] = clockid_;
  info_map["timestamp"] = std::to_string(time(nullptr));
  return record_file_writer_->WriteMetaInfoFeature(info_map);
}

void RecordCommand::CollectHitFileInfo(const SampleRecord& r) {
  const ThreadEntry* thread =
      thread_tree_.FindThreadOrNew(r.tid_data.pid, r.tid_data.tid);
  const MapEntry* map =
      thread_tree_.FindMap(thread, r.ip_data.ip, r.InKernel());
  Dso* dso = map->dso;
  const Symbol* symbol;
  if (dump_symbols_) {
    symbol = thread_tree_.FindSymbol(map, r.ip_data.ip, nullptr, &dso);
    if (!symbol->HasDumpId()) {
      dso->CreateSymbolDumpId(symbol);
    }
  }
  if (!dso->HasDumpId()) {
    dso->CreateDumpId();
  }
  if (r.sample_type & PERF_SAMPLE_CALLCHAIN) {
    bool in_kernel = r.InKernel();
    bool first_ip = true;
    for (uint64_t i = 0; i < r.callchain_data.ip_nr; ++i) {
      uint64_t ip = r.callchain_data.ips[i];
      if (ip >= PERF_CONTEXT_MAX) {
        switch (ip) {
          case PERF_CONTEXT_KERNEL:
            in_kernel = true;
            break;
          case PERF_CONTEXT_USER:
            in_kernel = false;
            break;
          default:
            LOG(DEBUG) << "Unexpected perf_context in callchain: " << std::hex
                       << ip;
        }
      } else {
        if (first_ip) {
          first_ip = false;
          // Remove duplication with sample ip.
          if (ip == r.ip_data.ip) {
            continue;
          }
        }
        map = thread_tree_.FindMap(thread, ip, in_kernel);
        dso = map->dso;
        if (dump_symbols_) {
          symbol = thread_tree_.FindSymbol(map, ip, nullptr, &dso);
          if (!symbol->HasDumpId()) {
            dso->CreateSymbolDumpId(symbol);
          }
        }
        if (!dso->HasDumpId()) {
          dso->CreateDumpId();
        }
      }
    }
  }
}

void RegisterRecordCommand() {
  RegisterCommand("record",
                  [] { return std::unique_ptr<Command>(new RecordCommand()); });
}