#!/system/bin/sh

# Copyright (C) 2010 The Android Open Source Project
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#      http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

# Abort on the first unhandled command failure.
set -e

# Default product ID in crash report (used if GOOGLE_CRASH_* is undefined).
BRILLO_PRODUCT=Brillo

# Base directory that contains any crash reporter state files.
CRASH_STATE_DIR="/data/misc/crash_reporter"

# File containing crash_reporter's anonymized guid.
GUID_FILE="${CRASH_STATE_DIR}/guid"

# Crash sender lock in case the sender is already running.
CRASH_SENDER_LOCK="${CRASH_STATE_DIR}/lock/crash_sender"

# Path to file that indicates a crash test is currently running.
CRASH_TEST_IN_PROGRESS_FILE="${CRASH_STATE_DIR}/tmp/crash-test-in-progress"

# Set this to 1 in the environment to allow uploading crash reports
# for unofficial versions.
FORCE_OFFICIAL=${FORCE_OFFICIAL:-0}

# Path to hardware class description.
HWCLASS_PATH="/sys/devices/platform/chromeos_acpi/HWID"

# Path to file that indicates this is a developer image.
LEAVE_CORE_FILE="${CRASH_STATE_DIR}/.leave_core"

# Path to list_proxies.
LIST_PROXIES="list_proxies"

# Maximum crashes to send per day.
MAX_CRASH_RATE=${MAX_CRASH_RATE:-32}

# File whose existence mocks crash sending.  If empty we pretend the
# crash sending was successful, otherwise unsuccessful.
MOCK_CRASH_SENDING="${CRASH_STATE_DIR}/tmp/mock-crash-sending"

# Set this to 1 in the environment to pretend to have booted in developer
# mode.  This is used by autotests.
MOCK_DEVELOPER_MODE=${MOCK_DEVELOPER_MODE:-0}

# Ignore PAUSE_CRASH_SENDING file if set.
OVERRIDE_PAUSE_SENDING=${OVERRIDE_PAUSE_SENDING:-0}

# File whose existence causes crash sending to be delayed (for testing).
# Must be stateful to enable testing kernel crashes.
PAUSE_CRASH_SENDING="${CRASH_STATE_DIR}/lock/crash_sender_paused"

# Path to a directory of restricted certificates which includes
# a certificate for the crash server.
RESTRICTED_CERTIFICATES_PATH="/system/etc/security/cacerts"
RESTRICTED_CERTIFICATES_PATH_GOOGLE="/system/etc/security/cacerts_google"

# File whose existence implies we're running and not to start again.
RUN_FILE="${CRASH_STATE_DIR}/run/crash_sender.pid"

# Maximum time to sleep between sends.
SECONDS_SEND_SPREAD=${SECONDS_SEND_SPREAD:-600}

# File whose existence allows uploading of device coredumps.
DEVCOREDUMP_UPLOAD_FLAG_FILE="${CRASH_STATE_DIR}/device_coredump_upload_allowed"

# The weave configuration file.
WEAVE_CONF_FILE="/etc/weaved/weaved.conf"

# The os-release.d folder.
OSRELEASED_FOLDER="/etc/os-release.d"

# The syslog tag for all logging we emit: "<script name>[<pid>]".
TAG="$(basename "$0")[$$]"

# Directory to store timestamp files indicating the uploads in the past 24
# hours.
TIMESTAMPS_DIR="${CRASH_STATE_DIR}/crash_sender"

# Temp directory for this process.  Empty until one has been created.
TMP_DIR=""

# Crash report log file.
CRASH_LOG="${CRASH_STATE_DIR}/log/uploads.log"

# Logs a message under this process's tag.
# Arguments: anything accepted by log(1) after "-t <tag>".
lecho() {
  log -t "${TAG}" "$@"
}

# Logs a message as a warning.
# Arguments: the message words.
# NOTE(review): "-psyslog.warn" is logger(1)-style priority syntax; confirm
# that the log utility on the target interprets it as intended.
lwarn() {
  lecho -psyslog.warn "$@"
}

# Returns true if mock is enabled.
is_mock() {
  # Mock mode is signalled purely by the existence of the mock file.
  [ -f "${MOCK_CRASH_SENDING}" ] && return 0
  return 1
}

# Returns 0 if a mocked send should be reported as successful, i.e. the
# mock file has no content.
is_mock_successful() {
  local mock_in="$(cat "${MOCK_CRASH_SENDING}")"
  [ "${mock_in}" = "" ] && return 0  # empty file means success
  return 1
}

# Removes the per-process temp directory, the pid file and the sender lock,
# then emits the end-of-run marker (see crash_done).
cleanup() {
  if [ -n "${TMP_DIR}" ]; then
    rm -rf "${TMP_DIR}"
  fi
  rm -f "${RUN_FILE}"
  if [ -n "${CRASH_SENDER_LOCK}" ]; then
    rm -rf "${CRASH_SENDER_LOCK}"
  fi
  crash_done
}

# Logs "crash_sender done." when mocking is enabled; otherwise does nothing.
crash_done() {
  if is_mock; then
    # For testing purposes, emit a message to log so that we
    # know when the test has received all the messages from this run.
    lecho "crash_sender done."
  fi
}

# Returns 0 if this is considered an official image: either FORCE_OFFICIAL
# is non-zero or the ro.secure property is "1".
is_official_image() {
  # Quoted with a default so an empty/unset value is treated as 0 instead of
  # making the test itself fail with a syntax error.
  [ "${FORCE_OFFICIAL:-0}" -ne 0 ] && return 0
  if [ "$(getprop ro.secure)" = "1" ]; then
    return 0
  else
    return 1
  fi
}

# Returns 0 if the a crash test is currently running.  NOTE: Mirrors
# crash_collector.cc:CrashCollector::IsCrashTestInProgress().
is_crash_test_in_progress() {
  [ -f "${CRASH_TEST_IN_PROGRESS_FILE}" ] && return 0
  return 1
}

# Returns 0 if we should consider ourselves to be running on a developer
# image.  NOTE: Mirrors crash_collector.cc:CrashCollector::IsDeveloperImage().
is_developer_image() {
  # If we're testing crash reporter itself, we don't want to special-case
  # for developer images.
  is_crash_test_in_progress && return 1
  [ -f "${LEAVE_CORE_FILE}" ] && return 0
  return 1
}

# Returns 0 if we should consider ourselves to be running on a test image,
# i.e. the channel name starts with "test".
is_test_image() {
  # If we're testing crash reporter itself, we don't want to special-case
  # for test images.
  is_crash_test_in_progress && return 1
  case "$(get_channel)" in
    test*) return 0 ;;
  esac
  return 1
}

# Returns 0 if the machine booted up in developer mode: either
# MOCK_DEVELOPER_MODE is non-zero or the ro.debuggable property is "1".
is_developer_mode() {
  # Quoted with a default for the same reason as in is_official_image.
  [ "${MOCK_DEVELOPER_MODE:-0}" -ne 0 ] && return 0
  # If we're testing crash reporter itself, we don't want to special-case
  # for developer mode.
  is_crash_test_in_progress && return 1
  if [ "$(getprop ro.debuggable)" = "1" ]; then
    return 0
  else
    return 1
  fi
}

# Returns the path of the certificates directory to be used when sending
# reports to the crash server.
# If crash_reporter.full_certs=1, return the full certificates path.
# Otherwise return the Google-specific certificates path.
get_certificates_path() {
  if [ "$(getprop crash_reporter.full_certs)" = "1" ]; then
    echo "${RESTRICTED_CERTIFICATES_PATH}"
  else
    echo "${RESTRICTED_CERTIFICATES_PATH_GOOGLE}"
  fi
}

# Return 0 if the uploading of device coredumps is allowed, i.e. the flag
# file exists.
is_device_coredump_upload_allowed() {
  [ -f "${DEVCOREDUMP_UPLOAD_FLAG_FILE}" ] && return 0
  return 1
}

# Generate a uniform random number in 0..max-1.
# POSIX arithmetic expansion requires support of at least signed long integers.
# On 32-bit systems, that may mean 32-bit signed integers, in which case the
# 32-bit random number read from /dev/urandom may be interpreted as negative
# when used inside an arithmetic expansion (since the high bit might be set).
# mksh at least is known to behave this way.
# For this case, simply take the absolute value, which will still give a
# roughly uniform random distribution for the modulo (as we are merely ignoring
# the high/sign bit).
# See corresponding Arithmetic Expansion and Arithmetic Expression sections:
# POSIX: http://pubs.opengroup.org/onlinepubs/009695399/utilities/xcu_chap02.html#tag_02_06_04
# mksh: http://linux.die.net/man/1/mksh
#
# Arguments: $1 - exclusive upper bound.
# Outputs:   the random number on stdout; 0 if the bound is not a positive
#            integer.
generate_uniform_random() {
  local max=$1
  # A zero (or otherwise unusable) bound would make the modulo below fail
  # with a division by zero, so answer with the only value that makes sense.
  if ! [ "${max}" -gt 0 ] 2>/dev/null; then
    echo 0
    return
  fi
  local raw="$(od -An -N4 -tu /dev/urandom)"
  # Take the remainder first and the absolute value second: the remainder is
  # always smaller in magnitude than max, so negating it cannot overflow,
  # whereas negating the most negative raw value would.
  local value=$((raw % max))
  if [ "${value}" -lt 0 ]; then
    value=$((-value))
  fi
  echo "${value}"
}

# Check if sending a crash now does not exceed the maximum 24hr rate and
# commit to doing so, if not.
# Returns: 0 (after recording a new timestamp file) if a send is allowed,
#          1 if MAX_CRASH_RATE has been reached.
check_rate() {
  mkdir -p "${TIMESTAMPS_DIR}"
  # Only consider minidumps written in the past 24 hours by removing all older.
  find "${TIMESTAMPS_DIR}" -mindepth 1 -mtime +1 \
      -exec rm -- '{}' ';'
  # Count the remaining timestamp files.  An unmatched glob is left as the
  # literal pattern, so only entries that really exist are counted; otherwise
  # an empty directory would be reported as one send.
  local sends_in_24hrs=0 stamp
  for stamp in "${TIMESTAMPS_DIR}"/*; do
    if [ -e "${stamp}" ]; then
      sends_in_24hrs=$((sends_in_24hrs + 1))
    fi
  done
  lecho "Current send rate: ${sends_in_24hrs}sends/24hrs"
  if [ "${sends_in_24hrs}" -ge "${MAX_CRASH_RATE}" ]; then
    lecho "Cannot send more crashes:"
    lecho " current ${sends_in_24hrs}send/24hrs >= " \
          "max ${MAX_CRASH_RATE}send/24hrs"
    return 1
  fi
  mktemp "${TIMESTAMPS_DIR}"/XXXXXX > /dev/null
  return 0
}

# Gets the base part of a crash report file, such as name.01234.5678.9012 from
# name.01234.5678.9012.meta or name.01234.5678.9012.log.tar.xz.  We make sure
# "name" is sanitized in CrashCollector::Sanitize to not include any periods.
get_base() {
  echo "$1" | cut -d. -f-4
}

# Prints the extension of the given file name, looking through a trailing
# ".gz" (foo.log.gz -> log).
get_extension() {
  local extension="${1##*.}"
  local filename="${1%.*}"
  # For gzipped file, we ignore .gz and get the real extension
  if [ "${extension}" = "gz" ]; then
    echo "${filename##*.}"
  else
    echo "${extension}"
  fi
}

# Return which kind of report the given metadata file relates to.
# Prints "undefined" if the payload is not readable, "minidump" for a .dmp
# payload, and the payload's extension otherwise.
get_kind() {
  local payload="$(get_key_value "$1" "payload")"
  if [ ! -r "${payload}" ]; then
    lecho "Missing payload: ${payload}"
    echo "undefined"
    return
  fi
  local kind="$(get_extension "${payload}")"
  if [ "${kind}" = "dmp" ]; then
    echo "minidump"
    return
  fi
  echo "${kind}"
}

# Prints the value stored for a key, or "undefined" if there is none (or the
# stored value is empty).
# Arguments: $1 - either a directory holding one file per key, or a file of
#                 key=value lines.
#            $2 - the key to look up.
get_key_value() {
  local file="$1" key="$2" value

  if [ -f "${file}/${key}" ]; then
    # Get the value from a folder where each key is its own file.  The key
    # file's entire contents is the value.
    value=$(cat "${file}/${key}")
  elif [ -f "${file}" ]; then
    # Get the value from a file that has multiple key=value combinations.
    # Return the first entry.  There shouldn't be more than one anyways.
    # Only the text up to and including the first = sign is stripped, so
    # embedded = characters in the value are preserved.
    value=$(sed -n "/^${key}[[:space:]]*=/{s:^[^=]*=::p;q}" "${file}")
  fi

  echo "${value:-undefined}"
}

# Prints the keys (text before the first "=" on each line) of the given file
# that match the given regex.
get_keys() {
  local file="$1" regex="$2"

  cut -d '=' -f1 "${file}" | grep --color=never "${regex}"
}

# Return the channel name (sans "-channel" suffix).
get_channel() {
  getprop ro.product.channel | sed 's:-channel$::'
}

# Return the hardware class or "undefined".
get_hardware_class() {
  if [ -r "${HWCLASS_PATH}" ]; then
    cat "${HWCLASS_PATH}"
  else
    echo "undefined"
  fi
}

# Return the log string filtered with only JSON-safe white-listed characters.
filter_log_string() {
  echo "$1" | tr -cd '[:alnum:]_.\-:;'
}

# Uploads (or, in mock mode, pretends to upload) a single crash report.
# Arguments: $1 - path to the report's .meta file.
# Returns:   0 on success; 1 on configuration error or mocked failure;
#            otherwise curl's exit status.
send_crash() {
  local meta_path="$1"
  local report_payload="$(get_key_value "${meta_path}" "payload")"
  local kind="$(get_kind "${meta_path}")"
  local exec_name="$(get_key_value "${meta_path}" "exec_name")"
  local url="$(get_key_value "${OSRELEASED_FOLDER}" "crash_server")"
  local bdk_version="$(get_key_value "${meta_path}" "bdk_version")"
  local hwclass="$(get_hardware_class)"
  local write_payload_size="$(get_key_value "${meta_path}" "payload_size")"
  local log="$(get_key_value "${meta_path}" "log")"
  local sig="$(get_key_value "${meta_path}" "sig")"
  local send_payload_size="$(stat -c "%s" "${report_payload}" 2>/dev/null)"
  local product="$(get_key_value "${meta_path}" "product_id")"
  local version="$(get_key_value "${meta_path}" "product_version")"
  local upload_prefix="$(get_key_value "${meta_path}" "upload_prefix")"
  local guid
  local model_manifest_id="$(get_key_value "${WEAVE_CONF_FILE}" "model_id")"
  # Declared up front so that neither leaks out of, nor is inherited into,
  # this function (e.g. a "proxy" variable from the environment).
  local proxy= ret curl_result

  # If crash_reporter.server is not set return with an error.  A missing key
  # is reported by get_key_value as the literal "undefined", never as an
  # empty string, so both have to be checked.
  if [ -z "${url}" ] || [ "${url}" = "undefined" ]; then
    lecho "Configuration error: crash_reporter.server not set."
    return 1
  fi

  # From here on the positional parameters accumulate the per-report curl
  # form arguments.
  set -- \
    -F "write_payload_size=${write_payload_size}" \
    -F "send_payload_size=${send_payload_size}"
  if [ "${sig}" != "undefined" ]; then
    set -- "$@" \
      -F "sig=${sig}" \
      -F "sig2=${sig}"
  fi
  if [ -r "${report_payload}" ]; then
    set -- "$@" \
      -F "upload_file_${kind}=@${report_payload}"
  fi
  if [ "${log}" != "undefined" ] && [ -r "${log}" ]; then
    set -- "$@" \
      -F "log=@${log}"
  fi

  if [ "${upload_prefix}" = "undefined" ]; then
    upload_prefix=""
  fi

  # Grab any variable that begins with upload_.
  local k v
  for k in $(get_keys "${meta_path}" "^upload_"); do
    v="$(get_key_value "${meta_path}" "${k}")"
    case ${k} in
      # Product & version are handled separately.
      upload_var_prod) ;;
      upload_var_ver) ;;
      upload_var_*)
        set -- "$@" -F "${upload_prefix}${k#upload_var_}=${v}"
        ;;
      upload_file_*)
        if [ -r "${v}" ]; then
          set -- "$@" -F "${upload_prefix}${k#upload_file_}=@${v}"
        fi
        ;;
    esac
  done

  # If ID or VERSION_ID is undefined, we use the default product name
  # and bdk_version from /etc/os-release.d.
  if [ "${product}" = "undefined" ]; then
    product="${BRILLO_PRODUCT}"
  fi
  if [ "${version}" = "undefined" ]; then
    version="${bdk_version}"
  fi

  local image_type
  if is_test_image; then
    image_type="test"
  elif is_developer_image; then
    image_type="dev"
  elif [ ${FORCE_OFFICIAL} -ne 0 ]; then
    image_type="force-official"
  elif is_mock && ! is_mock_successful; then
    image_type="mock-fail"
  fi

  local boot_mode
  if is_developer_mode; then
    boot_mode="dev"
  fi

  # Need to strip dashes ourselves as Chrome preserves it in the file
  # nowadays.  This is also what the Chrome breakpad client does.
  guid=$(tr -d '-' < "${GUID_FILE}")

  local error_type="$(get_key_value "${meta_path}" "error_type")"
  [ "${error_type}" = "undefined" ] && error_type=

  lecho "Sending crash:"
  if [ "${product}" != "${BRILLO_PRODUCT}" ]; then
    lecho " Sending crash report on behalf of ${product}"
  fi
  lecho " Metadata: ${meta_path} (${kind})"
  lecho " Payload: ${report_payload}"
  lecho " Version: ${version}"
  lecho " Bdk Version: ${bdk_version}"
  [ -n "${image_type}" ] && lecho " Image type: ${image_type}"
  [ -n "${boot_mode}" ] && lecho " Boot mode: ${boot_mode}"
  if is_mock; then
    lecho " Product: ${product}"
    lecho " URL: ${url}"
    lecho " HWClass: ${hwclass}"
    lecho " write_payload_size: ${write_payload_size}"
    lecho " send_payload_size: ${send_payload_size}"
    if [ "${log}" != "undefined" ]; then
      lecho " log: @${log}"
    fi
    if [ "${sig}" != "undefined" ]; then
      lecho " sig: ${sig}"
    fi
  fi
  lecho " Exec name: ${exec_name}"
  [ -n "${error_type}" ] && lecho " Error type: ${error_type}"
  if is_mock; then
    if ! is_mock_successful; then
      lecho "Mocking unsuccessful send"
      return 1
    fi
    lecho "Mocking successful send"
    return 0
  fi

  # Read in the first proxy, if any, for a given URL.  NOTE: The
  # double-quotes are necessary due to a bug in dash with the "local"
  # builtin command and values that have spaces in them (see
  # "https://bugs.launchpad.net/ubuntu/+source/dash/+bug/139097").
  if [ -f "${LIST_PROXIES}" ]; then
    proxy=$("${LIST_PROXIES}" --quiet "${url}")
    ret=$?
    if [ ${ret} -ne 0 ]; then
      proxy=''
      lwarn "Listing proxies failed with exit code ${ret}"
    else
      proxy=$(echo "${proxy}" | head -1)
    fi
  fi
  # if a direct connection should be used, unset the proxy variable.
  [ "${proxy}" = "direct://" ] && proxy=
  local report_id="${TMP_DIR}/report_id"
  local curl_stderr="${TMP_DIR}/curl_stderr"

  # curl's exit status is inspected by hand, so it must not abort the script.
  set +e
  curl "${url}" -f -v ${proxy:+--proxy "$proxy"} \
    --capath "$(get_certificates_path)" --ciphers HIGH \
    -F "prod=${product}" \
    -F "ver=${version}" \
    -F "bdk_version=${bdk_version}" \
    -F "hwclass=${hwclass}" \
    -F "exec_name=${exec_name}" \
    -F "model_manifest_id=${model_manifest_id}" \
    ${image_type:+-F "image_type=${image_type}"} \
    ${boot_mode:+-F "boot_mode=${boot_mode}"} \
    ${error_type:+-F "error_type=${error_type}"} \
    -F "guid=${guid}" \
    -o "${report_id}" \
    "$@" \
    2>"${curl_stderr}"
  curl_result=$?
  set -e

  if [ ${curl_result} -eq 0 ]; then
    local id="$(cat "${report_id}")"
    local timestamp="$(date +%s)"
    local filter_prod="$(filter_log_string "${product}")"
    local filter_exec="$(filter_log_string "${exec_name}")"
    if [ "${filter_prod}" != "${product}" ]; then
      lwarn "Product name filtered to: ${filter_prod}."
    fi
    if [ "${filter_exec}" != "${exec_name}" ]; then
      lwarn "Exec name filtered to: ${filter_exec}."
    fi
    printf "{'time':%s,'id':'%s','product':'%s','exec_name':'%s'}\n" \
      "${timestamp}" "${id}" "${filter_prod}" "${filter_exec}" >> "${CRASH_LOG}"
    lecho "Crash report receipt ID ${id}"
  else
    lecho "Crash sending failed with exit code ${curl_result}: " \
      "$(cat "${curl_stderr}")"
  fi

  rm -f "${report_id}"

  return ${curl_result}
}

# *.meta files always end with done=1 so we can tell if they are complete.
is_complete_metadata() {
  grep -q "done=1" "$1"
}

# Remove the given report path: every file that shares the given path minus
# its last extension.
remove_report() {
  local base="${1%.*}"
  rm -f -- "${base}".*
}

# Send all crashes from the given directory.  This applies even when we're on a
# 3G connection (see crosbug.com/3304 for discussion).
# Arguments: $1 - directory holding the crash reports.
send_crashes() {
  local dir="$1"
  local old_file meta_path
  lecho "Sending crashes for ${dir}"

  if [ ! -d "${dir}" ]; then
    return
  fi

  # Consider any old files which still have no corresponding meta file
  # as orphaned, and remove them.
  for old_file in $(find "${dir}" -mindepth 1 \
                    -mtime +1 -type f); do
    if [ ! -e "$(get_base "${old_file}").meta" ]; then
      lecho "Removing old orphaned file: ${old_file}."
      rm -f -- "${old_file}"
    fi
  done

  # Look through all metadata (*.meta) files, oldest first.  That way, the rate
  # limit does not stall old crashes if there's a high amount of new crashes
  # coming in.
  # For each crash report, first evaluate conditions that might lead to its
  # removal to honor user choice and to free disk space as soon as possible,
  # then decide whether it should be sent right now or kept for later sending.
  for meta_path in $(ls -1tr "${dir}"/*.meta 2>/dev/null); do
    lecho "Considering metadata ${meta_path}."

    local kind=$(get_kind "${meta_path}")
    case "${kind}" in
      minidump|kcrash|log|devcore)
        ;;
      *)
        lecho "Unknown report kind ${kind}.  Removing report."
        remove_report "${meta_path}"
        continue
        ;;
    esac

    if ! is_complete_metadata "${meta_path}"; then
      # This report is incomplete, so if it's old, just remove it.
      local old_meta=$(find "${dir}" -mindepth 1 -name \
        "$(basename "${meta_path}")" -mtime +1 -type f)
      if [ -n "${old_meta}" ]; then
        lecho "Removing old incomplete metadata."
        remove_report "${meta_path}"
      else
        lecho "Ignoring recent incomplete metadata."
      fi
      continue
    fi

    # Ignore device coredump if device coredump uploading is not allowed.
    if [ "${kind}" = "devcore" ] && ! is_device_coredump_upload_allowed; then
      lecho "Ignoring device coredump.  Device coredump upload not allowed."
      continue
    fi

    if ! is_mock && ! is_official_image; then
      lecho "Not an official OS version.  Removing crash."
      remove_report "${meta_path}"
      continue
    fi

    # Remove existing crashes in case user consent has not (yet) been given or
    # has been revoked.  This must come after the guest mode check because
    # metrics_client always returns "not consented" in guest mode.
    if ! metrics_client -c; then
      lecho "Crash reporting is disabled.  Removing crash."
      remove_report "${meta_path}"
      continue
    fi

    # Skip report if the upload rate is exceeded.  (Don't exit right now because
    # subsequent reports may be candidates for deletion.)
    if ! check_rate; then
      lecho "Sending ${meta_path} would exceed rate.  Leaving for later."
      continue
    fi

    # The .meta file should be written *after* all to-be-uploaded files that it
    # references.  Nevertheless, as a safeguard, a hold-off time of thirty
    # seconds after writing the .meta file is ensured.  Also, sending of crash
    # reports is spread out randomly by up to SECONDS_SEND_SPREAD.  Thus, for
    # the sleep call the greater of the two delays is used.
    local now=$(date +%s)
    local holdoff_time=$(($(stat -c "%Y" "${meta_path}") + 30 - ${now}))
    local spread_time=$(generate_uniform_random "${SECONDS_SEND_SPREAD}")
    local sleep_time
    if [ ${spread_time} -gt ${holdoff_time} ]; then
      sleep_time="${spread_time}"
    else
      sleep_time="${holdoff_time}"
    fi
    lecho "Scheduled to send in ${sleep_time}s."
    if ! is_mock; then
      if ! sleep "${sleep_time}"; then
        lecho "Sleep failed"
        return 1
      fi
    fi

    # Try to upload.
    if ! send_crash "${meta_path}"; then
      lecho "Problem sending ${meta_path}, not removing."
      continue
    fi

    # Send was successful, now remove.
    lecho "Successfully sent crash ${meta_path} and removing."
    remove_report "${meta_path}"
  done
}

# Prints the usage text and exits.
# Arguments: $1 - exit status to use (defaults to 1).
usage() {
  cat <<EOF
Usage: crash_sender [options]
Options:
 -e <var>=<val> Set env |var| to |val| (only some vars)
EOF
  exit ${1:-1}
}

# Parses the command line.  Only a fixed set of variables may be overridden
# with -e; anything else is logged and terminates the script with status 1.
parseargs() {
  # Parse the command line arguments.
  while [ $# -gt 0 ]; do
    case "$1" in
    -e)
      shift
      case "$1" in
      FORCE_OFFICIAL=*|\
      MAX_CRASH_RATE=*|\
      MOCK_DEVELOPER_MODE=*|\
      OVERRIDE_PAUSE_SENDING=*|\
      SECONDS_SEND_SPREAD=*)
        export "$1"
        ;;
      *)
        lecho "Unknown var passed to -e: $1"
        exit 1
        ;;
      esac
      ;;
    -h)
      usage 0
      ;;
    *)
      lecho "Unknown options: $*"
      exit 1
      ;;
    esac
    shift
  done
}

# Entry point: applies the command-line overrides, bails out early when
# sending is paused or this is a test image, records our pid, verifies the
# certificates directory and then processes the system-wide crash directory.
main() {
  parseargs "$@"

  if [ -e "${PAUSE_CRASH_SENDING}" ] && \
     [ ${OVERRIDE_PAUSE_SENDING} -eq 0 ]; then
    lecho "Exiting early due to ${PAUSE_CRASH_SENDING}."
    exit 1
  fi

  if is_test_image; then
    lecho "Exiting early due to test image."
    exit 1
  fi

  # We don't perform checks on this because we have a master lock with the
  # CRASH_SENDER_LOCK file.  This pid file is for the system to keep track
  # (like with autotests) that we're still running.
  echo $$ > "${RUN_FILE}"

  for dependency in "$(get_certificates_path)"; do
    if [ ! -x "${dependency}" ]; then
      lecho "Fatal: Crash sending disabled: ${dependency} not found."
      exit 1
    fi
  done

  TMP_DIR="$(mktemp -d "${CRASH_STATE_DIR}/tmp/crash_sender.XXXXXX")"

  # Send system-wide crashes
  send_crashes "${CRASH_STATE_DIR}/crash"
}

# NOTE(review): after an INT/TERM trap action returns the shell resumes the
# interrupted script, so cleanup can run while work is still in progress;
# confirm whether the signal handlers should also exit.
trap cleanup EXIT INT TERM

#TODO(http://b/23937249): Change the locking logic back to using flock.
if ! mkdir "${CRASH_SENDER_LOCK}" 2>/dev/null; then
  lecho "Already running; quitting."
  crash_done
  # The lock directory and pid file belong to the instance that is already
  # running.  Disarm the trap so that exiting here does not run cleanup and
  # delete them from under it.
  trap - EXIT INT TERM
  exit 1
fi

main "$@"