/*
 * Copyright (C) 2015 The Android Open Source Project
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *      http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
#include "JankTracker.h"

#include "Properties.h"
#include "utils/TimeUtils.h"

#include <algorithm>
#include <cutils/ashmem.h>
#include <cutils/log.h>
#include <cstdio>
#include <errno.h>
#include <inttypes.h>
#include <limits>
#include <cmath>
#include <sys/mman.h>

namespace android {
namespace uirenderer {

// Human-readable labels used by dumpData(); indexed in lockstep with
// jankTypeCounts[], mThresholds[] and COMPARISONS[] below.
static const char* JANK_TYPE_NAMES[] = {
        "Missed Vsync",
        "High input latency",
        "Slow UI thread",
        "Slow bitmap uploads",
        "Slow issue draw commands",
};

// A pair of FrameInfo timestamps whose delta (frame.duration(start, end))
// is compared against the matching mThresholds[] entry in addFrame().
struct Comparison {
    FrameInfoIndex start;
    FrameInfoIndex end;
};

// One entry per jank type, in the same order as JANK_TYPE_NAMES.
static const Comparison COMPARISONS[] = {
        {FrameInfoIndex::IntendedVsync, FrameInfoIndex::Vsync},
        {FrameInfoIndex::OldestInputEvent, FrameInfoIndex::Vsync},
        {FrameInfoIndex::Vsync, FrameInfoIndex::SyncStart},
        {FrameInfoIndex::SyncStart, FrameInfoIndex::IssueDrawCommandsStart},
        {FrameInfoIndex::IssueDrawCommandsStart, FrameInfoIndex::FrameCompleted},
};

// If the event exceeds 10 seconds throw it away, this isn't a jank event
// it's an ANR and will be handled as such
static const int64_t IGNORE_EXCEEDING = seconds_to_nanoseconds(10);

/*
 * We don't track direct-drawing via Surface:lockHardwareCanvas()
 * for now
 *
 * TODO: kSurfaceCanvas can negatively impact other drawing by using up
 * time on the RenderThread, figure out how to attribute that as a jank-causer
 */
static const int64_t EXEMPT_FRAMES_FLAGS = FrameInfoFlags::SurfaceCanvas;

// The bucketing algorithm
controls so to speak // If a frame is <= to this it goes in bucket 0 static const uint32_t kBucketMinThreshold = 5; // If a frame is > this, start counting in increments of 2ms static const uint32_t kBucket2msIntervals = 32; // If a frame is > this, start counting in increments of 4ms static const uint32_t kBucket4msIntervals = 48; // For testing purposes to try and eliminate test infra overhead we will // consider any unknown delay of frame start as part of the test infrastructure // and filter it out of the frame profile data static FrameInfoIndex sFrameStart = FrameInfoIndex::IntendedVsync; // The interval of the slow frame histogram static const uint32_t kSlowFrameBucketIntervalMs = 50; // The start point of the slow frame bucket in ms static const uint32_t kSlowFrameBucketStartMs = 150; // This will be called every frame, performance sensitive // Uses bit twiddling to avoid branching while achieving the packing desired static uint32_t frameCountIndexForFrameTime(nsecs_t frameTime) { uint32_t index = static_cast<uint32_t>(ns2ms(frameTime)); // If index > kBucketMinThreshold mask will be 0xFFFFFFFF as a result // of negating 1 (twos compliment, yaay) else mask will be 0 uint32_t mask = -(index > kBucketMinThreshold); // If index > threshold, this will essentially perform: // amountAboveThreshold = index - threshold; // index = threshold + (amountAboveThreshold / 2) // However if index is <= this will do nothing. It will underflow, do // a right shift by 0 (no-op), then overflow back to the original value index = ((index - kBucket4msIntervals) >> (index > kBucket4msIntervals)) + kBucket4msIntervals; index = ((index - kBucket2msIntervals) >> (index > kBucket2msIntervals)) + kBucket2msIntervals; // If index was < minThreshold at the start of all this it's going to // be a pretty garbage value right now. However, mask is 0 so we'll end // up with the desired result of 0. 
index = (index - kBucketMinThreshold) & mask; return index; } // Only called when dumping stats, less performance sensitive static uint32_t frameTimeForFrameCountIndex(uint32_t index) { index = index + kBucketMinThreshold; if (index > kBucket2msIntervals) { index += (index - kBucket2msIntervals); } if (index > kBucket4msIntervals) { // This works because it was already doubled by the above if // 1 is added to shift slightly more towards the middle of the bucket index += (index - kBucket4msIntervals) + 1; } return index; } JankTracker::JankTracker(const DisplayInfo& displayInfo) { // By default this will use malloc memory. It may be moved later to ashmem // if there is shared space for it and a request comes in to do that. mData = new ProfileData; reset(); nsecs_t frameIntervalNanos = static_cast<nsecs_t>(1_s / displayInfo.fps); #if USE_HWC2 nsecs_t sfOffset = frameIntervalNanos - (displayInfo.presentationDeadline - 1_ms); nsecs_t offsetDelta = sfOffset - displayInfo.appVsyncOffset; // There are two different offset cases. If the offsetDelta is positive // and small, then the intention is to give apps extra time by leveraging // pipelining between the UI & RT threads. If the offsetDelta is large or // negative, the intention is to subtract time from the total duration // in which case we can't afford to wait for dequeueBuffer blockage. if (offsetDelta <= 4_ms && offsetDelta >= 0) { // SF will begin composition at VSYNC-app + offsetDelta. If we are triple // buffered, this is the expected time at which dequeueBuffer will // return due to the staggering of VSYNC-app & VSYNC-sf. 
mDequeueTimeForgiveness = offsetDelta + 4_ms; } #endif setFrameInterval(frameIntervalNanos); } JankTracker::~JankTracker() { freeData(); } void JankTracker::freeData() { if (mIsMapped) { munmap(mData, sizeof(ProfileData)); } else { delete mData; } mIsMapped = false; mData = nullptr; } void JankTracker::switchStorageToAshmem(int ashmemfd) { int regionSize = ashmem_get_size_region(ashmemfd); if (regionSize < static_cast<int>(sizeof(ProfileData))) { ALOGW("Ashmem region is too small! Received %d, required %u", regionSize, static_cast<unsigned int>(sizeof(ProfileData))); return; } ProfileData* newData = reinterpret_cast<ProfileData*>( mmap(NULL, sizeof(ProfileData), PROT_READ | PROT_WRITE, MAP_SHARED, ashmemfd, 0)); if (newData == MAP_FAILED) { int err = errno; ALOGW("Failed to move profile data to ashmem fd %d, error = %d", ashmemfd, err); return; } // The new buffer may have historical data that we want to build on top of // But let's make sure we don't overflow Just In Case uint32_t divider = 0; if (newData->totalFrameCount > (1 << 24)) { divider = 4; } for (size_t i = 0; i < mData->jankTypeCounts.size(); i++) { newData->jankTypeCounts[i] >>= divider; newData->jankTypeCounts[i] += mData->jankTypeCounts[i]; } for (size_t i = 0; i < mData->frameCounts.size(); i++) { newData->frameCounts[i] >>= divider; newData->frameCounts[i] += mData->frameCounts[i]; } newData->jankFrameCount >>= divider; newData->jankFrameCount += mData->jankFrameCount; newData->totalFrameCount >>= divider; newData->totalFrameCount += mData->totalFrameCount; if (newData->statStartTime > mData->statStartTime || newData->statStartTime == 0) { newData->statStartTime = mData->statStartTime; } freeData(); mData = newData; mIsMapped = true; } void JankTracker::setFrameInterval(nsecs_t frameInterval) { mFrameInterval = frameInterval; mThresholds[kMissedVsync] = 1; /* * Due to interpolation and sample rate differences between the touch * panel and the display (example, 85hz touch panel driving a 60hz 
display) * we call high latency 1.5 * frameinterval * * NOTE: Be careful when tuning this! A theoretical 1,000hz touch panel * on a 60hz display will show kOldestInputEvent - kIntendedVsync of being 15ms * Thus this must always be larger than frameInterval, or it will fail */ mThresholds[kHighInputLatency] = static_cast<int64_t>(1.5 * frameInterval); // Note that these do not add up to 1. This is intentional. It's to deal // with variance in values, and should be sort of an upper-bound on what // is reasonable to expect. mThresholds[kSlowUI] = static_cast<int64_t>(.5 * frameInterval); mThresholds[kSlowSync] = static_cast<int64_t>(.2 * frameInterval); mThresholds[kSlowRT] = static_cast<int64_t>(.75 * frameInterval); } void JankTracker::addFrame(const FrameInfo& frame) { mData->totalFrameCount++; // Fast-path for jank-free frames int64_t totalDuration = frame.duration(sFrameStart, FrameInfoIndex::FrameCompleted); if (mDequeueTimeForgiveness && frame[FrameInfoIndex::DequeueBufferDuration] > 500_us) { nsecs_t expectedDequeueDuration = mDequeueTimeForgiveness + frame[FrameInfoIndex::Vsync] - frame[FrameInfoIndex::IssueDrawCommandsStart]; if (expectedDequeueDuration > 0) { // Forgive only up to the expected amount, but not more than // the actual time spent blocked. nsecs_t forgiveAmount = std::min(expectedDequeueDuration, frame[FrameInfoIndex::DequeueBufferDuration]); totalDuration -= forgiveAmount; } } uint32_t framebucket = frameCountIndexForFrameTime(totalDuration); // Keep the fast path as fast as possible. 
if (CC_LIKELY(totalDuration < mFrameInterval)) { mData->frameCounts[framebucket]++; return; } // Only things like Surface.lockHardwareCanvas() are exempt from tracking if (frame[FrameInfoIndex::Flags] & EXEMPT_FRAMES_FLAGS) { return; } if (framebucket <= mData->frameCounts.size()) { mData->frameCounts[framebucket]++; } else { framebucket = (ns2ms(totalDuration) - kSlowFrameBucketStartMs) / kSlowFrameBucketIntervalMs; framebucket = std::min(framebucket, static_cast<uint32_t>(mData->slowFrameCounts.size() - 1)); framebucket = std::max(framebucket, 0u); mData->slowFrameCounts[framebucket]++; } mData->jankFrameCount++; for (int i = 0; i < NUM_BUCKETS; i++) { int64_t delta = frame.duration(COMPARISONS[i].start, COMPARISONS[i].end); if (delta >= mThresholds[i] && delta < IGNORE_EXCEEDING) { mData->jankTypeCounts[i]++; } } } void JankTracker::dumpBuffer(const void* buffer, size_t bufsize, int fd) { if (bufsize < sizeof(ProfileData)) { return; } const ProfileData* data = reinterpret_cast<const ProfileData*>(buffer); dumpData(data, fd); } void JankTracker::dumpData(const ProfileData* data, int fd) { if (sFrameStart != FrameInfoIndex::IntendedVsync) { dprintf(fd, "\nNote: Data has been filtered!"); } dprintf(fd, "\nStats since: %" PRIu64 "ns", data->statStartTime); dprintf(fd, "\nTotal frames rendered: %u", data->totalFrameCount); dprintf(fd, "\nJanky frames: %u (%.2f%%)", data->jankFrameCount, (float) data->jankFrameCount / (float) data->totalFrameCount * 100.0f); dprintf(fd, "\n50th percentile: %ums", findPercentile(data, 50)); dprintf(fd, "\n90th percentile: %ums", findPercentile(data, 90)); dprintf(fd, "\n95th percentile: %ums", findPercentile(data, 95)); dprintf(fd, "\n99th percentile: %ums", findPercentile(data, 99)); for (int i = 0; i < NUM_BUCKETS; i++) { dprintf(fd, "\nNumber %s: %u", JANK_TYPE_NAMES[i], data->jankTypeCounts[i]); } dprintf(fd, "\nHISTOGRAM:"); for (size_t i = 0; i < data->frameCounts.size(); i++) { dprintf(fd, " %ums=%u", 
frameTimeForFrameCountIndex(i), data->frameCounts[i]); } for (size_t i = 0; i < data->slowFrameCounts.size(); i++) { dprintf(fd, " %zums=%u", (i * kSlowFrameBucketIntervalMs) + kSlowFrameBucketStartMs, data->slowFrameCounts[i]); } dprintf(fd, "\n"); } void JankTracker::reset() { mData->jankTypeCounts.fill(0); mData->frameCounts.fill(0); mData->slowFrameCounts.fill(0); mData->totalFrameCount = 0; mData->jankFrameCount = 0; mData->statStartTime = systemTime(CLOCK_MONOTONIC); sFrameStart = Properties::filterOutTestOverhead ? FrameInfoIndex::HandleInputStart : FrameInfoIndex::IntendedVsync; } uint32_t JankTracker::findPercentile(const ProfileData* data, int percentile) { int pos = percentile * data->totalFrameCount / 100; int remaining = data->totalFrameCount - pos; for (int i = data->slowFrameCounts.size() - 1; i >= 0; i--) { remaining -= data->slowFrameCounts[i]; if (remaining <= 0) { return (i * kSlowFrameBucketIntervalMs) + kSlowFrameBucketStartMs; } } for (int i = data->frameCounts.size() - 1; i >= 0; i--) { remaining -= data->frameCounts[i]; if (remaining <= 0) { return frameTimeForFrameCountIndex(i); } } return 0; } } /* namespace uirenderer */ } /* namespace android */