# Copyright (c) 2014 The Chromium OS Authors. All rights reserved.
# Use of this source code is governed by a BSD-style license that can be
# found in the LICENSE file.

import os

import numpy

from autotest_lib.client.bin import test
from autotest_lib.client.bin import utils
from autotest_lib.client.common_lib import error

import perf_lbr_verification
import perf_verification
import stats_utils

INTEL_LBR_UARCHS = (
# 'Broadwell', # Waiting on kernel support.
'Haswell',
'IvyBridge',
'SandyBridge')


class hardware_PerfCounterVerification(test.test):
"""Verify perf counters count what we think they count.
For cycles and instructions, we expect a strong correlation between
the number of iterations of a "noploop" program and the number of
cycles and instructions. For TLB misses, we expect a strong correlation
between number of misses and number of iterations of a matching benchmark
Each loop iteration should retire a constant number of additional
instructions, and should take a nearly constant number of additional
cycles or misses.
"""
version = 1
preserve_srcdir = True

    def initialize(self, perf_cmd='stat', events=('cycles', 'instructions')):
        """Verify a compiler is present and record the perf configuration."""
        self.job.require_gcc()
        self.perf_cmd = perf_cmd
        self.events = events

    def setup(self):
        """Build the benchmark programs in srcdir."""
        os.chdir(self.srcdir)
        utils.make('clean')
        utils.make()

    def warmup(self):
        """Raise TestNAError on boards or microarchitectures we can't test."""
        if self.perf_cmd == 'record -b':
            uarch = utils.get_intel_cpu_uarch()
            if uarch not in INTEL_LBR_UARCHS:
                raise error.TestNAError('Unsupported microarchitecture.')
        unsupported_boards = ['gizmo']
        board = utils.get_board()
        if board in unsupported_boards:
            raise error.TestNAError('Unsupported board.')

    def run_once(self, program, multiplier, **kwargs):
        """Run the benchmark under perf and gather the measurement facts."""
        program = os.path.join(self.srcdir, program)
        if self.perf_cmd == 'stat':
            self.facts = perf_verification.GatherPerfStats(
                    program, ','.join(self.events), multiplier)
        elif self.perf_cmd == 'record -b':
            branch = perf_lbr_verification.ReadBranchAddressesFile(
                    os.path.join(self.srcdir, 'noploop_branch.txt'))
            self.facts = perf_lbr_verification.GatherPerfBranchSamples(
                    program, branch, ','.join(self.events), 10000)
        else:
            raise error.TestError('Unrecognized perf_cmd')

    def postprocess_iteration(self):
        """Fit each event count against loop count and emit the results."""
        # numpy.int was a deprecated alias for the builtin int (removed in
        # NumPy 1.24); int64 matches its behavior on our 64-bit targets.
        if self.perf_cmd == 'stat':
            dt = numpy.dtype([('loops', numpy.int64)] +
                             [(e, numpy.int64) for e in self.events])
        elif self.perf_cmd == 'record -b':
            dt = numpy.dtype([('loops', numpy.int64),
                              ('branch_count', numpy.int64)])
        else:
            raise error.TestError('Unrecognized perf_cmd')
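
        # Shape note (an assumption inferred from how FactsToNumpyArray
        # is used below, not from its documentation): each entry of
        # self.facts is a mapping from the dtype field names ('loops'
        # plus event names) to integer counts, e.g.
        #   {'loops': 1000000, 'cycles': 2100000}  # illustrative values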
arr = stats_utils.FactsToNumpyArray(self.facts, dt)
results = {}
is_tlb_benchmark = ('iTLB-misses' in dt.names or
'dTLB-misses' in dt.names)
for y_var in dt.names:
if y_var == 'loops': continue
if y_var == 'cycles' and is_tlb_benchmark: continue
(slope, intercept), r2 = stats_utils.LinearRegression(
arr['loops'], arr[y_var])
prefix = y_var + '_'
results[prefix+'slope'] = slope
results[prefix+'intercept'] = intercept
results[prefix+'r_squared'] = r2
            if y_var in ('dTLB-misses', 'iTLB-misses'):
                misses_per_million_cycles = [x[y_var] * 1.0e6 / x['cycles']
                                             for x in self.facts]
                rvar = prefix + 'misses_per_million_cycles'
                results[rvar] = numpy.max(misses_per_million_cycles)
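
        # stats_utils.LinearRegression, as used above, returns
        # ((slope, intercept), r_squared). A minimal sketch with fake,
        # perfectly linear data (illustrative only, never executed here):
        #
        #   loops = numpy.array([1000000, 2000000, 4000000])
        #   counts = 3 * loops + 42
        #   (slope, intercept), r2 = stats_utils.LinearRegression(
        #           loops, counts)
        #   # slope ~3.0, intercept ~42.0, r2 ~1.0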
# Output the standard Autotest way:
self.write_perf_keyval(results)
# ... And the CrOS-specific way:
        for k, v in results.items():
            self.output_perf_value(k, v)
if ('cycles' in self.events and not is_tlb_benchmark and
results['cycles_r_squared'] < 0.996):
raise error.TestFail('Poor correlation for cycles ~ loops')
if ('instructions' in self.events and
results['instructions_r_squared'] < 0.999):
raise error.TestFail('Poor correlation for instructions ~ loops')
if ('iTLB-misses' in self.events and
results['iTLB-misses_r_squared'] < 0.999):
raise error.TestFail('Poor correlation for iTLB-misses ~ loops')
if ('dTLB-misses' in self.events and
results['dTLB-misses_r_squared'] < 0.999):
raise error.TestFail('Poor correlation for dTLB-misses ~ loops')
if (self.perf_cmd == 'record -b' and
results['branch_count_r_squared'] < 0.9999999):
raise error.TestFail('Poor correlation for branch_count ~ loops')
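

# Typical control-file invocation (a hedged sketch; the authoritative
# arguments live in this test's control files and may differ):
#
#   job.run_test('hardware_PerfCounterVerification',
#                perf_cmd='stat',
#                events=('cycles', 'instructions'),
#                program='noploop',
#                multiplier=10)  # hypothetical multiplier value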