普通文本  |  117行  |  3.98 KB

#!/usr/bin/env python
# Copyright 2018 The Chromium OS Authors. All rights reserved.
# Use of this source code is governed by a BSD-style license that can be
# found in the LICENSE file.

"""A simple service to monitor DUT statuses from master db/afe."""
import collections
import logging
import sys
import time

import common
from autotest_lib.server import constants
from autotest_lib.server import frontend
from chromite.lib import metrics
from chromite.lib import ts_mon_config

from infra_libs import ts_mon


# Immutable, hashable key identifying one group of DUTs; used as the dictionary
# key when counting hosts and as the source of the metric field values.
DutCountBucket = collections.namedtuple(
    'DutCountBucket',
    ['board', 'model', 'pool', 'is_locked', 'status'],
)


def _get_bucket_for_host(host):
    """Work out which counter bucket |host| belongs to.

    Args:
        host: A Host object as returned by afe. Its labels, status and locked
            attributes are read.

    Returns:
        A DutCountBucket built from the host's board, model and pool labels,
        its locked flag and its status.
    """
    host_labels = host.labels

    pool_name = _get_unique_label(host_labels, constants.Labels.POOL_PREFIX)
    # Pools listed in MANAGED_POOLS are reported with a 'managed:' prefix.
    if pool_name in constants.Pools.MANAGED_POOLS:
        pool_name = 'managed:' + pool_name

    return DutCountBucket(
        board=_get_unique_label(host_labels, constants.Labels.BOARD_PREFIX),
        model=_get_unique_label(host_labels, constants.Labels.MODEL_PREFIX),
        pool=pool_name,
        is_locked=host.locked,
        # A falsy status is reported with the '[None]' placeholder.
        status=host.status or '[None]',
    )


def _get_unique_label(labels, prefix):
    """Return the labels for a given prefix, with prefix stripped.

    If prefixed label does not occur, return '[None]'
    If prefixed label occurs multiply, return '[Multiple]'

    _get_unique_label(['foo:1', 'foo:2', 'bar1'], 'foo:') -> '[Multiple]'

    _get_unique_label(['foo:1', 'bar2', 'baz3'], 'foo:') -> '1'

    _get_prefixed_labels(['bar1', 'baz1'], 'foo:') -> '[None]'
    """
    ls = [l[len(prefix):] for l in labels if l.startswith(prefix)]
    if not ls:
        return '[None]'
    elif len(ls) == 1:
        return ls[0]
    else:
        return '[Multiple]'


def main(argv):
    """Entry point for dut_mon.

    Inside the ts_mon global state, loops forever: fetches all hosts from the
    AFE, counts them per DutCountBucket, reports every count through the
    dut_count gauge, increments the tick counter and sleeps for two minutes.

    Args:
        argv: Command line arguments; not used by this function.
    """
    logging.getLogger().setLevel(logging.INFO)

    with ts_mon_config.SetupTsMonGlobalState('dut_mon', indirect=True):
        afe = frontend.AFE()
        counters = collections.defaultdict(int)

        # The metric fields mirror the fields of DutCountBucket, so a bucket
        # converted to a dict can be passed directly as the gauge's fields.
        field_spec = [ts_mon.StringField('board'),
                      ts_mon.StringField('model'),
                      ts_mon.StringField('pool'),
                      ts_mon.BooleanField('is_locked'),
                      ts_mon.StringField('status'),
                      ]
        dut_count = metrics.Gauge('chromeos/autotest/dut_mon/dut_count',
                                  description='The number of duts in a given '
                                              'state and bucket.',
                                  field_spec=field_spec)
        tick_count = metrics.Counter('chromeos/autotest/dut_mon/tick',
                                     description='Tick counter of dut_mon.')

        while True:
            # Note: We reset all counters to zero in each loop rather than
            # creating a new defaultdict, because we want to ensure that any
            # gauges that were previously set to a nonzero value by this process
            # get set back to zero if necessary.
            for bucket in counters:
                counters[bucket] = 0

            logging.info('Fetching all hosts.')
            hosts = afe.get_hosts()
            logging.info('Fetched %s hosts.', len(hosts))
            for host in hosts:
                counters[_get_bucket_for_host(host)] += 1

            # items() works on both Python 2 and 3. _asdict() is the documented
            # way to turn a namedtuple into a field-name -> value mapping;
            # namedtuple instances do not reliably expose __dict__.
            for bucket, count in counters.items():
                logging.info('%s %s', bucket, count)
                dut_count.set(count, fields=bucket._asdict())

            tick_count.increment()
            logging.info('Sleeping for 2 minutes.')
            time.sleep(120)


# Start the monitor only when this file is executed as a script, not when it
# is imported as a module.
if __name__ == '__main__':
    main(sys.argv)