普通文本  |  165行  |  5.93 KB

#!/usr/bin/python -u

"""
A script to help find the last few jobs that ran on a set of hosts that match
the specified query, and rank them according to frequency across these hosts.
Usage:
1. Get the last 5 jobs from up to 1 day ago that ran on all lumpies in pool
    suites that are currently in the "Repair Failed" state:
    ./sheriff_host_utils --days_back=1
    --query 'labels=pool:suites,board:lumpy status="Repair Failed"'

2. Email someone about the last 5 jobs on all Repair Failed hosts.
    ./sheriff_host_utils --limit 5 --query 'status="Repair Failed"'
            --email someone@something.com
"""

import argparse
import collections
import datetime
import operator
import shlex
import sys

import common

from autotest_lib.client.common_lib import mail
from autotest_lib.frontend import setup_django_environment
from autotest_lib.frontend.afe import models
from autotest_lib.server import frontend
from autotest_lib.server.cros import repair_utils
from django.utils import timezone as django_timezone


def _parse_args(args):
    """Parse the command line options of this script.

    Prints a usage hint and exits with status 1 when no arguments are given.

    @param args: The list of command line arguments, without the program name.

    @return: An argparse.Namespace with the attributes limit, days_back, query
        and email. limit and days_back are integers.
    """
    description = ('./sheriff_host_utils.py --limit 5 --days_back 5 '
                   '--query \'status="Repair Failed" invalid=0 locked=0\'')
    if not args:
        print('Too few arguments, execute %s, or try '
              './sheriff_host_utils.py --help' % description)
        sys.exit(1)

    parser = argparse.ArgumentParser(description=description)
    # type=int lets argparse reject non-numeric values with a usage error
    # instead of leaving the conversion (and its traceback) to the caller.
    parser.add_argument('--limit', type=int, default=5,
                        help='The number of jobs per host.Eg: --limit 5')
    parser.add_argument('--days_back', type=int, default=5,
                        help='Number of days to search. Eg: --days_back 5')
    default_query = 'status="Repair Failed" labels=pool:bvt,board:lumpy'
    parser.add_argument('--query', default=default_query,
                        help='Search query.Eg: --query %s' % default_query)
    parser.add_argument('--email', default=None, help='send results to email.')
    return parser.parse_args(args)


def _parse_query(query):
    """Parse a host query string into a dictionary.

    All queries follow the format 'key=value key2=value2 ...', where all keys
    are columns of the host table with the exception of labels. When specifying
    labels, the format is the same even though a label is a foreign key:
    --query 'labels=<comma separated list of label names>'.
    The query is tokenized with shlex, so a value containing whitespace must be
    quoted, eg: status="Repair Failed".

    @param query: The query string to parse.

    @return: A dictionary into which the query has been parsed. If a key is
        repeated, the last value wins.

    @raises ValueError: If a term of the query has no '=' or has an empty key.
    """
    payload = {}
    for term in shlex.split(query):
        # Split on the first '=' only, so a value may itself contain '='.
        key, separator, value = term.partition('=')
        if not separator or not key:
            raise ValueError(
                    'Malformed query term %r, expected key=value' % term)
        payload[key] = value
    return payload


def _get_pool(host):
    """Return the name of the first pool label found on a host.

    @param host: A host whose labels.all() yields objects with a name.

    @return: The full label name, eg 'pool:bvt', or None if the host has no
        label starting with 'pool:'.
    """
    for label in host.labels.all():
        # Match on the 'pool:' prefix; a plain substring test would also pick
        # up unrelated labels that merely contain the word, eg a board name.
        if label.name.startswith('pool:'):
            return label.name
    return None


def retrieve_hosts(payload):
    """Retrieve hosts matching the payload.

    The 'labels' entry, if present, is a comma separated list of label names
    and is applied as one labels__name filter per name, so a host must carry
    every listed label. All remaining entries are passed to filter() as
    keyword arguments.

    @param payload: A dict with selection criteria for hosts. It is not
        modified.

    @return: A queryset of hosts matching the payload.
    """
    # Work on a copy so the caller's dict still holds the complete query
    # (including labels) after this call.
    filters = dict(payload)
    query_hosts = models.Host.objects.all()
    if 'labels' in filters:
        # Replace label names with a foreign key query.
        for label in filters.pop('labels').split(','):
            query_hosts = query_hosts.filter(labels__name=label)
    return query_hosts.filter(**filters)


def analyze_jobs(hqes):
    """Rank the tests that ran on the matching hosts by how often they ran.

    Only the part of each job name after its last '/' is counted; a name
    without a '/' is counted as is.

    @param hqes: An iterable of host queue entries, each exposing job.name.

    @return: A string with the results of the analysis: a header followed by
        one 'count test name: name' entry per test, most frequent first.
    """
    ranking = collections.Counter(
            hqe.job.name.rsplit('/', 1)[-1] for hqe in hqes)
    # Sort by descending count and break ties alphabetically, so the report
    # does not depend on dictionary iteration order.
    ordered = sorted(ranking.items(), key=lambda stat: (-stat[1], stat[0]))
    header = 'Ranking tests that ran on those hosts by frequency: \n\t'
    return header + ''.join('%s test name: %s\n\t' % (count, test_name)
                            for test_name, count in ordered)


def last_jobs_on_hosts(payload, limit_jobs, days_back):
    """Find the last limit_jobs on hosts with given status within days_back.

    Hosts without a pool label and hosts without any job since the cutoff are
    listed separately in the returned report.

    @param payload: A dictionary specifying the selection criteria of the
        hosts. Eg {'status': "Ready", 'id': 40}
    @param limit_jobs: The number of jobs per host.
    @param days_back: The days back to search for jobs.

    @return: A string with information about the last jobs that ran on all
        hosts matching the query mentioned in the payload.
    """
    pool_less, job_less, jobs_to_analyze = [], [], []
    host_sections = []
    hqes = models.HostQueueEntry.objects.all()
    cutoff = django_timezone.now().date() - datetime.timedelta(days=days_back)

    # Hand retrieve_hosts a copy so the payload reported in the no-match
    # message below is exactly what the caller passed in.
    for host in retrieve_hosts(dict(payload)):
        pool = _get_pool(host)
        if not pool:
            pool_less.append(host.hostname)
            continue
        # Newest entries first, capped at limit_jobs per host.
        relevant_hqes = list(
                hqes.filter(host_id=host.id, started_on__gte=cutoff)
                .order_by('-started_on')[:limit_jobs])
        if not relevant_hqes:
            job_less.append(host.hostname)
            continue
        jobs = ['name: %s, id: %s' % (hqe.job.name, hqe.job_id)
                for hqe in relevant_hqes]
        host_sections.append(
                '%s\n%s\n\t%s' % (pool, host, '\n\t'.join(jobs)))
        jobs_to_analyze += relevant_hqes

    # Join with a newline so each host's section starts on its own line
    # instead of running into the last job of the previous host.
    message = '\n'.join(host_sections)
    if job_less:
        message += ('\nNo jobs found for the following hosts within cutoff '
                    '%s\n\t' % cutoff)
        message += '\n\t'.join(job_less)
    if pool_less:
        # Same layout as the job-less list: one indented hostname per line.
        message += '\nNo pools found on the following hosts:\n\t'
        message += '\n\t'.join(pool_less)
    if jobs_to_analyze:
        message += '\n\n%s' % analyze_jobs(jobs_to_analyze)

    if message:
        return '%s\n%s' % ('Host information:', message)
    return 'No hosts matching query %s from %s days back' % (payload, days_back)


if __name__ == '__main__':
    # Parse the command line, build the report for the queried hosts, mail it
    # when --email was given, and always echo it to stdout.
    args = _parse_args(sys.argv[1:])
    message = last_jobs_on_hosts(_parse_query(args.query),
                                 int(args.limit), int(args.days_back))
    if args.email:
        mail.send('', args.email, '',
                  'Results from your sheirff script.', message)
    # Parenthesized so the line is valid both as a Python 2 print statement
    # and as a Python 3 function call, matching the form used in _parse_args.
    print(message)