#! /usr/bin/python

# Copyright 2017 The Chromium OS Authors. All rights reserved.
# Use of this source code is governed by a BSD-style license that can be
# found in the LICENSE file.

"""
Swarming bot manager running on servers that hold swarming bots.
This manages running swarming bots and routinely recovers any that die.
"""

import argparse
import logging
import signal
import socket
import sys
import time
import urllib2

import common
from autotest_lib.server.cros.dynamic_suite import frontend_wrappers
from autotest_lib.site_utils.chromeos_proxy import swarming_bots

from chromite.lib import metrics
from chromite.lib import ts_mon_config


# Number of seconds to sleep between consecutive bot checks.
CHECK_INTERVAL = 180

# Shutdown flag polled by the main loop; the loop stops once this is True.
_shut_down = False

# Format string for the metric names reported by this module; the '%s' is
# filled in with the per-metric suffix (e.g. 'tick').
metrics_template = 'chromeos/autotest/swarming/bot_manager/%s'

def _parse_args(args):
    """Build the command-line parser and parse the given arguments.

    @param args: A list of command-line argument strings, without the
                 program name.

    @return: The argparse namespace holding afe, id_range, working_dir,
             swarming_proxy, log_file and verbose.
    """
    arg_parser = argparse.ArgumentParser(
            description='Manage the set of swarming bots running on a server')

    # Required positional arguments, listed in command-line order.
    # TODO(xixuan): refactor together with swarming_bots.
    positionals = (
            ('afe',
             'AFE to get server role and status.'),
            ('id_range',
             'A range of integer, each bot created will be labeled '
             'with an id from this range. E.g. "1-200"'),
            ('working_dir',
             'A working directory where bots will store files '
             'generated at runtime'),
    )
    for arg_name, help_text in positionals:
        arg_parser.add_argument(arg_name, type=str, help=help_text)

    # Optional flags.
    arg_parser.add_argument(
            '-p', '--swarming_proxy',
            dest='swarming_proxy',
            type=str,
            default=swarming_bots.DEFAULT_SWARMING_PROXY,
            help='The URL of the swarming instance to talk to, '
                 'Default to the one specified in global config')
    arg_parser.add_argument(
            '-f', '--log_file',
            dest='log_file',
            help='Path to the log file.')
    arg_parser.add_argument(
            '-v', '--verbose',
            dest='verbose',
            action='store_true',
            help='Verbose mode')

    parsed = arg_parser.parse_args(args)
    return parsed


def handle_signal(signum, frame):
    """Signal handler that requests shutdown of the manager loop.

    Sets the module-level _shut_down flag to True; it does not stop the
    process itself.

    @param signum: The signal received. Ignored.
    @param frame: The interrupted stack frame. Ignored.
    """
    # Without this declaration the assignment below would only bind a
    # function-local name and the module-level flag would never change.
    global _shut_down

    del signum
    del frame

    _shut_down = True


def is_server_in_prod(server_name, afe):
    """Validate server's role and status.

    Asks the AFE's 'get_servers' RPC for a server matching server_name with
    status 'primary' and role 'golo_proxy', and reports the outcome to the
    'server_in_prod_check' counter metric.

    @param server_name: the server name to be validated.
    @param afe: the afe server to get role & status info in server_db.

    @return: A boolean value, True when the server_name is in prod, False
             otherwise, or if the RPC fails for any reason.
    """
    logging.info('Validating server: %s', server_name)
    # Keep the client under its own name so that the |afe| argument is still
    # available for log messages below.
    afe_client = frontend_wrappers.RetryingAFE(timeout_min=5, delay_sec=10,
                                               server=afe)
    is_prod_proxy_server = False
    try:
        if afe_client.run('get_servers', hostname=server_name,
                          status='primary', role='golo_proxy'):
            is_prod_proxy_server = True
    except urllib2.URLError as e:
        logging.warning('RPC get_servers failed on afe %s: %s', afe, e)
    except Exception:
        # Best effort: a failed check must not take down the manager, but
        # the failure should be visible in the logs rather than silently
        # discarded. Non-Exception errors such as SystemExit propagate.
        logging.exception('Unexpected error in RPC get_servers on afe %s',
                          afe)
    finally:
        # Record the outcome whether or not the RPC succeeded.
        metrics.Counter(metrics_template % 'server_in_prod_check').increment(
                fields={'success': is_prod_proxy_server})

    return is_prod_proxy_server


@metrics.SecondsTimerDecorator(metrics_template % 'tick')
def tick(afe, bot_manager):
    """Run a single iteration of the swarming bot manager.

    The bots are checked only when this host is validated as a prod server
    by is_server_in_prod(); otherwise the tick does nothing.

    @param afe: the afe to check server role.
    @param bot_manager: a swarming_bots.BotManager instance.
    """
    this_host = socket.getfqdn()
    if not is_server_in_prod(this_host, afe):
        return
    bot_manager.check()


def main(args):
    """Main func: run ticks periodically until shutdown is requested.

    @param args: A list of system arguments, without the program name.

    @return: 1 if no swarming proxy is specified, 0 once the loop has been
             stopped by the shutdown flag.
    """
    args = _parse_args(args)
    swarming_bots.setup_logging(args.verbose, args.log_file)

    if not args.swarming_proxy:
        logging.error(
                'No swarming proxy instance specified. '
                'Specify swarming_proxy in [CROS] in shadow_config, '
                'or use --swarming_proxy')
        return 1

    # Normalize the proxy so that it always carries the https:// scheme.
    if not args.swarming_proxy.startswith('https://'):
        swarming_proxy = 'https://' + args.swarming_proxy
    else:
        swarming_proxy = args.swarming_proxy

    logging.info("Setting signal handler.")
    signal.signal(signal.SIGINT, handle_signal)
    signal.signal(signal.SIGTERM, handle_signal)

    # Pass the normalized URL; previously the raw argument was passed and
    # the normalized value was computed but never used.
    bot_manager = swarming_bots.BotManager(
            swarming_bots.parse_range(args.id_range),
            args.working_dir,
            swarming_proxy)
    with ts_mon_config.SetupTsMonGlobalState('swarming_bots', indirect=True):
        # _shut_down is only read here, so no global declaration is needed.
        while not _shut_down:
            tick(args.afe, bot_manager)
            if _shut_down:
                # Shutdown was requested during the tick; skip the sleep.
                break
            time.sleep(CHECK_INTERVAL)

    return 0


if __name__ == '__main__':
    # Run the manager with the command-line arguments (program name
    # stripped) and use its return value as the process exit status.
    exit_status = main(sys.argv[1:])
    sys.exit(exit_status)