#!/usr/bin/env python

# Copyright 2016 The Chromium OS Authors. All rights reserved.
# Use of this source code is governed by a BSD-style license that can be
# found in the LICENSE file.

"""
This module is used to upload csv files generated by performance related tests
to cns. More details about the implementation can be found in crbug.com/598504.

The overall work flow is as follows.
1. Query tko_test_attributes table for perf_csv_folder attribute. The attribute
contains a path to the csv files that need to be uploaded to cns.
2. Filter the perf_csv_folder attributes to keep only those whose test jobs
finished at least an hour ago. This is to make sure the results have already
been uploaded to GS.
3. Locate the csv files in GS, and upload them to the desired cns location.

After every run, the script saves the minimum test attribute id to use for the
next query to a local file, and repeats the workflow.

"""

import argparse
import datetime
import logging
import os
import shutil
import tempfile
import time

import common
from autotest_lib.client.bin import utils
from autotest_lib.client.common_lib import logging_config
from autotest_lib.client.common_lib.cros import retry
from autotest_lib.frontend import setup_django_environment
from autotest_lib.frontend.tko import models as tko_models


# Number of hours that must have passed since a test job finished before the
# script processes its csv folder. This allows gs_offloader to have enough time
# to upload the results to GS.
CUTOFF_TIME_HOURS = 1

# Default wait time in seconds before querying the database again when no new
# folder was found.
DEFAULT_INTERVAL_SEC = 60

# Timeout in minutes for upload attempts for a given folder. Passed as
# timeout_min to the retry decorator on CsvFolder.upload.
UPLOAD_TIMEOUT_MINS = 5

class CsvNonexistenceException(Exception):
    """Raised to signal that no csv file could be found in GS."""


class CsvFolder(object):
    """A folder of csv files in test results, and the logic to upload it.

    Each instance describes one perf_csv_folder test attribute. The csv files
    are located in GoogleStorage under gs_path, downloaded to a local temp
    directory, and then copied to cns under cns_path.
    """

    # A class variable whose value is the GoogleStorage path to the test
    # results.
    gs_path = None

    # A class variable whose value is the cns path to upload the csv files to.
    cns_path = None

    def __init__(self, test_attribute_id, perf_csv_folder, test_view):
        """Initialize a CsvFolder object.

        @param test_attribute_id: ID of test attribute record.
        @param perf_csv_folder: Path of the folder contains csv files in test
                results. It's the value of perf_csv_folder attribute from
                tko_test_attributes table.
        @param test_view: A db object from querying tko_test_view_2 for the
                related tko_test_attributes.
        """
        self.test_attribute_id = test_attribute_id
        self.perf_csv_folder = perf_csv_folder
        self.test_view = test_view


    def __str__(self):
        return '%s:%s:%s' % (self.test_view.job_name, self.test_view.job_tag,
                             self.perf_csv_folder)


    def _get_url(self):
        """Get the url to the test results of the job in GS.

        The url is formed by joining the class-level gs_path with the job_tag
        of the test view. For example:
        gs://chromeos-autotest-results/123-chromeos-test/host1/
        gsutil is used to download the csv files with this gs url.

        @return: The GS url as a string.
        """
        return os.path.join(self.gs_path, self.test_view.job_tag)


    def _download(self, dest_dir):
        """Download the csv files of the job to the given dest_dir.

        All csv files found recursively under the job's GS url are copied
        directly into dest_dir. Nothing is returned; callers list dest_dir to
        find the downloaded files.

        @param dest_dir: A directory to store the downloaded csv files.

        @raise CsvNonexistenceException: If no csv file found in the GS.
        """
        gs_url = self._get_url()
        # Find all csv files in given GS url recursively.
        # NOTE(review): ignore_status is presumably set so that a non-zero
        # exit status (e.g. grep matching nothing) does not raise - confirm.
        output = utils.run('gsutil ls -r %s | grep -e .*\\\\.csv$' %
                           gs_url, ignore_status=True).stdout
        # Drop empty entries; empty output splits into [''].
        files = [f for f in output.strip().split('\n') if f]
        if not files:
            # Format the message here. Passing gs_url as a separate exception
            # argument would leave the '%s' placeholder unformatted.
            raise CsvNonexistenceException('No csv file found in %s' % gs_url)

        # Copy files from GS to dest_dir
        for f in files:
            utils.run('gsutil cp %s %s' % (f, dest_dir))


    @retry.retry(Exception, blacklist=[CsvNonexistenceException],
                 timeout_min=UPLOAD_TIMEOUT_MINS)
    def upload(self):
        """Upload the folder to cns.

        The csv files are first downloaded from GS to a temporary directory,
        which is always removed afterwards, and then copied to a cns folder
        derived from the test name, hostname and job finished time.

        @raise CsvNonexistenceException: If no csv file found in the GS.
        """
        temp_dir = tempfile.mkdtemp(suffix='perf_csv')
        try:
            self._download(temp_dir)
            files = os.listdir(temp_dir)
            finished_time = self.test_view.job_finished_time
            # File in cns is stored under folder with format of:
            # <test_name>/<host_name>/YYYY/mm/dd/HH/MM
            # where month, day, hour and minute are zero-padded to 2 digits.
            path_in_cns = os.path.join(
                    self.cns_path,
                    self.test_view.test_name, self.test_view.hostname,
                    str(finished_time.year),
                    str(finished_time.month).zfill(2),
                    str(finished_time.day).zfill(2),
                    str(finished_time.hour).zfill(2),
                    str(finished_time.minute).zfill(2))
            utils.run('fileutil mkdir -p %s' % path_in_cns)
            for f in files:
                utils.run('fileutil copytodir -f %s %s' %
                          (os.path.join(temp_dir, f), path_in_cns))
        finally:
            shutil.rmtree(temp_dir)


class DBScanner(object):
    """Class contains the logic to query tko_test_attributes table for
    new perf_csv_folder attributes and create CsvFolder object for each
    new perf_csv_folder attribute.
    """

    # Minimum test_attribute id for querying tko_test_attributes table.
    min_test_attribute_id = -1

    @classmethod
    def get_perf_csv_folders(cls):
        """Query tko_test_attributes table for new entries of perf_csv_folder.

        Attributes are scanned in increasing id order, starting from
        min_test_attribute_id. The scan stops at the first attribute whose job
        finished less than CUTOFF_TIME_HOURS ago, so that every returned
        folder has a smaller id than any attribute that still has to wait.
        A caller that advances min_test_attribute_id past the largest returned
        id therefore never skips an attribute that was merely too recent.

        Attributes without a matching test view, or whose job has no finished
        time, are logged and skipped.

        @return: A list of CsvFolder objects for each new entry of
                 perf_csv_folder attribute in tko_test_attributes table.
        """
        attributes = tko_models.TestAttribute.objects.filter(
                attribute='perf_csv_folder',
                id__gte=cls.min_test_attribute_id).order_by('id')
        folders = []

        cutoff_time = (datetime.datetime.now() -
                       datetime.timedelta(hours=CUTOFF_TIME_HOURS))
        for attribute in attributes:
            # Fetch the first matching test view once. Indexing the lazy
            # queryset repeatedly may issue one database query per access, and
            # [0] raises IndexError when there is no matching row.
            test_views = list(tko_models.TestView.objects.filter(
                    test_idx=attribute.test_id)[:1])
            if not test_views:
                logging.warning('No test view found for test attribute %s '
                                '(test %s), skipping.',
                                attribute.id, attribute.test_id)
                continue
            test_view = test_views[0]
            if test_view.job_finished_time is None:
                # Without a finished time neither the cutoff check nor the
                # cns path (built from the finished time) can be computed.
                logging.warning('Job of test attribute %s has no finished '
                                'time, skipping.', attribute.id)
                continue
            if test_view.job_finished_time > cutoff_time:
                # Too recent; stop here so no later attribute is processed
                # ahead of this one. It is picked up again by a later scan.
                break
            folders.append(CsvFolder(attribute.id, attribute.value,
                                     test_view))
        return folders


def setup_logging(log_dir):
    """Attach a DEBUG-level file handler for this script's log file.

    The log is written to perf_csv_uploader.log under the absolute path of
    log_dir.

    @param log_dir: Path to the directory storing logs of this script.
    """
    log_config = logging_config.LoggingConfig()
    absolute_log_dir = os.path.abspath(log_dir)
    log_path = os.path.join(absolute_log_dir, 'perf_csv_uploader.log')
    log_config.add_file_handler(file_path=log_path, level=logging.DEBUG)


def save_min_test_attribute_id(test_attribute_id_file):
    """Write the current DBScanner.min_test_attribute_id to a cache file.

    The file is overwritten with the string form of the id.

    @param test_attribute_id_file: Path to the file storing the value of
            min_test_attribute_id.

    @return: Whatever the file object's write() call returns.
    """
    with open(test_attribute_id_file, 'w') as id_file:
        serialized_id = str(DBScanner.min_test_attribute_id)
        return id_file.write(serialized_id)


def get_min_test_attribute_id(test_attribute_id_file):
    """Get the minimum test attribute id from a cached file.

    @param test_attribute_id_file: Path to the file storing the value of
            min_test_attribute_id.

    @return: The cached id as an int, or -1 if the file cannot be read or
             does not contain a valid integer.
    """
    try:
        with open(test_attribute_id_file, 'r') as f:
            content = f.read()
    except IOError:
        # min_test_attribute_id has not been set, default to -1.
        return -1

    try:
        return int(content)
    except ValueError:
        # An empty or corrupted cache file (e.g. from an interrupted write)
        # must not crash the script; fall back to the same default as a
        # missing file.
        logging.warning('Invalid min_test_attribute_id %r found in %s, '
                        'default to -1.', content, test_attribute_id_file)
        return -1


def get_options():
    """Get the command line options.

    All three options are required: the rest of the script joins paths onto
    each of them, so a missing value would only fail later with a less
    helpful error. As a side effect, gs_path and cns_path are stored as class
    attributes of CsvFolder.

    @return: Command line options of the script.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument('--gs_path', type=str, dest='gs_path', required=True,
                        help='GoogleStorage path that stores test results.')
    parser.add_argument('--cns_path', type=str, dest='cns_path', required=True,
                        help='cns path to where csv files are uploaded to.')
    parser.add_argument('--log_dir', type=str, dest='log_dir', required=True,
                        help='Directory used to store logs.')

    options = parser.parse_args()
    CsvFolder.gs_path = options.gs_path
    CsvFolder.cns_path = options.cns_path

    return options


def main():
    """Main process to repeat the workflow of searching/uploading csv files.

    Runs forever: each iteration queries for new csv folders, uploads them,
    and then advances (and persists) DBScanner.min_test_attribute_id so the
    next query starts after the processed entries, or at the first entry that
    failed to upload.
    """
    options = get_options()
    setup_logging(options.log_dir)
    test_attribute_id_file = os.path.join(options.log_dir,
                                          'perf_csv_uploader_test_attr_id')
    DBScanner.min_test_attribute_id = get_min_test_attribute_id(
            test_attribute_id_file)

    while True:
        folders = DBScanner.get_perf_csv_folders()
        if not folders:
            logging.info('No new folders found. Wait...')
            time.sleep(DEFAULT_INTERVAL_SEC)
            continue

        failed_folders = []
        for folder in folders:
            try:
                logging.info('Uploading folder: %s', folder)
                folder.upload()
            except CsvNonexistenceException as e:
                # Missing CSV files in GS are not treated as a failure, but
                # leave a trace in the log so the skip is visible.
                logging.warning('Skipping folder %s, no csv file to upload: '
                                '%s', folder, e)
            except Exception as e:
                failed_folders.append(folder)
                # logging.exception records the traceback along with the
                # message.
                logging.exception('Failed to upload folder %s, error: %s',
                                  folder, e)
        if failed_folders:
            # Set the min_test_attribute_id to be the smallest one that failed
            # to upload, so it is retried in the next iteration.
            min_test_attribute_id = min(folder.test_attribute_id
                                        for folder in failed_folders)
        else:
            # Everything was handled; continue after the largest id seen.
            min_test_attribute_id = max(folder.test_attribute_id
                                        for folder in folders) + 1
        # Only touch the cache file when the value actually changed.
        if DBScanner.min_test_attribute_id != min_test_attribute_id:
            DBScanner.min_test_attribute_id = min_test_attribute_id
            save_min_test_attribute_id(test_attribute_id_file)


# Run the upload loop only when executed as a script, not when imported.
if __name__ == '__main__':
    main()