/*M///////////////////////////////////////////////////////////////////////////////////////
//
//  IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
//
//  By downloading, copying, installing or using the software you agree to this license.
//  If you do not agree to this license, do not download, install,
//  copy or use the software.
//
//
//                        Intel License Agreement
//
// Copyright (C) 2000, Intel Corporation, all rights reserved.
// Third party copyrights are property of their respective owners.
//
// Redistribution and use in source and binary forms, with or without modification,
// are permitted provided that the following conditions are met:
//
//   * Redistribution's of source code must retain the above copyright notice,
//     this list of conditions and the following disclaimer.
//
//   * Redistribution's in binary form must reproduce the above copyright notice,
//     this list of conditions and the following disclaimer in the documentation
//     and/or other materials provided with the distribution.
//
//   * The name of Intel Corporation may not be used to endorse or promote products
//     derived from this software without specific prior written permission.
//
// This software is provided by the copyright holders and contributors "as is" and
// any express or implied warranties, including, but not limited to, the implied
// warranties of merchantability and fitness for a particular purpose are disclaimed.
// In no event shall the Intel Corporation or contributors be liable for any direct,
// indirect, incidental, special, exemplary, or consequential damages
// (including, but not limited to, procurement of substitute goods or services;
// loss of use, data, or profits; or business interruption) however caused
// and on any theory of liability, whether in contract, strict liability,
// or tort (including negligence or otherwise) arising in any way out of
// the use of this software, even if advised of the possibility of such damage.
//
//M*/

#include "_ml.h"

#if 0
/****************************************************************************************\
*                         Auxilary functions declarations                                *
\****************************************************************************************/
/*---------------------- functions for the CNN classifier ------------------------------*/
static float icvCNNModelPredict(
        const CvStatModel* cnn_model,
        const CvMat* image,
        CvMat* probs CV_DEFAULT(0) );

static void icvCNNModelUpdate(
        CvStatModel* cnn_model, const CvMat* images, int tflag,
        const CvMat* responses, const CvStatModelParams* params,
        const CvMat* CV_DEFAULT(0), const CvMat* sample_idx CV_DEFAULT(0),
        const CvMat* CV_DEFAULT(0), const CvMat* CV_DEFAULT(0));

static void icvCNNModelRelease( CvStatModel** cnn_model );

static void icvTrainCNNetwork( CvCNNetwork* network,
                               const float** images,
                               const CvMat* responses,
                               const CvMat* etalons,
                               int grad_estim_type,
                               int max_iter,
                               int start_iter );

/*------------------------- functions for the CNN network ------------------------------*/
static void icvCNNetworkAddLayer( CvCNNetwork* network, CvCNNLayer* layer );
static void icvCNNetworkRelease( CvCNNetwork** network );

/* In all layer functions we denote input by X and output by Y, where
   X and Y are column-vectors, so that
   length(X)==<n_input_planes>*<input_height>*<input_width>,
   length(Y)==<n_output_planes>*<output_height>*<output_width>.
*/
/*------------------------ functions for convolutional layer ---------------------------*/
static void icvCNNConvolutionRelease( CvCNNLayer** p_layer );

static void icvCNNConvolutionForward( CvCNNLayer* layer, const CvMat* X, CvMat* Y );

static void icvCNNConvolutionBackward( CvCNNLayer*  layer, int t,
    const CvMat* X, const CvMat* dE_dY, CvMat* dE_dX );

/*------------------------ functions for sub-sampling layer ----------------------------*/
static void icvCNNSubSamplingRelease( CvCNNLayer** p_layer );

static void icvCNNSubSamplingForward( CvCNNLayer* layer, const CvMat* X, CvMat* Y );

static void icvCNNSubSamplingBackward( CvCNNLayer*  layer, int t,
    const CvMat* X, const CvMat* dE_dY, CvMat* dE_dX );

/*------------------------ functions for full connected layer --------------------------*/
static void icvCNNFullConnectRelease( CvCNNLayer** p_layer );

static void icvCNNFullConnectForward( CvCNNLayer* layer, const CvMat* X, CvMat* Y );

static void icvCNNFullConnectBackward( CvCNNLayer* layer, int,
    const CvMat*, const CvMat* dE_dY, CvMat* dE_dX );

/****************************************************************************************\
*                             Functions implementations                                  *
\****************************************************************************************/

#define ICV_CHECK_CNN_NETWORK(network)                                                  \
{                                                                                       \
    CvCNNLayer* first_layer, *layer, *last_layer;                                       \
    int n_layers, i;                                                                    \
    if( !network )                                                                      \
        CV_ERROR( CV_StsNullPtr,                                                        \
        "Null <network> pointer. Network must be created by user." );                   \
    n_layers = network->n_layers;                                                       \
    first_layer = last_layer = network->layers;                                         \
    for( i = 0, layer = first_layer; i < n_layers && layer; i++ )                       \
    {                                                                                   \
        if( !ICV_IS_CNN_LAYER(layer) )                                                  \
            CV_ERROR( CV_StsNullPtr, "Invalid network" );                               \
        last_layer = layer;                                                             \
        layer = layer->next_layer;                                                      \
    }                                                                                   \
                                                                                        \
    if( i == 0 || i != n_layers || first_layer->prev_layer || layer )                   \
        CV_ERROR( CV_StsNullPtr, "Invalid network" );                                   \
                                                                                        \
    if( first_layer->n_input_planes != 1 )                                              \
        CV_ERROR( CV_StsBadArg, "First layer must contain only one input plane" );      \
                                                                                        \
    if( img_size != first_layer->input_height*first_layer->input_width )                \
        CV_ERROR( CV_StsBadArg, "Invalid input sizes of the first layer" );             \
                                                                                        \
    if( params->etalons->cols != last_layer->n_output_planes*                           \
        last_layer->output_height*last_layer->output_width )                            \
        CV_ERROR( CV_StsBadArg, "Invalid output sizes of the last layer" );             \
}

#define ICV_CHECK_CNN_MODEL_PARAMS(params)                                              \
{                                                                                       \
    if( !params )                                                                       \
        CV_ERROR( CV_StsNullPtr, "Null <params> pointer" );                             \
                                                                                        \
    if( !ICV_IS_MAT_OF_TYPE(params->etalons, CV_32FC1) )                                \
        CV_ERROR( CV_StsBadArg, "<etalons> must be CV_32FC1 type" );                    \
    if( params->etalons->rows != cnn_model->cls_labels->cols )                          \
        CV_ERROR( CV_StsBadArg, "Invalid <etalons> size" );                             \
                                                                                        \
    if( params->grad_estim_type != CV_CNN_GRAD_ESTIM_RANDOM &&                          \
        params->grad_estim_type != CV_CNN_GRAD_ESTIM_BY_WORST_IMG )                     \
        CV_ERROR( CV_StsBadArg, "Invalid <grad_estim_type>" );                          \
                                                                                        \
    if( params->start_iter < 0 )                                                        \
        CV_ERROR( CV_StsBadArg, "Parameter <start_iter> must be positive or zero" );    \
                                                                                        \
    if( params->max_iter < 1 )                                                \
        params->max_iter = 1;                                                 \
}

/****************************************************************************************\
*                              Classifier functions                                      *
\****************************************************************************************/
ML_IMPL CvStatModel*
cvTrainCNNClassifier( const CvMat* _train_data, int tflag,
            const CvMat* _responses,
            const CvStatModelParams* _params,
            const CvMat*, const CvMat* _sample_idx, const CvMat*, const CvMat* )
{
    CvCNNStatModel* cnn_model    = 0;
    const float** out_train_data = 0;
    CvMat* responses             = 0;

    CV_FUNCNAME("cvTrainCNNClassifier");
    __BEGIN__;

    int n_images;
    int img_size;
    CvCNNStatModelParams* params = (CvCNNStatModelParams*)_params;

    CV_CALL(cnn_model = (CvCNNStatModel*)cvCreateStatModel(
        CV_STAT_MODEL_MAGIC_VAL|CV_CNN_MAGIC_VAL, sizeof(CvCNNStatModel),
        icvCNNModelRelease, icvCNNModelPredict, icvCNNModelUpdate ));

    CV_CALL(cvPrepareTrainData( "cvTrainCNNClassifier",
        _train_data, tflag, _responses, CV_VAR_CATEGORICAL,
        0, _sample_idx, false, &out_train_data,
        &n_images, &img_size, &img_size, &responses,
        &cnn_model->cls_labels, 0 ));

    ICV_CHECK_CNN_MODEL_PARAMS(params);
    ICV_CHECK_CNN_NETWORK(params->network);

    cnn_model->network = params->network;
    CV_CALL(cnn_model->etalons = (CvMat*)cvClone( params->etalons ));

    CV_CALL( icvTrainCNNetwork( cnn_model->network, out_train_data, responses,
        cnn_model->etalons, params->grad_estim_type, params->max_iter,
        params->start_iter ));

    __END__;

    if( cvGetErrStatus() < 0 && cnn_model )
    {
        cnn_model->release( (CvStatModel**)&cnn_model );
    }
    cvFree( &out_train_data );
    cvReleaseMat( &responses );

    return (CvStatModel*)cnn_model;
}

/****************************************************************************************/
static void icvTrainCNNetwork( CvCNNetwork* network,
                               const float** images,
                               const CvMat* responses,
                               const CvMat* etalons,
                               int grad_estim_type,
                               int max_iter,
                               int start_iter )
{
    CvMat** X     = 0;
    CvMat** dE_dX = 0;
    const int n_layers = network->n_layers;
    int k;

    CV_FUNCNAME("icvTrainCNNetwork");
    __BEGIN__;

    CvCNNLayer* first_layer = network->layers;
    const int img_height = first_layer->input_height;
    const int img_width  = first_layer->input_width;
    const int img_size   = img_width*img_height;
    const int n_images   = responses->cols;
    CvMat image = cvMat( 1, img_size, CV_32FC1 );
    CvCNNLayer* layer;
    int n;
    CvRNG rng = cvRNG(-1);

    CV_CALL(X = (CvMat**)cvAlloc( (n_layers+1)*sizeof(CvMat*) ));
    CV_CALL(dE_dX = (CvMat**)cvAlloc( (n_layers+1)*sizeof(CvMat*) ));
    memset( X, 0, (n_layers+1)*sizeof(CvMat*) );
    memset( dE_dX, 0, (n_layers+1)*sizeof(CvMat*) );

    CV_CALL(X[0] = cvCreateMat( img_height*img_width,1,CV_32FC1 ));
    CV_CALL(dE_dX[0] = cvCreateMat( 1, X[0]->rows, CV_32FC1 ));
    for( k = 0, layer = first_layer; k < n_layers; k++, layer = layer->next_layer )
    {
        CV_CALL(X[k+1] = cvCreateMat( layer->n_output_planes*layer->output_height*
            layer->output_width, 1, CV_32FC1 ));
        CV_CALL(dE_dX[k+1] = cvCreateMat( 1, X[k+1]->rows, CV_32FC1 ));
    }

    for( n = 1; n <= max_iter; n++ )
    {
        float loss, max_loss = 0;
        int i;
        int worst_img_idx = -1;
        int* right_etal_idx = responses->data.i;
        CvMat etalon;

        // Find the worst image (which produces the greatest loss) or use the random image
        if( grad_estim_type == CV_CNN_GRAD_ESTIM_BY_WORST_IMG )
        {
            for( i = 0; i < n_images; i++, right_etal_idx++ )
            {
                image.data.fl = (float*)images[i];
                cvTranspose( &image, X[0] );
                
                for( k = 0, layer = first_layer; k < n_layers; k++, layer = layer->next_layer )
                    CV_CALL(layer->forward( layer, X[k], X[k+1] ));
                
                cvTranspose( X[n_layers], dE_dX[n_layers] );
                cvGetRow( etalons, &etalon, *right_etal_idx );
                loss = (float)cvNorm( dE_dX[n_layers], &etalon );
                if( loss > max_loss )
                {
                    max_loss = loss;
                    worst_img_idx = i;
                }
            }
        }
        else
            worst_img_idx = cvRandInt(&rng) % n_images;

        // Train network on the worst image
        // 1) Compute the network output on the <image>
        image.data.fl = (float*)images[worst_img_idx];
        CV_CALL(cvTranspose( &image, X[0] ));

        for( k = 0, layer = first_layer; k < n_layers - 1; k++, layer = layer->next_layer )
            CV_CALL(layer->forward( layer, X[k], X[k+1] ));
        CV_CALL(layer->forward( layer, X[k], X[k+1] ));

        // 2) Compute the gradient
        cvTranspose( X[n_layers], dE_dX[n_layers] );
        cvGetRow( etalons, &etalon, responses->data.i[worst_img_idx] );
        cvSub( dE_dX[n_layers], &etalon, dE_dX[n_layers] );

        // 3) Update weights by the gradient descent
        for( k = n_layers; k > 0; k--, layer = layer->prev_layer )
            CV_CALL(layer->backward( layer, n + start_iter, X[k-1], dE_dX[k], dE_dX[k-1] ));
    }

    __END__;

    for( k = 0; k <= n_layers; k++ )
    {
        cvReleaseMat( &X[k] );
        cvReleaseMat( &dE_dX[k] );
    }
    cvFree( &X );
    cvFree( &dE_dX );
}

/****************************************************************************************/
static float icvCNNModelPredict( const CvStatModel* model,
                                 const CvMat* _image,
                                 CvMat* probs )
{
    CvMat** X       = 0;
    float* img_data = 0;
    int n_layers = 0;
    int best_etal_idx = -1;
    int k;

    CV_FUNCNAME("icvCNNModelPredict");
    __BEGIN__;

    CvCNNStatModel* cnn_model = (CvCNNStatModel*)model;
    CvCNNLayer* first_layer, *layer = 0;
    int img_height, img_width, img_size;
    int nclasses, i;
    float loss, min_loss = FLT_MAX;
    float* probs_data;
    CvMat etalon, image;

    if( !CV_IS_CNN(model) )
        CV_ERROR( CV_StsBadArg, "Invalid model" );

    nclasses = cnn_model->cls_labels->cols;
    n_layers = cnn_model->network->n_layers;
    first_layer   = cnn_model->network->layers;
    img_height = first_layer->input_height;
    img_width  = first_layer->input_width;
    img_size   = img_height*img_width;

    cvPreparePredictData( _image, img_size, 0, nclasses, probs, &img_data );

    CV_CALL(X = (CvMat**)cvAlloc( (n_layers+1)*sizeof(CvMat*) ));
    memset( X, 0, (n_layers+1)*sizeof(CvMat*) );

    CV_CALL(X[0] = cvCreateMat( img_size,1,CV_32FC1 ));
    for( k = 0, layer = first_layer; k < n_layers; k++, layer = layer->next_layer )
    {
        CV_CALL(X[k+1] = cvCreateMat( layer->n_output_planes*layer->output_height*
            layer->output_width, 1, CV_32FC1 ));
    }

    image = cvMat( 1, img_size, CV_32FC1, img_data );
    cvTranspose( &image, X[0] );
    for( k = 0, layer = first_layer; k < n_layers; k++, layer = layer->next_layer )
        CV_CALL(layer->forward( layer, X[k], X[k+1] ));

    probs_data = probs ? probs->data.fl : 0;
    etalon = cvMat( cnn_model->etalons->cols, 1, CV_32FC1, cnn_model->etalons->data.fl );
    for( i = 0; i < nclasses; i++, etalon.data.fl += cnn_model->etalons->cols )
    {
        loss = (float)cvNorm( X[n_layers], &etalon );
        if( loss < min_loss )
        {
            min_loss = loss;
            best_etal_idx = i;
        }
        if( probs )
            *probs_data++ = -loss;
    }

    if( probs )
    {
        cvExp( probs, probs );
        CvScalar sum = cvSum( probs );
        cvConvertScale( probs, probs, 1./sum.val[0] );
    }

    __END__;

    for( k = 0; k <= n_layers; k++ )
        cvReleaseMat( &X[k] );
    cvFree( &X );
    if( img_data != _image->data.fl )
        cvFree( &img_data );

    return ((float) ((CvCNNStatModel*)model)->cls_labels->data.i[best_etal_idx]);
}

/****************************************************************************************/
static void icvCNNModelUpdate(
        CvStatModel* _cnn_model, const CvMat* _train_data, int tflag,
        const CvMat* _responses, const CvStatModelParams* _params,
        const CvMat*, const CvMat* _sample_idx,
        const CvMat*, const CvMat* )
{
    const float** out_train_data = 0;
    CvMat* responses             = 0;
    CvMat* cls_labels            = 0;

    CV_FUNCNAME("icvCNNModelUpdate");
    __BEGIN__;

    int n_images, img_size, i;
    CvCNNStatModelParams* params = (CvCNNStatModelParams*)_params;
    CvCNNStatModel* cnn_model = (CvCNNStatModel*)_cnn_model;

    if( !CV_IS_CNN(cnn_model) )
        CV_ERROR( CV_StsBadArg, "Invalid model" );

    CV_CALL(cvPrepareTrainData( "cvTrainCNNClassifier",
        _train_data, tflag, _responses, CV_VAR_CATEGORICAL,
        0, _sample_idx, false, &out_train_data,
        &n_images, &img_size, &img_size, &responses,
        &cls_labels, 0, 0 ));

    ICV_CHECK_CNN_MODEL_PARAMS(params);

    // Number of classes must be the same as when classifiers was created
    if( !CV_ARE_SIZES_EQ(cls_labels, cnn_model->cls_labels) )
        CV_ERROR( CV_StsBadArg, "Number of classes must be left unchanged" );
    for( i = 0; i < cls_labels->cols; i++ )
    {
        if( cls_labels->data.i[i] != cnn_model->cls_labels->data.i[i] )
            CV_ERROR( CV_StsBadArg, "Number of classes must be left unchanged" );
    }

    CV_CALL( icvTrainCNNetwork( cnn_model->network, out_train_data, responses,
        cnn_model->etalons, params->grad_estim_type, params->max_iter,
        params->start_iter ));

    __END__;

    cvFree( &out_train_data );
    cvReleaseMat( &responses );
}

/****************************************************************************************/
static void icvCNNModelRelease( CvStatModel** cnn_model )
{
    CV_FUNCNAME("icvCNNModelRelease");
    __BEGIN__;

    CvCNNStatModel* cnn;
    if( !cnn_model )
        CV_ERROR( CV_StsNullPtr, "Null double pointer" );

    cnn = *(CvCNNStatModel**)cnn_model;

    cvReleaseMat( &cnn->cls_labels );
    cvReleaseMat( &cnn->etalons );
    cnn->network->release( &cnn->network );

    cvFree( &cnn );

    __END__;

}

/****************************************************************************************\
*                                 Network functions                                      *
\****************************************************************************************/
ML_IMPL CvCNNetwork* cvCreateCNNetwork( CvCNNLayer* first_layer )
{
    CvCNNetwork* network = 0;    
    
    CV_FUNCNAME( "cvCreateCNNetwork" );
    __BEGIN__;

    if( !ICV_IS_CNN_LAYER(first_layer) )
        CV_ERROR( CV_StsBadArg, "Invalid layer" );

    CV_CALL(network = (CvCNNetwork*)cvAlloc( sizeof(CvCNNetwork) ));
    memset( network, 0, sizeof(CvCNNetwork) );

    network->layers    = first_layer;
    network->n_layers  = 1;
    network->release   = icvCNNetworkRelease;
    network->add_layer = icvCNNetworkAddLayer;

    __END__;

    if( cvGetErrStatus() < 0 && network )
        cvFree( &network );

    return network;

}

/****************************************************************************************/
static void icvCNNetworkAddLayer( CvCNNetwork* network, CvCNNLayer* layer )
{
    CV_FUNCNAME( "icvCNNetworkAddLayer" );
    __BEGIN__;

    CvCNNLayer* prev_layer;

    if( network == NULL )
        CV_ERROR( CV_StsNullPtr, "Null <network> pointer" );

    prev_layer = network->layers;
    while( prev_layer->next_layer )
        prev_layer = prev_layer->next_layer;

    if( ICV_IS_CNN_FULLCONNECT_LAYER(layer) )
    {
        if( layer->n_input_planes != prev_layer->output_width*prev_layer->output_height*
            prev_layer->n_output_planes )
            CV_ERROR( CV_StsBadArg, "Unmatched size of the new layer" );
        if( layer->input_height != 1 || layer->output_height != 1 ||
            layer->input_width != 1  || layer->output_width != 1 )
            CV_ERROR( CV_StsBadArg, "Invalid size of the new layer" );
    }
    else if( ICV_IS_CNN_CONVOLUTION_LAYER(layer) || ICV_IS_CNN_SUBSAMPLING_LAYER(layer) )
    {
        if( prev_layer->n_output_planes != layer->n_input_planes ||
        prev_layer->output_height   != layer->input_height ||
        prev_layer->output_width    != layer->input_width )
        CV_ERROR( CV_StsBadArg, "Unmatched size of the new layer" );
    }
    else
        CV_ERROR( CV_StsBadArg, "Invalid layer" );

    layer->prev_layer = prev_layer;
    prev_layer->next_layer = layer;
    network->n_layers++;

    __END__;
}

/****************************************************************************************/
static void icvCNNetworkRelease( CvCNNetwork** network_pptr )
{
    CV_FUNCNAME( "icvReleaseCNNetwork" );
    __BEGIN__;

    CvCNNetwork* network = 0;
    CvCNNLayer* layer = 0, *next_layer = 0;
    int k;

    if( network_pptr == NULL )
        CV_ERROR( CV_StsBadArg, "Null double pointer" );
    if( *network_pptr == NULL )
        return;

    network = *network_pptr;
    layer = network->layers;
    if( layer == NULL )
        CV_ERROR( CV_StsBadArg, "CNN is empty (does not contain any layer)" );

    // k is the number of the layer to be deleted
    for( k = 0; k < network->n_layers && layer; k++ )
    {
        next_layer = layer->next_layer;
        layer->release( &layer );
        layer = next_layer;
    }

    if( k != network->n_layers || layer)
        CV_ERROR( CV_StsBadArg, "Invalid network" );

    cvFree( &network );

    __END__;
}

/****************************************************************************************\
*                                  Layer functions                                       *
\****************************************************************************************/
static CvCNNLayer* icvCreateCNNLayer( int layer_type, int header_size,
    int n_input_planes, int input_height, int input_width,
    int n_output_planes, int output_height, int output_width,
    float init_learn_rate, int learn_rate_decrease_type,
    CvCNNLayerRelease release, CvCNNLayerForward forward, CvCNNLayerBackward backward )
{
    CvCNNLayer* layer = 0;

    CV_FUNCNAME("icvCreateCNNLayer");
    __BEGIN__;

    CV_ASSERT( release && forward && backward )
    CV_ASSERT( header_size >= sizeof(CvCNNLayer) )

    if( n_input_planes < 1 || n_output_planes < 1 ||
        input_height   < 1 || input_width < 1 ||
        output_height  < 1 || output_width < 1 ||
        input_height < output_height ||
        input_width  < output_width )
        CV_ERROR( CV_StsBadArg, "Incorrect input or output parameters" );
    if( init_learn_rate < FLT_EPSILON )
        CV_ERROR( CV_StsBadArg, "Initial learning rate must be positive" );
    if( learn_rate_decrease_type != CV_CNN_LEARN_RATE_DECREASE_HYPERBOLICALLY &&
        learn_rate_decrease_type != CV_CNN_LEARN_RATE_DECREASE_SQRT_INV &&
        learn_rate_decrease_type != CV_CNN_LEARN_RATE_DECREASE_LOG_INV )
        CV_ERROR( CV_StsBadArg, "Invalid type of learning rate dynamics" );

    CV_CALL(layer = (CvCNNLayer*)cvAlloc( header_size ));
    memset( layer, 0, header_size );

    layer->flags = ICV_CNN_LAYER|layer_type;
    CV_ASSERT( ICV_IS_CNN_LAYER(layer) )

    layer->n_input_planes = n_input_planes;
    layer->input_height   = input_height;
    layer->input_width    = input_width;

    layer->n_output_planes = n_output_planes;
    layer->output_height   = output_height;
    layer->output_width    = output_width;

    layer->init_learn_rate = init_learn_rate;
    layer->learn_rate_decrease_type = learn_rate_decrease_type;

    layer->release  = release;
    layer->forward  = forward;
    layer->backward = backward;

    __END__;

    if( cvGetErrStatus() < 0 && layer)
        cvFree( &layer );

    return layer;
}

/****************************************************************************************/
ML_IMPL CvCNNLayer* cvCreateCNNConvolutionLayer(
    int n_input_planes, int input_height, int input_width,
    int n_output_planes, int K,
    float init_learn_rate, int learn_rate_decrease_type,
    CvMat* connect_mask, CvMat* weights )

{
    CvCNNConvolutionLayer* layer = 0;

    CV_FUNCNAME("cvCreateCNNConvolutionLayer");
    __BEGIN__;

    const int output_height = input_height - K + 1;
    const int output_width = input_width - K + 1;

    if( K < 1 || init_learn_rate <= 0 )
        CV_ERROR( CV_StsBadArg, "Incorrect parameters" );

    CV_CALL(layer = (CvCNNConvolutionLayer*)icvCreateCNNLayer( ICV_CNN_CONVOLUTION_LAYER,
        sizeof(CvCNNConvolutionLayer), n_input_planes, input_height, input_width,
        n_output_planes, output_height, output_width,
        init_learn_rate, learn_rate_decrease_type,
        icvCNNConvolutionRelease, icvCNNConvolutionForward, icvCNNConvolutionBackward ));

    layer->K = K;
    CV_CALL(layer->weights = cvCreateMat( n_output_planes, K*K+1, CV_32FC1 ));
    CV_CALL(layer->connect_mask = cvCreateMat( n_output_planes, n_input_planes, CV_8UC1));

    if( weights )
    {
        if( !ICV_IS_MAT_OF_TYPE( weights, CV_32FC1 ) )
            CV_ERROR( CV_StsBadSize, "Type of initial weights matrix must be CV_32FC1" );
        if( !CV_ARE_SIZES_EQ( weights, layer->weights ) )
            CV_ERROR( CV_StsBadSize, "Invalid size of initial weights matrix" );
        CV_CALL(cvCopy( weights, layer->weights ));
    }
    else
    {
        CvRNG rng = cvRNG( 0xFFFFFFFF );
        cvRandArr( &rng, layer->weights, CV_RAND_UNI, cvRealScalar(-1), cvRealScalar(1) );
    }
 
    if( connect_mask )
    {
        if( !ICV_IS_MAT_OF_TYPE( connect_mask, CV_8UC1 ) )
            CV_ERROR( CV_StsBadSize, "Type of connection matrix must be CV_32FC1" );
        if( !CV_ARE_SIZES_EQ( connect_mask, layer->connect_mask ) )
            CV_ERROR( CV_StsBadSize, "Invalid size of connection matrix" );
        CV_CALL(cvCopy( connect_mask, layer->connect_mask ));
    }
    else
        CV_CALL(cvSet( layer->connect_mask, cvRealScalar(1) ));

    __END__;

    if( cvGetErrStatus() < 0 && layer )
    {
        cvReleaseMat( &layer->weights );
        cvReleaseMat( &layer->connect_mask );
        cvFree( &layer );
    }
    
    return (CvCNNLayer*)layer;
}

/****************************************************************************************/
ML_IMPL CvCNNLayer* cvCreateCNNSubSamplingLayer(
    int n_input_planes, int input_height, int input_width,
    int sub_samp_scale, float a, float s,
    float init_learn_rate, int learn_rate_decrease_type, CvMat* weights )

{
    CvCNNSubSamplingLayer* layer = 0;

    CV_FUNCNAME("cvCreateCNNSubSamplingLayer");
    __BEGIN__;

    const int output_height   = input_height/sub_samp_scale;
    const int output_width    = input_width/sub_samp_scale;
    const int n_output_planes = n_input_planes;

    if( sub_samp_scale < 1 || a <= 0 || s <= 0)
        CV_ERROR( CV_StsBadArg, "Incorrect parameters" );

    CV_CALL(layer = (CvCNNSubSamplingLayer*)icvCreateCNNLayer( ICV_CNN_SUBSAMPLING_LAYER,
        sizeof(CvCNNSubSamplingLayer), n_input_planes, input_height, input_width,
        n_output_planes, output_height, output_width,
        init_learn_rate, learn_rate_decrease_type,
        icvCNNSubSamplingRelease, icvCNNSubSamplingForward, icvCNNSubSamplingBackward ));

    layer->sub_samp_scale  = sub_samp_scale;
    layer->a               = a;
    layer->s               = s;

    CV_CALL(layer->sumX =
        cvCreateMat( n_output_planes*output_width*output_height, 1, CV_32FC1 ));
    CV_CALL(layer->exp2ssumWX =
        cvCreateMat( n_output_planes*output_width*output_height, 1, CV_32FC1 ));

    cvZero( layer->sumX );
    cvZero( layer->exp2ssumWX );

    CV_CALL(layer->weights = cvCreateMat( n_output_planes, 2, CV_32FC1 ));
    if( weights )
    {
        if( !ICV_IS_MAT_OF_TYPE( weights, CV_32FC1 ) )
            CV_ERROR( CV_StsBadSize, "Type of initial weights matrix must be CV_32FC1" );
        if( !CV_ARE_SIZES_EQ( weights, layer->weights ) )
            CV_ERROR( CV_StsBadSize, "Invalid size of initial weights matrix" );
        CV_CALL(cvCopy( weights, layer->weights ));
    }
    else
    {
        CvRNG rng = cvRNG( 0xFFFFFFFF );
        cvRandArr( &rng, layer->weights, CV_RAND_UNI, cvRealScalar(-1), cvRealScalar(1) );
    }

    __END__;

    if( cvGetErrStatus() < 0 && layer )
    {
        cvReleaseMat( &layer->exp2ssumWX );
        cvFree( &layer );
    }

    return (CvCNNLayer*)layer;
}

/****************************************************************************************/
ML_IMPL CvCNNLayer* cvCreateCNNFullConnectLayer( 
    int n_inputs, int n_outputs, float a, float s,
    float init_learn_rate, int learn_rate_decrease_type, CvMat* weights )
{
    CvCNNFullConnectLayer* layer = 0;

    CV_FUNCNAME("cvCreateCNNFullConnectLayer");
    __BEGIN__;

    if( a <= 0 || s <= 0 || init_learn_rate <= 0)
        CV_ERROR( CV_StsBadArg, "Incorrect parameters" );

    CV_CALL(layer = (CvCNNFullConnectLayer*)icvCreateCNNLayer( ICV_CNN_FULLCONNECT_LAYER,
        sizeof(CvCNNFullConnectLayer), n_inputs, 1, 1, n_outputs, 1, 1,
        init_learn_rate, learn_rate_decrease_type,
        icvCNNFullConnectRelease, icvCNNFullConnectForward, icvCNNFullConnectBackward ));

    layer->a = a;
    layer->s = s;

    CV_CALL(layer->exp2ssumWX = cvCreateMat( n_outputs, 1, CV_32FC1 ));
    cvZero( layer->exp2ssumWX );

    CV_CALL(layer->weights = cvCreateMat( n_outputs, n_inputs+1, CV_32FC1 ));
    if( weights )
    {
        if( !ICV_IS_MAT_OF_TYPE( weights, CV_32FC1 ) )
            CV_ERROR( CV_StsBadSize, "Type of initial weights matrix must be CV_32FC1" );
        if( !CV_ARE_SIZES_EQ( weights, layer->weights ) )
            CV_ERROR( CV_StsBadSize, "Invalid size of initial weights matrix" );
        CV_CALL(cvCopy( weights, layer->weights ));
    }
    else
    {
        CvRNG rng = cvRNG( 0xFFFFFFFF );
        cvRandArr( &rng, layer->weights, CV_RAND_UNI, cvRealScalar(-1), cvRealScalar(1) );
    }

    __END__;

    if( cvGetErrStatus() < 0 && layer )
    {
        cvReleaseMat( &layer->exp2ssumWX );
        cvReleaseMat( &layer->weights );
        cvFree( &layer );
    }

    return (CvCNNLayer*)layer;
}


/****************************************************************************************\
*                           Layer FORWARD functions                                      *
\****************************************************************************************/
static void icvCNNConvolutionForward( CvCNNLayer* _layer,
                                      const CvMat* X,
                                      CvMat* Y )
{
    CV_FUNCNAME("icvCNNConvolutionForward");

    if( !ICV_IS_CNN_CONVOLUTION_LAYER(_layer) )
        CV_ERROR( CV_StsBadArg, "Invalid layer" );

    {__BEGIN__;

    const CvCNNConvolutionLayer* layer = (CvCNNConvolutionLayer*) _layer;

    const int K = layer->K;
    const int n_weights_for_Yplane = K*K + 1;

    const int nXplanes = layer->n_input_planes;
    const int Xheight  = layer->input_height;
    const int Xwidth   = layer->input_width ;
    const int Xsize    = Xwidth*Xheight;

    const int nYplanes = layer->n_output_planes;
    const int Yheight  = layer->output_height;
    const int Ywidth   = layer->output_width;
    const int Ysize    = Ywidth*Yheight;

    int xx, yy, ni, no, kx, ky;
    float *Yplane = 0, *Xplane = 0, *w = 0;
    uchar* connect_mask_data = 0;

    CV_ASSERT( X->rows == nXplanes*Xsize && X->cols == 1 );
    CV_ASSERT( Y->rows == nYplanes*Ysize && Y->cols == 1 );

    cvSetZero( Y );

    Yplane = Y->data.fl;
    connect_mask_data = layer->connect_mask->data.ptr;
    w = layer->weights->data.fl;
    for( no = 0; no < nYplanes; no++, Yplane += Ysize, w += n_weights_for_Yplane )
    {
        Xplane = X->data.fl;
        for( ni = 0; ni < nXplanes; ni++, Xplane += Xsize, connect_mask_data++ )
        {
            if( *connect_mask_data )
            {
                float* Yelem = Yplane;

                // Xheight-K+1 == Yheight && Xwidth-K+1 == Ywidth
                for( yy = 0; yy < Xheight-K+1; yy++ )
                {
                    for( xx = 0; xx < Xwidth-K+1; xx++, Yelem++ )
                    {
                        float* templ = Xplane+yy*Xwidth+xx;
                        float WX = 0;
                        for( ky = 0; ky < K; ky++, templ += Xwidth-K )
                        {
                            for( kx = 0; kx < K; kx++, templ++ )
                            {
                                WX += *templ*w[ky*K+kx];
                            }
                        }
                        *Yelem += WX + w[K*K];
                    }
                }
            }
        }
    }
    }__END__;
}

/****************************************************************************************/
static void icvCNNSubSamplingForward( CvCNNLayer* _layer,
                                      const CvMat* X,
                                      CvMat* Y )
{
    CV_FUNCNAME("icvCNNSubSamplingForward");

    if( !ICV_IS_CNN_SUBSAMPLING_LAYER(_layer) )
        CV_ERROR( CV_StsBadArg, "Invalid layer" );

    {__BEGIN__;

    const CvCNNSubSamplingLayer* layer = (CvCNNSubSamplingLayer*) _layer;

    const int sub_sampl_scale = layer->sub_samp_scale;
    const int nplanes = layer->n_input_planes;

    const int Xheight = layer->input_height;
    const int Xwidth  = layer->input_width ;
    const int Xsize   = Xwidth*Xheight;

    const int Yheight = layer->output_height;
    const int Ywidth  = layer->output_width;
    const int Ysize   = Ywidth*Yheight;

    int xx, yy, ni, kx, ky;
    float* sumX_data = 0, *w = 0;
    CvMat sumX_sub_col, exp2ssumWX_sub_col;

    CV_ASSERT(X->rows == nplanes*Xsize && X->cols == 1);
    CV_ASSERT(layer->exp2ssumWX->cols == 1 && layer->exp2ssumWX->rows == nplanes*Ysize);

    // update inner variable layer->exp2ssumWX, which will be used in back-progation
    cvZero( layer->sumX );
    cvZero( layer->exp2ssumWX );

    for( ky = 0; ky < sub_sampl_scale; ky++ )
        for( kx = 0; kx < sub_sampl_scale; kx++ )
        {
            float* Xplane = X->data.fl;
            sumX_data = layer->sumX->data.fl;
            for( ni = 0; ni < nplanes; ni++, Xplane += Xsize )
            {
                for( yy = 0; yy < Yheight; yy++ )
                    for( xx = 0; xx < Ywidth; xx++, sumX_data++ )
                        *sumX_data += Xplane[((yy+ky)*Xwidth+(xx+kx))];
            }
        }                

    w = layer->weights->data.fl;
    cvGetRows( layer->sumX, &sumX_sub_col, 0, Ysize );
    cvGetRows( layer->exp2ssumWX, &exp2ssumWX_sub_col, 0, Ysize );
    for( ni = 0; ni < nplanes; ni++, w += 2 )
    {
        CV_CALL(cvConvertScale( &sumX_sub_col, &exp2ssumWX_sub_col, w[0], w[1] ));
        sumX_sub_col.data.fl += Ysize;
        exp2ssumWX_sub_col.data.fl += Ysize;
    }

    CV_CALL(cvScale( layer->exp2ssumWX, layer->exp2ssumWX, 2.0*layer->s ));
    CV_CALL(cvExp( layer->exp2ssumWX, layer->exp2ssumWX ));
    CV_CALL(cvMinS( layer->exp2ssumWX, FLT_MAX, layer->exp2ssumWX ));
//#ifdef _DEBUG
    {
        float* exp2ssumWX_data = layer->exp2ssumWX->data.fl;
        for( ni = 0; ni < layer->exp2ssumWX->rows; ni++, exp2ssumWX_data++ )
        {
            if( *exp2ssumWX_data == FLT_MAX )
                cvSetErrStatus( 1 );
        }
    }
//#endif
    // compute the output variable Y == ( a - 2a/(layer->exp2ssumWX + 1))
    CV_CALL(cvAddS( layer->exp2ssumWX, cvRealScalar(1), Y ));
    CV_CALL(cvDiv( 0, Y, Y, -2.0*layer->a ));
    CV_CALL(cvAddS( Y, cvRealScalar(layer->a), Y ));

    }__END__;
}

/****************************************************************************************/
static void icvCNNFullConnectForward( CvCNNLayer* _layer, const CvMat* X, CvMat* Y )
{
    CV_FUNCNAME("icvCNNFullConnectForward");

    if( !ICV_IS_CNN_FULLCONNECT_LAYER(_layer) )
        CV_ERROR( CV_StsBadArg, "Invalid layer" );

    {__BEGIN__;

    const CvCNNFullConnectLayer* layer = (CvCNNFullConnectLayer*)_layer;
    CvMat* weights = layer->weights;
    CvMat sub_weights, bias;

    CV_ASSERT(X->cols == 1 && X->rows == layer->n_input_planes);
    CV_ASSERT(Y->cols == 1 && Y->rows == layer->n_output_planes);

    CV_CALL(cvGetSubRect( weights, &sub_weights,
                          cvRect(0, 0, weights->cols-1, weights->rows )));
    CV_CALL(cvGetCol( weights, &bias, weights->cols-1));

    // update inner variable layer->exp2ssumWX, which will be used in Back-Propagation
    CV_CALL(cvGEMM( &sub_weights, X, 2*layer->s, &bias, 2*layer->s, layer->exp2ssumWX ));
    CV_CALL(cvExp( layer->exp2ssumWX, layer->exp2ssumWX ));
    CV_CALL(cvMinS( layer->exp2ssumWX, FLT_MAX, layer->exp2ssumWX ));
//#ifdef _DEBUG
    {
        float* exp2ssumWX_data = layer->exp2ssumWX->data.fl;
        int i;
        for( i = 0; i < layer->exp2ssumWX->rows; i++, exp2ssumWX_data++ )
        {
            if( *exp2ssumWX_data == FLT_MAX )
                cvSetErrStatus( 1 );
        }
    }
//#endif
    // compute the output variable Y == ( a - 2a/(layer->exp2ssumWX + 1))
    CV_CALL(cvAddS( layer->exp2ssumWX, cvRealScalar(1), Y ));
    CV_CALL(cvDiv( 0, Y, Y, -2.0*layer->a ));
    CV_CALL(cvAddS( Y, cvRealScalar(layer->a), Y ));

    }__END__;
}

/****************************************************************************************\
*                           Layer BACKWARD functions                                     *
\****************************************************************************************/

/* <dE_dY>, <dE_dX> should be row-vectors.
   Function computes partial derivatives <dE_dX>
   of the loss function with respect to the planes components
   of the previous layer (X).
   It is a basic function for back propagation method.
   Input parameter <dE_dY> is the partial derivative of the
   loss function with respect to the planes components
   of the current layer. */
static void icvCNNConvolutionBackward(
    CvCNNLayer* _layer, int t, const CvMat* X, const CvMat* dE_dY, CvMat* dE_dX )
{
    CvMat* dY_dX = 0;
    CvMat* dY_dW = 0;
    CvMat* dE_dW = 0;

    CV_FUNCNAME("icvCNNConvolutionBackward");

    if( !ICV_IS_CNN_CONVOLUTION_LAYER(_layer) )
        CV_ERROR( CV_StsBadArg, "Invalid layer" );

    {__BEGIN__;

    const CvCNNConvolutionLayer* layer = (CvCNNConvolutionLayer*) _layer;

    const int K = layer->K;

    const int n_X_planes     = layer->n_input_planes;
    const int X_plane_height = layer->input_height;
    const int X_plane_width  = layer->input_width;
    const int X_plane_size   = X_plane_height*X_plane_width;

    const int n_Y_planes     = layer->n_output_planes;
    const int Y_plane_height = layer->output_height;
    const int Y_plane_width  = layer->output_width;
    const int Y_plane_size   = Y_plane_height*Y_plane_width;

    int no, ni, yy, xx, ky, kx;
    int X_idx = 0, Y_idx = 0;

    float *X_plane = 0, *w = 0;

    CvMat* weights = layer->weights;

    CV_ASSERT( t >= 1 );
    CV_ASSERT( n_Y_planes == weights->rows );

    dY_dX = cvCreateMat( n_Y_planes*Y_plane_size, X->rows, CV_32FC1 );
    dY_dW = cvCreateMat( dY_dX->rows, weights->cols*weights->rows, CV_32FC1 );
    dE_dW = cvCreateMat( 1, dY_dW->cols, CV_32FC1 );

    cvZero( dY_dX );
    cvZero( dY_dW );

    // compute gradient of the loss function with respect to X and W
    for( no = 0; no < n_Y_planes; no++, Y_idx += Y_plane_size )
    {
        w = weights->data.fl + no*(K*K+1);
        X_idx = 0;
        X_plane = X->data.fl;
        for( ni = 0; ni < n_X_planes; ni++, X_plane += X_plane_size )
        {
            if( layer->connect_mask->data.ptr[ni*n_Y_planes+no] )
            {
                for( yy = 0; yy < X_plane_height - K + 1; yy++ )
                {
                    for( xx = 0; xx < X_plane_width - K + 1; xx++ )
                    {
                        for( ky = 0; ky < K; ky++ )
                        {
                            for( kx = 0; kx < K; kx++ )
                            {
                                CV_MAT_ELEM(*dY_dX, float, Y_idx+yy*Y_plane_width+xx,
                                    X_idx+(yy+ky)*X_plane_width+(xx+kx)) = w[ky*K+kx];

                                // dY_dWi, i=1,...,K*K
                                CV_MAT_ELEM(*dY_dW, float, Y_idx+yy*Y_plane_width+xx,
                                    no*(K*K+1)+ky*K+kx) +=
                                    X_plane[(yy+ky)*X_plane_width+(xx+kx)];
                            }
                        }
                        // dY_dW(K*K+1)==1 because W(K*K+1) is bias
                        CV_MAT_ELEM(*dY_dW, float, Y_idx+yy*Y_plane_width+xx,
                            no*(K*K+1)+K*K) += 1;
                    }
                }
            }
            X_idx += X_plane_size;
        }
    }

    CV_CALL(cvMatMul( dE_dY, dY_dW, dE_dW ));
    CV_CALL(cvMatMul( dE_dY, dY_dX, dE_dX ));

    // update weights
    {
        CvMat dE_dW_mat;
        float eta;
        if( layer->learn_rate_decrease_type == CV_CNN_LEARN_RATE_DECREASE_LOG_INV )
            eta = -layer->init_learn_rate/logf(1+(float)t);
        else if( layer->learn_rate_decrease_type == CV_CNN_LEARN_RATE_DECREASE_SQRT_INV )
            eta = -layer->init_learn_rate/sqrtf((float)t);
        else
            eta = -layer->init_learn_rate/(float)t;
        cvReshape( dE_dW, &dE_dW_mat, 0, weights->rows );
        cvScaleAdd( &dE_dW_mat, cvRealScalar(eta), weights, weights );
    }

    }__END__;

    cvReleaseMat( &dY_dX );
    cvReleaseMat( &dY_dW );
    cvReleaseMat( &dE_dW );
}

/****************************************************************************************/
static void icvCNNSubSamplingBackward(
    CvCNNLayer* _layer, int t, const CvMat*, const CvMat* dE_dY, CvMat* dE_dX )
{
    // derivative of activation function
    CvMat* dY_dX_elems = 0; // elements of matrix dY_dX
    CvMat* dY_dW_elems = 0; // elements of matrix dY_dW
    CvMat* dE_dW = 0;

    CV_FUNCNAME("icvCNNSubSamplingBackward");

    if( !ICV_IS_CNN_SUBSAMPLING_LAYER(_layer) )
        CV_ERROR( CV_StsBadArg, "Invalid layer" );

    {__BEGIN__;

    const CvCNNSubSamplingLayer* layer = (CvCNNSubSamplingLayer*) _layer;

    const int Xwidth  = layer->input_width;
    const int Ywidth  = layer->output_width;
    const int Yheight = layer->output_height;
    const int Ysize   = Ywidth * Yheight;
    const int scale   = layer->sub_samp_scale;
    const int k_max   = layer->n_output_planes * Yheight;

    int k, i, j, m;
    float* dY_dX_current_elem = 0, *dE_dX_start = 0, *dE_dW_data = 0, *w = 0;
    CvMat dy_dw0, dy_dw1;
    CvMat activ_func_der, sumX_row;
    CvMat dE_dY_sub_row, dY_dX_sub_col, dy_dw0_sub_row, dy_dw1_sub_row;

    CV_CALL(dY_dX_elems = cvCreateMat( layer->sumX->rows, 1, CV_32FC1 ));
    CV_CALL(dY_dW_elems = cvCreateMat( 2, layer->sumX->rows, CV_32FC1 ));
    CV_CALL(dE_dW = cvCreateMat( 1, 2*layer->n_output_planes, CV_32FC1 ));

    // compute derivative of activ.func.
    // ==<dY_dX_elems> = 4as*(layer->exp2ssumWX)/(layer->exp2ssumWX + 1)^2
    CV_CALL(cvAddS( layer->exp2ssumWX, cvRealScalar(1), dY_dX_elems ));
    CV_CALL(cvPow( dY_dX_elems, dY_dX_elems, -2.0 ));
    CV_CALL(cvMul( dY_dX_elems, layer->exp2ssumWX, dY_dX_elems, 4.0*layer->a*layer->s ));

    // compute <dE_dW>
    // a) compute <dY_dW_elems>
    cvReshape( dY_dX_elems, &activ_func_der, 0, 1 );
    cvGetRow( dY_dW_elems, &dy_dw0, 0 );
    cvGetRow( dY_dW_elems, &dy_dw1, 1 );
    CV_CALL(cvCopy( &activ_func_der, &dy_dw0 ));
    CV_CALL(cvCopy( &activ_func_der, &dy_dw1 ));

    cvReshape( layer->sumX, &sumX_row, 0, 1 );
    cvMul( &dy_dw0, &sumX_row, &dy_dw0 );

    // b) compute <dE_dW> = <dE_dY>*<dY_dW_elems>
    cvGetCols( dE_dY, &dE_dY_sub_row, 0, Ysize );
    cvGetCols( &dy_dw0, &dy_dw0_sub_row, 0, Ysize );
    cvGetCols( &dy_dw1, &dy_dw1_sub_row, 0, Ysize );
    dE_dW_data = dE_dW->data.fl;
    for( i = 0; i < layer->n_output_planes; i++ )
    {
        *dE_dW_data++ = (float)cvDotProduct( &dE_dY_sub_row, &dy_dw0_sub_row );
        *dE_dW_data++ = (float)cvDotProduct( &dE_dY_sub_row, &dy_dw1_sub_row );

        dE_dY_sub_row.data.fl += Ysize;
        dy_dw0_sub_row.data.fl += Ysize;
        dy_dw1_sub_row.data.fl += Ysize;
    }

    // compute <dY_dX> = layer->weights*<dY_dX>
    w = layer->weights->data.fl;
    cvGetRows( dY_dX_elems, &dY_dX_sub_col, 0, Ysize );
    for( i = 0; i < layer->n_input_planes; i++, w++, dY_dX_sub_col.data.fl += Ysize )
        CV_CALL(cvConvertScale( &dY_dX_sub_col, &dY_dX_sub_col, (float)*w ));

    // compute <dE_dX>
    CV_CALL(cvReshape( dY_dX_elems, dY_dX_elems, 0, 1 ));
    CV_CALL(cvMul( dY_dX_elems, dE_dY, dY_dX_elems ));

    dY_dX_current_elem = dY_dX_elems->data.fl;
    dE_dX_start = dE_dX->data.fl;
    for( k = 0; k < k_max; k++ )
    {
        for( i = 0; i < Ywidth; i++, dY_dX_current_elem++ )
        {
            float* dE_dX_current_elem = dE_dX_start;
            for( j = 0; j < scale; j++, dE_dX_current_elem += Xwidth - scale )
            {
                for( m = 0; m < scale; m++, dE_dX_current_elem++ )
                    *dE_dX_current_elem = *dY_dX_current_elem;
            }
            dE_dX_start += scale;
        }
        dE_dX_start += Xwidth * (scale - 1);
    }

    // update weights
    {
        CvMat dE_dW_mat, *weights = layer->weights;
        float eta;
        if( layer->learn_rate_decrease_type == CV_CNN_LEARN_RATE_DECREASE_LOG_INV )
            eta = -layer->init_learn_rate/logf(1+(float)t);
        else if( layer->learn_rate_decrease_type == CV_CNN_LEARN_RATE_DECREASE_SQRT_INV )
            eta = -layer->init_learn_rate/sqrtf((float)t);
        else
            eta = -layer->init_learn_rate/(float)t;
        cvReshape( dE_dW, &dE_dW_mat, 0, weights->rows );
        cvScaleAdd( &dE_dW_mat, cvRealScalar(eta), weights, weights );
    }

    }__END__;

    cvReleaseMat( &dY_dX_elems );
    cvReleaseMat( &dY_dW_elems );
    cvReleaseMat( &dE_dW );
}

/****************************************************************************************/
/* <dE_dY>, <dE_dX> should be row-vectors.
   Function computes partial derivatives <dE_dX>, <dE_dW>
   of the loss function with respect to the planes components
   of the previous layer (X) and the weights of the current layer (W)
   and updates weights od the current layer by using <dE_dW>.
   It is a basic function for back propagation method.
   Input parameter <dE_dY> is the partial derivative of the
   loss function with respect to the planes components
   of the current layer. */
static void icvCNNFullConnectBackward( CvCNNLayer* _layer,
                                    int t,
                                    const CvMat* X,
                                    const CvMat* dE_dY,
                                    CvMat* dE_dX )
{
    CvMat* dE_dY_activ_func_der = 0;
    CvMat* dE_dW = 0;
    
    CV_FUNCNAME( "icvCNNFullConnectBackward" );

    if( !ICV_IS_CNN_FULLCONNECT_LAYER(_layer) )
        CV_ERROR( CV_StsBadArg, "Invalid layer" );

    {__BEGIN__;

    const CvCNNFullConnectLayer* layer = (CvCNNFullConnectLayer*)_layer;
    const int n_outputs = layer->n_output_planes;
    const int n_inputs  = layer->n_input_planes;

    int i;
    float* dE_dY_activ_func_der_data;
    CvMat* weights = layer->weights;
    CvMat sub_weights, Xtemplate, Xrow, exp2ssumWXrow;

    CV_ASSERT(X->cols == 1 && X->rows == n_inputs);
    CV_ASSERT(dE_dY->rows == 1 && dE_dY->cols == n_outputs );
    CV_ASSERT(dE_dX->rows == 1 && dE_dX->cols == n_inputs );
    
    // we violate the convetion about vector's orientation because
    // here is more convenient to make this parameter a row-vector 
    CV_CALL(dE_dY_activ_func_der = cvCreateMat( 1, n_outputs, CV_32FC1 ));
    CV_CALL(dE_dW = cvCreateMat( 1, weights->rows*weights->cols, CV_32FC1 ));
    
    // 1) compute gradients dE_dX and dE_dW
    // activ_func_der == 4as*(layer->exp2ssumWX)/(layer->exp2ssumWX + 1)^2
    CV_CALL(cvReshape( layer->exp2ssumWX, &exp2ssumWXrow, 0, layer->exp2ssumWX->cols ));
    CV_CALL(cvAddS( &exp2ssumWXrow, cvRealScalar(1), dE_dY_activ_func_der ));
    CV_CALL(cvPow( dE_dY_activ_func_der, dE_dY_activ_func_der, -2.0 ));
    CV_CALL(cvMul( dE_dY_activ_func_der, &exp2ssumWXrow, dE_dY_activ_func_der,
                   4.0*layer->a*layer->s ));
    CV_CALL(cvMul( dE_dY, dE_dY_activ_func_der, dE_dY_activ_func_der ));

    // sub_weights = d(W*(X|1))/dX
    CV_CALL(cvGetSubRect( weights, &sub_weights,
        cvRect(0, 0, weights->cols-1, weights->rows) ));
    CV_CALL(cvMatMul( dE_dY_activ_func_der, &sub_weights, dE_dX ));

    cvReshape( X, &Xrow, 0, 1 );
    dE_dY_activ_func_der_data = dE_dY_activ_func_der->data.fl;
    Xtemplate = cvMat( 1, n_inputs, CV_32FC1, dE_dW->data.fl );
    for( i = 0; i < n_outputs; i++, Xtemplate.data.fl += n_inputs + 1 )
    {
        CV_CALL(cvConvertScale( &Xrow, &Xtemplate, *dE_dY_activ_func_der_data ));
        Xtemplate.data.fl[n_inputs] = *dE_dY_activ_func_der_data++;
    }

    // 2) update weights
    {
        CvMat dE_dW_mat;
        float eta;
        if( layer->learn_rate_decrease_type == CV_CNN_LEARN_RATE_DECREASE_LOG_INV )
            eta = -layer->init_learn_rate/logf(1+(float)t);
        else if( layer->learn_rate_decrease_type == CV_CNN_LEARN_RATE_DECREASE_SQRT_INV )
            eta = -layer->init_learn_rate/sqrtf((float)t);
        else
            eta = -layer->init_learn_rate/(float)t;
        cvReshape( dE_dW, &dE_dW_mat, 0, n_outputs );
        cvScaleAdd( &dE_dW_mat, cvRealScalar(eta), weights, weights );
    }

    }__END__;

    cvReleaseMat( &dE_dY_activ_func_der );
    cvReleaseMat( &dE_dW );
}

/****************************************************************************************\
*                           Layer RELEASE functions                                      *
\****************************************************************************************/
static void icvCNNConvolutionRelease( CvCNNLayer** p_layer )
{
    CV_FUNCNAME("icvCNNConvolutionRelease");
    __BEGIN__;

    CvCNNConvolutionLayer* layer = 0;

    if( !p_layer )
        CV_ERROR( CV_StsNullPtr, "Null double pointer" );

    layer = *(CvCNNConvolutionLayer**)p_layer;

    if( !layer )
        return;
    if( !ICV_IS_CNN_CONVOLUTION_LAYER(layer) )
        CV_ERROR( CV_StsBadArg, "Invalid layer" );

    cvReleaseMat( &layer->weights );
    cvReleaseMat( &layer->connect_mask );
    cvFree( p_layer );

    __END__;
}

/****************************************************************************************/
static void icvCNNSubSamplingRelease( CvCNNLayer** p_layer )
{
    CV_FUNCNAME("icvCNNSubSamplingRelease");
    __BEGIN__;

    CvCNNSubSamplingLayer* layer = 0;
    
    if( !p_layer )
        CV_ERROR( CV_StsNullPtr, "Null double pointer" );

    layer = *(CvCNNSubSamplingLayer**)p_layer;

    if( !layer )
        return;
    if( !ICV_IS_CNN_SUBSAMPLING_LAYER(layer) )
        CV_ERROR( CV_StsBadArg, "Invalid layer" );

    cvReleaseMat( &layer->exp2ssumWX );
    cvReleaseMat( &layer->weights );
    cvFree( p_layer );

    __END__;
}

/****************************************************************************************/
static void icvCNNFullConnectRelease( CvCNNLayer** p_layer )
{
    CV_FUNCNAME("icvCNNFullConnectRelease");
    __BEGIN__;

    CvCNNFullConnectLayer* layer = 0;
    
    if( !p_layer )
        CV_ERROR( CV_StsNullPtr, "Null double pointer" );

    layer = *(CvCNNFullConnectLayer**)p_layer;

    if( !layer )
        return;
    if( !ICV_IS_CNN_FULLCONNECT_LAYER(layer) )
        CV_ERROR( CV_StsBadArg, "Invalid layer" );

    cvReleaseMat( &layer->exp2ssumWX );
    cvReleaseMat( &layer->weights );
    cvFree( p_layer );

    __END__;
}

/****************************************************************************************\
*                              Read/Write CNN classifier                                 *
\****************************************************************************************/
static int icvIsCNNModel( const void* ptr )
{
    return CV_IS_CNN(ptr);
}

/****************************************************************************************/
static void icvReleaseCNNModel( void** ptr )
{
    CV_FUNCNAME("icvReleaseCNNModel");
    __BEGIN__;

    if( !ptr )
        CV_ERROR( CV_StsNullPtr, "NULL double pointer" );
    CV_ASSERT(CV_IS_CNN(*ptr));

    icvCNNModelRelease( (CvStatModel**)ptr );

    __END__;
}

/****************************************************************************************/
static CvCNNLayer* icvReadCNNLayer( CvFileStorage* fs, CvFileNode* node )
{
    CvCNNLayer* layer = 0;
    CvMat* weights    = 0;
    CvMat* connect_mask = 0;

    CV_FUNCNAME("icvReadCNNLayer");
    __BEGIN__;

    int n_input_planes, input_height, input_width;
    int n_output_planes, output_height, output_width;
    int learn_type, layer_type;
    float init_learn_rate;

    CV_CALL(n_input_planes  = cvReadIntByName( fs, node, "n_input_planes",  -1 ));
    CV_CALL(input_height    = cvReadIntByName( fs, node, "input_height",    -1 ));
    CV_CALL(input_width     = cvReadIntByName( fs, node, "input_width",     -1 ));
    CV_CALL(n_output_planes = cvReadIntByName( fs, node, "n_output_planes", -1 ));
    CV_CALL(output_height   = cvReadIntByName( fs, node, "output_height",   -1 ));
    CV_CALL(output_width    = cvReadIntByName( fs, node, "output_width",    -1 ));
    CV_CALL(layer_type      = cvReadIntByName( fs, node, "layer_type",      -1 ));

    CV_CALL(init_learn_rate = (float)cvReadRealByName( fs, node, "init_learn_rate", -1 ));
    CV_CALL(learn_type = cvReadIntByName( fs, node, "learn_rate_decrease_type", -1 ));
    CV_CALL(weights    = (CvMat*)cvReadByName( fs, node, "weights" ));

    if( n_input_planes < 0  || input_height < 0  || input_width < 0 ||
        n_output_planes < 0 || output_height < 0 || output_width < 0 ||
        init_learn_rate < 0 || learn_type < 0 || layer_type < 0 || !weights )
        CV_ERROR( CV_StsParseError, "" );

    if( layer_type == ICV_CNN_CONVOLUTION_LAYER )
    {
        const int K = input_height - output_height + 1;
        if( K <= 0 || K != input_width - output_width + 1 )
            CV_ERROR( CV_StsBadArg, "Invalid <K>" );

        CV_CALL(connect_mask = (CvMat*)cvReadByName( fs, node, "connect_mask" ));
        if( !connect_mask )
            CV_ERROR( CV_StsParseError, "Missing <connect mask>" );

        CV_CALL(layer = cvCreateCNNConvolutionLayer( 
            n_input_planes, input_height, input_width, n_output_planes, K,
            init_learn_rate, learn_type, connect_mask, weights ));
    }
    else if( layer_type == ICV_CNN_SUBSAMPLING_LAYER )
    {
        float a, s;
        const int sub_samp_scale = input_height/output_height;

        if( sub_samp_scale <= 0 || sub_samp_scale != input_width/output_width )
            CV_ERROR( CV_StsBadArg, "Invalid <sub_samp_scale>" );

        CV_CALL(a = (float)cvReadRealByName( fs, node, "a", -1 ));
        CV_CALL(s = (float)cvReadRealByName( fs, node, "s", -1 ));
        if( a  < 0 || s  < 0 )
            CV_ERROR( CV_StsParseError, "Missing <a> or <s>" );

        CV_CALL(layer = cvCreateCNNSubSamplingLayer(
            n_input_planes, input_height, input_width, sub_samp_scale,
            a, s, init_learn_rate, learn_type, weights ));
    }
    else if( layer_type == ICV_CNN_FULLCONNECT_LAYER )
    {
        float a, s;
        CV_CALL(a = (float)cvReadRealByName( fs, node, "a", -1 ));
        CV_CALL(s = (float)cvReadRealByName( fs, node, "s", -1 ));
        if( a  < 0 || s  < 0 )
            CV_ERROR( CV_StsParseError, "" );
        if( input_height != 1  || input_width != 1 ||
            output_height != 1 || output_width != 1 )
            CV_ERROR( CV_StsBadArg, "" );

        CV_CALL(layer = cvCreateCNNFullConnectLayer( n_input_planes, n_output_planes,
            a, s, init_learn_rate, learn_type, weights ));
    }
    else
        CV_ERROR( CV_StsBadArg, "Invalid <layer_type>" );

    __END__;

    if( cvGetErrStatus() < 0 && layer )
        layer->release( &layer );

    cvReleaseMat( &weights );
    cvReleaseMat( &connect_mask );

    return layer;
}

/****************************************************************************************/
static void icvWriteCNNLayer( CvFileStorage* fs, CvCNNLayer* layer )
{
    CV_FUNCNAME ("icvWriteCNNLayer");
    __BEGIN__;

    if( !ICV_IS_CNN_LAYER(layer) )
        CV_ERROR( CV_StsBadArg, "Invalid layer" );

    CV_CALL( cvStartWriteStruct( fs, NULL, CV_NODE_MAP, "opencv-ml-cnn-layer" ));

    CV_CALL(cvWriteInt( fs, "n_input_planes",  layer->n_input_planes ));
    CV_CALL(cvWriteInt( fs, "input_height",    layer->input_height ));
    CV_CALL(cvWriteInt( fs, "input_width",     layer->input_width ));
    CV_CALL(cvWriteInt( fs, "n_output_planes", layer->n_output_planes ));
    CV_CALL(cvWriteInt( fs, "output_height",   layer->output_height ));
    CV_CALL(cvWriteInt( fs, "output_width",    layer->output_width ));
    CV_CALL(cvWriteInt( fs, "learn_rate_decrease_type", layer->learn_rate_decrease_type));
    CV_CALL(cvWriteReal( fs, "init_learn_rate", layer->init_learn_rate ));
    CV_CALL(cvWrite( fs, "weights", layer->weights ));

    if( ICV_IS_CNN_CONVOLUTION_LAYER( layer ))
    {
        CvCNNConvolutionLayer* l = (CvCNNConvolutionLayer*)layer;
        CV_CALL(cvWriteInt( fs, "layer_type", ICV_CNN_CONVOLUTION_LAYER ));
        CV_CALL(cvWrite( fs, "connect_mask", l->connect_mask ));
    }
    else if( ICV_IS_CNN_SUBSAMPLING_LAYER( layer ) )
    {
        CvCNNSubSamplingLayer* l = (CvCNNSubSamplingLayer*)layer;
        CV_CALL(cvWriteInt( fs, "layer_type", ICV_CNN_SUBSAMPLING_LAYER ));
        CV_CALL(cvWriteReal( fs, "a", l->a ));
        CV_CALL(cvWriteReal( fs, "s", l->s ));
    }
    else if( ICV_IS_CNN_FULLCONNECT_LAYER( layer ) )
    {
        CvCNNFullConnectLayer* l = (CvCNNFullConnectLayer*)layer;
        CV_CALL(cvWriteInt( fs, "layer_type", ICV_CNN_FULLCONNECT_LAYER ));
        CV_CALL(cvWriteReal( fs, "a", l->a ));
        CV_CALL(cvWriteReal( fs, "s", l->s ));
    }
    else
        CV_ERROR( CV_StsBadArg, "Invalid layer" );

    CV_CALL( cvEndWriteStruct( fs )); //"opencv-ml-cnn-layer"

    __END__;
}

/****************************************************************************************/
static void* icvReadCNNModel( CvFileStorage* fs, CvFileNode* root_node )
{
    CvCNNStatModel* cnn = 0;
    CvCNNLayer* layer = 0;

    CV_FUNCNAME("icvReadCNNModel");
    __BEGIN__;

    CvFileNode* node;
    CvSeq* seq;
    CvSeqReader reader;
    int i;

    CV_CALL(cnn = (CvCNNStatModel*)cvCreateStatModel(
        CV_STAT_MODEL_MAGIC_VAL|CV_CNN_MAGIC_VAL, sizeof(CvCNNStatModel),
        icvCNNModelRelease, icvCNNModelPredict, icvCNNModelUpdate ));

    CV_CALL(cnn->etalons = (CvMat*)cvReadByName( fs, root_node, "etalons" ));
    CV_CALL(cnn->cls_labels = (CvMat*)cvReadByName( fs, root_node, "cls_labels" ));

    if( !cnn->etalons || !cnn->cls_labels )
        CV_ERROR( CV_StsParseError, "No <etalons> or <cls_labels> in CNN model" );

    CV_CALL( node = cvGetFileNodeByName( fs, root_node, "network" ));
    seq = node->data.seq;
    if( !CV_NODE_IS_SEQ(node->tag) )
        CV_ERROR( CV_StsBadArg, "" );

    CV_CALL( cvStartReadSeq( seq, &reader, 0 ));
    CV_CALL(layer = icvReadCNNLayer( fs, (CvFileNode*)reader.ptr ));
    CV_CALL(cnn->network = cvCreateCNNetwork( layer ));

    for( i = 1; i < seq->total; i++ )
    {
        CV_NEXT_SEQ_ELEM( seq->elem_size, reader );
        CV_CALL(layer = icvReadCNNLayer( fs, (CvFileNode*)reader.ptr ));
        CV_CALL(cnn->network->add_layer( cnn->network, layer ));
    }

    __END__;

    if( cvGetErrStatus() < 0 )
    {
        if( cnn ) cnn->release( (CvStatModel**)&cnn );
        if( layer ) layer->release( &layer );
    }
    return (void*)cnn;
}

/****************************************************************************************/
static void
icvWriteCNNModel( CvFileStorage* fs, const char* name,
                  const void* struct_ptr, CvAttrList )
                                   
{
    CV_FUNCNAME ("icvWriteCNNModel");
    __BEGIN__;

    CvCNNStatModel* cnn = (CvCNNStatModel*)struct_ptr;
    int n_layers, i;
    CvCNNLayer* layer;

    if( !CV_IS_CNN(cnn) )
        CV_ERROR( CV_StsBadArg, "Invalid pointer" );

    n_layers = cnn->network->n_layers;
    
    CV_CALL( cvStartWriteStruct( fs, name, CV_NODE_MAP, CV_TYPE_NAME_ML_CNN ));

    CV_CALL(cvWrite( fs, "etalons", cnn->etalons ));
    CV_CALL(cvWrite( fs, "cls_labels", cnn->cls_labels ));
    
    CV_CALL( cvStartWriteStruct( fs, "network", CV_NODE_SEQ ));

    layer = cnn->network->layers;
    for( i = 0; i < n_layers && layer; i++, layer = layer->next_layer )
        CV_CALL(icvWriteCNNLayer( fs, layer ));
    if( i < n_layers || layer )
        CV_ERROR( CV_StsBadArg, "Invalid network" );

    CV_CALL( cvEndWriteStruct( fs )); //"network"
    CV_CALL( cvEndWriteStruct( fs )); //"opencv-ml-cnn"

    __END__;
}

static int icvRegisterCNNStatModelType()
{
    CvTypeInfo info;

    info.header_size = sizeof( info );
    info.is_instance = icvIsCNNModel;
    info.release = icvReleaseCNNModel;
    info.read = icvReadCNNModel;
    info.write = icvWriteCNNModel;
    info.clone = NULL;
    info.type_name = CV_TYPE_NAME_ML_CNN;
    cvRegisterType( &info );

    return 1;
} // End of icvRegisterCNNStatModelType

static int cnn = icvRegisterCNNStatModelType();

#endif

// End of file