/*M///////////////////////////////////////////////////////////////////////////////////////
//
//  IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
//
//  By downloading, copying, installing or using the software you agree to this license.
//  If you do not agree to this license, do not download, install,
//  copy or use the software.
//
//
//                        Intel License Agreement
//                For Open Source Computer Vision Library
//
// Copyright (C) 2000, Intel Corporation, all rights reserved.
// Third party copyrights are property of their respective owners.
//
// Redistribution and use in source and binary forms, with or without modification,
// are permitted provided that the following conditions are met:
//
//   * Redistribution's of source code must retain the above copyright notice,
//     this list of conditions and the following disclaimer.
//
//   * Redistribution's in binary form must reproduce the above copyright notice,
//     this list of conditions and the following disclaimer in the documentation
//     and/or other materials provided with the distribution.
//
//   * The name of Intel Corporation may not be used to endorse or promote products
//     derived from this software without specific prior written permission.
//
// This software is provided by the copyright holders and contributors "as is" and
// any express or implied warranties, including, but not limited to, the implied
// warranties of merchantability and fitness for a particular purpose are disclaimed.
// In no event shall the Intel Corporation or contributors be liable for any direct,
// indirect, incidental, special, exemplary, or consequential damages
// (including, but not limited to, procurement of substitute goods or services;
// loss of use, data, or profits; or business interruption) however caused
// and on any theory of liability, whether in contract, strict liability,
// or tort (including negligence or otherwise) arising in any way out of
// the use of this software, even if advised of the possibility of such damage.
//
//M*/
#include "_cv.h"

typedef struct
{
    float xx;
    float xy;
    float yy;
    float xt;
    float yt;
}
icvDerProduct;


#define CONV( A, B, C)  ((float)( A +  (B<<1)  + C ))
/*F///////////////////////////////////////////////////////////////////////////////////////
//    Name: icvCalcOpticalFlowLK_8u32fR ( Lucas & Kanade method )
//    Purpose: calculate Optical flow for 2 images using Lucas & Kanade algorithm
//    Context:
//    Parameters:
//            imgA,         // pointer to first frame ROI
//            imgB,         // pointer to second frame ROI
//            imgStep,      // width of single row of source images in bytes
//            imgSize,      // size of the source image ROI
//            winSize,      // size of the averaging window used for grouping
//            velocityX,    // pointer to horizontal and
//            velocityY,    // vertical components of optical flow ROI
//            velStep       // width of single row of velocity frames in bytes
//
//    Returns: CV_OK         - all ok
//             CV_OUTOFMEM_ERR  - insufficient memory for function work
//             CV_NULLPTR_ERR - if one of input pointers is NULL
//             CV_BADSIZE_ERR   - wrong input sizes interrelation
//
//    Notes:  1.Optical flow to be computed for every pixel in ROI
//            2.For calculating spatial derivatives we use 3x3 Sobel operator.
//            3.We use the following border mode.
//              The last row or column is replicated for the border
//              ( IPL_BORDER_REPLICATE in IPL ).
//
//
//F*/
static CvStatus CV_STDCALL
icvCalcOpticalFlowLK_8u32fR( uchar * imgA,
                             uchar * imgB,
                             int imgStep,
                             CvSize imgSize,
                             CvSize winSize,
                             float *velocityX,
                             float *velocityY, int velStep )
{
    /* Loops indexes */
    int i, j, k;

    /* Gaussian separable kernels */
    float GaussX[16];
    float GaussY[16];
    float *KerX;
    float *KerY;

    /* Buffers for Sobel calculations */
    float *MemX[2];
    float *MemY[2];

    float ConvX, ConvY;
    float GradX, GradY, GradT;

    int winWidth = winSize.width;
    int winHeight = winSize.height;

    int imageWidth = imgSize.width;
    int imageHeight = imgSize.height;

    int HorRadius = (winWidth - 1) >> 1;
    int VerRadius = (winHeight - 1) >> 1;

    int PixelLine;
    int ConvLine;

    int BufferAddress;

    int BufferHeight = 0;
    int BufferWidth;
    int BufferSize;

    /* buffers derivatives product */
    icvDerProduct *II;

    /* buffers for gaussian horisontal convolution */
    icvDerProduct *WII;

    /* variables for storing number of first pixel of image line */
    int Line1;
    int Line2;
    int Line3;

    /* we must have 2*2 linear system coeffs
       | A1B2  B1 |  {u}   {C1}   {0}
       |          |  { } + {  } = { }
       | A2  A1B2 |  {v}   {C2}   {0}
     */
    float A1B2, A2, B1, C1, C2;

    int pixNumber;

    /* auxiliary */
    int NoMem = 0;

    velStep /= sizeof(velocityX[0]);

    /* Checking bad arguments */
    if( imgA == NULL )
        return CV_NULLPTR_ERR;
    if( imgB == NULL )
        return CV_NULLPTR_ERR;

    if( imageHeight < winHeight )
        return CV_BADSIZE_ERR;
    if( imageWidth < winWidth )
        return CV_BADSIZE_ERR;

    if( winHeight >= 16 )
        return CV_BADSIZE_ERR;
    if( winWidth >= 16 )
        return CV_BADSIZE_ERR;

    if( !(winHeight & 1) )
        return CV_BADSIZE_ERR;
    if( !(winWidth & 1) )
        return CV_BADSIZE_ERR;

    BufferHeight = winHeight;
    BufferWidth = imageWidth;

    /****************************************************************************************/
    /* Computing Gaussian coeffs                                                            */
    /****************************************************************************************/
    GaussX[0] = 1;
    GaussY[0] = 1;
    for( i = 1; i < winWidth; i++ )
    {
        GaussX[i] = 1;
        for( j = i - 1; j > 0; j-- )
        {
            GaussX[j] += GaussX[j - 1];
        }
    }
    for( i = 1; i < winHeight; i++ )
    {
        GaussY[i] = 1;
        for( j = i - 1; j > 0; j-- )
        {
            GaussY[j] += GaussY[j - 1];
        }
    }
    KerX = &GaussX[HorRadius];
    KerY = &GaussY[VerRadius];

    /****************************************************************************************/
    /* Allocating memory for all buffers                                                    */
    /****************************************************************************************/
    for( k = 0; k < 2; k++ )
    {
        MemX[k] = (float *) cvAlloc( (imgSize.height) * sizeof( float ));

        if( MemX[k] == NULL )
            NoMem = 1;
        MemY[k] = (float *) cvAlloc( (imgSize.width) * sizeof( float ));

        if( MemY[k] == NULL )
            NoMem = 1;
    }

    BufferSize = BufferHeight * BufferWidth;

    II = (icvDerProduct *) cvAlloc( BufferSize * sizeof( icvDerProduct ));
    WII = (icvDerProduct *) cvAlloc( BufferSize * sizeof( icvDerProduct ));


    if( (II == NULL) || (WII == NULL) )
        NoMem = 1;

    if( NoMem )
    {
        for( k = 0; k < 2; k++ )
        {
            if( MemX[k] )
                cvFree( &MemX[k] );

            if( MemY[k] )
                cvFree( &MemY[k] );
        }
        if( II )
            cvFree( &II );
        if( WII )
            cvFree( &WII );

        return CV_OUTOFMEM_ERR;
    }

    /****************************************************************************************/
    /*        Calculate first line of memX and memY                                         */
    /****************************************************************************************/
    MemY[0][0] = MemY[1][0] = CONV( imgA[0], imgA[0], imgA[1] );
    MemX[0][0] = MemX[1][0] = CONV( imgA[0], imgA[0], imgA[imgStep] );

    for( j = 1; j < imageWidth - 1; j++ )
    {
        MemY[0][j] = MemY[1][j] = CONV( imgA[j - 1], imgA[j], imgA[j + 1] );
    }

    pixNumber = imgStep;
    for( i = 1; i < imageHeight - 1; i++ )
    {
        MemX[0][i] = MemX[1][i] = CONV( imgA[pixNumber - imgStep],
                                        imgA[pixNumber], imgA[pixNumber + imgStep] );
        pixNumber += imgStep;
    }

    MemY[0][imageWidth - 1] =
        MemY[1][imageWidth - 1] = CONV( imgA[imageWidth - 2],
                                        imgA[imageWidth - 1], imgA[imageWidth - 1] );

    MemX[0][imageHeight - 1] =
        MemX[1][imageHeight - 1] = CONV( imgA[pixNumber - imgStep],
                                         imgA[pixNumber], imgA[pixNumber] );


    /****************************************************************************************/
    /*    begin scan image, calc derivatives and solve system                               */
    /****************************************************************************************/

    PixelLine = -VerRadius;
    ConvLine = 0;
    BufferAddress = -BufferWidth;

    while( PixelLine < imageHeight )
    {
        if( ConvLine < imageHeight )
        {
            /*Here we calculate derivatives for line of image */
            int address;

            i = ConvLine;
            int L1 = i - 1;
            int L2 = i;
            int L3 = i + 1;

            int memYline = L3 & 1;

            if( L1 < 0 )
                L1 = 0;
            if( L3 >= imageHeight )
                L3 = imageHeight - 1;

            BufferAddress += BufferWidth;
            BufferAddress -= ((BufferAddress >= BufferSize) ? 0xffffffff : 0) & BufferSize;

            address = BufferAddress;

            Line1 = L1 * imgStep;
            Line2 = L2 * imgStep;
            Line3 = L3 * imgStep;

            /* Process first pixel */
            ConvX = CONV( imgA[Line1 + 1], imgA[Line2 + 1], imgA[Line3 + 1] );
            ConvY = CONV( imgA[Line3], imgA[Line3], imgA[Line3 + 1] );

            GradY = ConvY - MemY[memYline][0];
            GradX = ConvX - MemX[1][L2];

            MemY[memYline][0] = ConvY;
            MemX[1][L2] = ConvX;

            GradT = (float) (imgB[Line2] - imgA[Line2]);

            II[address].xx = GradX * GradX;
            II[address].xy = GradX * GradY;
            II[address].yy = GradY * GradY;
            II[address].xt = GradX * GradT;
            II[address].yt = GradY * GradT;
            address++;
            /* Process middle of line */
            for( j = 1; j < imageWidth - 1; j++ )
            {
                ConvX = CONV( imgA[Line1 + j + 1], imgA[Line2 + j + 1], imgA[Line3 + j + 1] );
                ConvY = CONV( imgA[Line3 + j - 1], imgA[Line3 + j], imgA[Line3 + j + 1] );

                GradY = ConvY - MemY[memYline][j];
                GradX = ConvX - MemX[(j - 1) & 1][L2];

                MemY[memYline][j] = ConvY;
                MemX[(j - 1) & 1][L2] = ConvX;

                GradT = (float) (imgB[Line2 + j] - imgA[Line2 + j]);

                II[address].xx = GradX * GradX;
                II[address].xy = GradX * GradY;
                II[address].yy = GradY * GradY;
                II[address].xt = GradX * GradT;
                II[address].yt = GradY * GradT;

                address++;
            }
            /* Process last pixel of line */
            ConvX = CONV( imgA[Line1 + imageWidth - 1], imgA[Line2 + imageWidth - 1],
                          imgA[Line3 + imageWidth - 1] );

            ConvY = CONV( imgA[Line3 + imageWidth - 2], imgA[Line3 + imageWidth - 1],
                          imgA[Line3 + imageWidth - 1] );


            GradY = ConvY - MemY[memYline][imageWidth - 1];
            GradX = ConvX - MemX[(imageWidth - 2) & 1][L2];

            MemY[memYline][imageWidth - 1] = ConvY;

            GradT = (float) (imgB[Line2 + imageWidth - 1] - imgA[Line2 + imageWidth - 1]);

            II[address].xx = GradX * GradX;
            II[address].xy = GradX * GradY;
            II[address].yy = GradY * GradY;
            II[address].xt = GradX * GradT;
            II[address].yt = GradY * GradT;
            address++;

            /* End of derivatives for line */

            /****************************************************************************************/
            /* ---------Calculating horizontal convolution of processed line----------------------- */
            /****************************************************************************************/
            address -= BufferWidth;
            /* process first HorRadius pixels */
            for( j = 0; j < HorRadius; j++ )
            {
                int jj;

                WII[address].xx = 0;
                WII[address].xy = 0;
                WII[address].yy = 0;
                WII[address].xt = 0;
                WII[address].yt = 0;

                for( jj = -j; jj <= HorRadius; jj++ )
                {
                    float Ker = KerX[jj];

                    WII[address].xx += II[address + jj].xx * Ker;
                    WII[address].xy += II[address + jj].xy * Ker;
                    WII[address].yy += II[address + jj].yy * Ker;
                    WII[address].xt += II[address + jj].xt * Ker;
                    WII[address].yt += II[address + jj].yt * Ker;
                }
                address++;
            }
            /* process inner part of line */
            for( j = HorRadius; j < imageWidth - HorRadius; j++ )
            {
                int jj;
                float Ker0 = KerX[0];

                WII[address].xx = 0;
                WII[address].xy = 0;
                WII[address].yy = 0;
                WII[address].xt = 0;
                WII[address].yt = 0;

                for( jj = 1; jj <= HorRadius; jj++ )
                {
                    float Ker = KerX[jj];

                    WII[address].xx += (II[address - jj].xx + II[address + jj].xx) * Ker;
                    WII[address].xy += (II[address - jj].xy + II[address + jj].xy) * Ker;
                    WII[address].yy += (II[address - jj].yy + II[address + jj].yy) * Ker;
                    WII[address].xt += (II[address - jj].xt + II[address + jj].xt) * Ker;
                    WII[address].yt += (II[address - jj].yt + II[address + jj].yt) * Ker;
                }
                WII[address].xx += II[address].xx * Ker0;
                WII[address].xy += II[address].xy * Ker0;
                WII[address].yy += II[address].yy * Ker0;
                WII[address].xt += II[address].xt * Ker0;
                WII[address].yt += II[address].yt * Ker0;

                address++;
            }
            /* process right side */
            for( j = imageWidth - HorRadius; j < imageWidth; j++ )
            {
                int jj;

                WII[address].xx = 0;
                WII[address].xy = 0;
                WII[address].yy = 0;
                WII[address].xt = 0;
                WII[address].yt = 0;

                for( jj = -HorRadius; jj < imageWidth - j; jj++ )
                {
                    float Ker = KerX[jj];

                    WII[address].xx += II[address + jj].xx * Ker;
                    WII[address].xy += II[address + jj].xy * Ker;
                    WII[address].yy += II[address + jj].yy * Ker;
                    WII[address].xt += II[address + jj].xt * Ker;
                    WII[address].yt += II[address + jj].yt * Ker;
                }
                address++;
            }
        }

        /****************************************************************************************/
        /*  Calculating velocity line                                                           */
        /****************************************************************************************/
        if( PixelLine >= 0 )
        {
            int USpace;
            int BSpace;
            int address;

            if( PixelLine < VerRadius )
                USpace = PixelLine;
            else
                USpace = VerRadius;

            if( PixelLine >= imageHeight - VerRadius )
                BSpace = imageHeight - PixelLine - 1;
            else
                BSpace = VerRadius;

            address = ((PixelLine - USpace) % BufferHeight) * BufferWidth;
            for( j = 0; j < imageWidth; j++ )
            {
                int addr = address;

                A1B2 = 0;
                A2 = 0;
                B1 = 0;
                C1 = 0;
                C2 = 0;

                for( i = -USpace; i <= BSpace; i++ )
                {
                    A2 += WII[addr + j].xx * KerY[i];
                    A1B2 += WII[addr + j].xy * KerY[i];
                    B1 += WII[addr + j].yy * KerY[i];
                    C2 += WII[addr + j].xt * KerY[i];
                    C1 += WII[addr + j].yt * KerY[i];

                    addr += BufferWidth;
                    addr -= ((addr >= BufferSize) ? 0xffffffff : 0) & BufferSize;
                }
                /****************************************************************************************\
                * Solve Linear System                                                                    *
                \****************************************************************************************/
                {
                    float delta = (A1B2 * A1B2 - A2 * B1);

                    if( delta )
                    {
                        /* system is not singular - solving by Kramer method */
                        float deltaX;
                        float deltaY;
                        float Idelta = 8 / delta;

                        deltaX = -(C1 * A1B2 - C2 * B1);
                        deltaY = -(A1B2 * C2 - A2 * C1);

                        velocityX[j] = deltaX * Idelta;
                        velocityY[j] = deltaY * Idelta;
                    }
                    else
                    {
                        /* singular system - find optical flow in gradient direction */
                        float Norm = (A1B2 + A2) * (A1B2 + A2) + (B1 + A1B2) * (B1 + A1B2);

                        if( Norm )
                        {
                            float IGradNorm = 8 / Norm;
                            float temp = -(C1 + C2) * IGradNorm;

                            velocityX[j] = (A1B2 + A2) * temp;
                            velocityY[j] = (B1 + A1B2) * temp;

                        }
                        else
                        {
                            velocityX[j] = 0;
                            velocityY[j] = 0;
                        }
                    }
                }
                /****************************************************************************************\
                * End of Solving Linear System                                                           *
                \****************************************************************************************/
            }                   /*for */
            velocityX += velStep;
            velocityY += velStep;
        }                       /*for */
        PixelLine++;
        ConvLine++;
    }

    /* Free memory */
    for( k = 0; k < 2; k++ )
    {
        cvFree( &MemX[k] );
        cvFree( &MemY[k] );
    }
    cvFree( &II );
    cvFree( &WII );

    return CV_OK;
} /*icvCalcOpticalFlowLK_8u32fR*/


/*F///////////////////////////////////////////////////////////////////////////////////////
//    Name:    cvCalcOpticalFlowLK
//    Purpose: Optical flow implementation
//    Context: 
//    Parameters:
//             srcA, srcB - source image
//             velx, vely - destination image
//    Returns:
//
//    Notes:
//F*/
CV_IMPL void
cvCalcOpticalFlowLK( const void* srcarrA, const void* srcarrB, CvSize winSize,
                     void* velarrx, void* velarry )
{
    CV_FUNCNAME( "cvCalcOpticalFlowLK" );

    __BEGIN__;

    CvMat stubA, *srcA = (CvMat*)srcarrA;
    CvMat stubB, *srcB = (CvMat*)srcarrB;
    CvMat stubx, *velx = (CvMat*)velarrx;
    CvMat stuby, *vely = (CvMat*)velarry;

    CV_CALL( srcA = cvGetMat( srcA, &stubA ));
    CV_CALL( srcB = cvGetMat( srcB, &stubB ));

    CV_CALL( velx = cvGetMat( velx, &stubx ));
    CV_CALL( vely = cvGetMat( vely, &stuby ));

    if( !CV_ARE_TYPES_EQ( srcA, srcB ))
        CV_ERROR( CV_StsUnmatchedFormats, "Source images have different formats" );

    if( !CV_ARE_TYPES_EQ( velx, vely ))
        CV_ERROR( CV_StsUnmatchedFormats, "Destination images have different formats" );

    if( !CV_ARE_SIZES_EQ( srcA, srcB ) ||
        !CV_ARE_SIZES_EQ( velx, vely ) ||
        !CV_ARE_SIZES_EQ( srcA, velx ))
        CV_ERROR( CV_StsUnmatchedSizes, "" );

    if( CV_MAT_TYPE( srcA->type ) != CV_8UC1 ||
        CV_MAT_TYPE( velx->type ) != CV_32FC1 )
        CV_ERROR( CV_StsUnsupportedFormat, "Source images must have 8uC1 type and "
                                           "destination images must have 32fC1 type" );

    if( srcA->step != srcB->step || velx->step != vely->step )
        CV_ERROR( CV_BadStep, "source and destination images have different step" );

    IPPI_CALL( icvCalcOpticalFlowLK_8u32fR( (uchar*)srcA->data.ptr, (uchar*)srcB->data.ptr,
                                            srcA->step, cvGetMatSize( srcA ), winSize,
                                            velx->data.fl, vely->data.fl, velx->step ));

    __END__;
}

/* End of file. */