/*M///////////////////////////////////////////////////////////////////////////////////////
//
// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
//
// By downloading, copying, installing or using the software you agree to this license.
// If you do not agree to this license, do not download, install,
// copy or use the software.
//
//
// License Agreement
// For Open Source Computer Vision Library
//
// Copyright (C) 2000-2008, Intel Corporation, all rights reserved.
// Copyright (C) 2009, Willow Garage Inc., all rights reserved.
// Third party copyrights are property of their respective owners.
//
// Redistribution and use in source and binary forms, with or without modification,
// are permitted provided that the following conditions are met:
//
// * Redistribution's of source code must retain the above copyright notice,
// this list of conditions and the following disclaimer.
//
// * Redistribution's in binary form must reproduce the above copyright notice,
// this list of conditions and the following disclaimer in the documentation
// and/or other materials provided with the distribution.
//
// * The name of the copyright holders may not be used to endorse or promote products
// derived from this software without specific prior written permission.
//
// This software is provided by the copyright holders and contributors "as is" and
// any express or implied warranties, including, but not limited to, the implied
// warranties of merchantability and fitness for a particular purpose are disclaimed.
// In no event shall the Intel Corporation or contributors be liable for any direct,
// indirect, incidental, special, exemplary, or consequential damages
// (including, but not limited to, procurement of substitute goods or services;
// loss of use, data, or profits; or business interruption) however caused
// and on any theory of liability, whether in contract, strict liability,
// or tort (including negligence or otherwise) arising in any way out of
// the use of this software, even if advised of the possibility of such damage.
//
//M*/
#include "precomp.hpp"
using namespace cv;
using namespace cv::cuda;
#if !defined (HAVE_CUDA) || !defined (HAVE_OPENCV_CUDAARITHM) || defined (CUDA_DISABLER)
// Factory variant compiled when the surrounding #if selects the "no CUDA" branch.
// All three arguments are ignored; the function only calls throw_no_cuda().
Ptr<cuda::TemplateMatching> cv::cuda::createTemplateMatching(int, int, Size)
{
    throw_no_cuda();
    // Keeps a return statement on every path (presumably unreachable if throw_no_cuda() throws).
    return Ptr<cuda::TemplateMatching>();
}
#else
// Forward declarations of the entry points in cv::cuda::device::match_template
// that the host classes in this file call. Only the prototypes appear here; the
// definitions are not part of this file (presumably they live in the matching
// .cu translation unit -- confirm).
// Every function takes a cudaStream_t as its last argument; the callers in this
// file pass StreamAccessor::getStream(stream).
namespace cv { namespace cuda { namespace device
{
namespace match_template
{
// "Naive" variants: called with the raw image/template pair and the output map.
// `cn` receives image.channels() at every call site in this file.
void matchTemplateNaive_CCORR_8U(const PtrStepSzb image, const PtrStepSzb templ, PtrStepSzf result, int cn, cudaStream_t stream);
void matchTemplateNaive_CCORR_32F(const PtrStepSzb image, const PtrStepSzb templ, PtrStepSzf result, int cn, cudaStream_t stream);
void matchTemplateNaive_SQDIFF_8U(const PtrStepSzb image, const PtrStepSzb templ, PtrStepSzf result, int cn, cudaStream_t stream);
void matchTemplateNaive_SQDIFF_32F(const PtrStepSzb image, const PtrStepSzb templ, PtrStepSzf result, int cn, cudaStream_t stream);
// "Prepared" SQDIFF variants: the callers first fill `result` through a CCORR
// matcher, then pass it here with w/h = templ.cols/templ.rows, the
// cuda::sqrIntegral table of the image and the cuda::sqrSum of the template.
void matchTemplatePrepared_SQDIFF_8U(int w, int h, const PtrStepSz<double> image_sqsum, double templ_sqsum, PtrStepSzf result,
int cn, cudaStream_t stream);
void matchTemplatePrepared_SQDIFF_NORMED_8U(int w, int h, const PtrStepSz<double> image_sqsum, double templ_sqsum, PtrStepSzf result,
int cn, cudaStream_t stream);
// "Prepared" CCOFF variants (used by Match_CCOEFF_8U): `result` already holds
// the CCORR output; image_sum* receive cuda::integral tables and templ_sum*
// receive cuda::sum(templ) components cast to int.
// In the multi-channel overloads the _r/_g/_b/_a arguments are bound to
// channel indices 0/1/2/3 at the call sites.
void matchTemplatePrepared_CCOFF_8U(int w, int h, const PtrStepSz<int> image_sum, int templ_sum, PtrStepSzf result, cudaStream_t stream);
void matchTemplatePrepared_CCOFF_8UC2(
int w, int h,
const PtrStepSz<int> image_sum_r,
const PtrStepSz<int> image_sum_g,
int templ_sum_r,
int templ_sum_g,
PtrStepSzf result, cudaStream_t stream);
void matchTemplatePrepared_CCOFF_8UC3(
int w, int h,
const PtrStepSz<int> image_sum_r,
const PtrStepSz<int> image_sum_g,
const PtrStepSz<int> image_sum_b,
int templ_sum_r,
int templ_sum_g,
int templ_sum_b,
PtrStepSzf result, cudaStream_t stream);
void matchTemplatePrepared_CCOFF_8UC4(
int w, int h,
const PtrStepSz<int> image_sum_r,
const PtrStepSz<int> image_sum_g,
const PtrStepSz<int> image_sum_b,
const PtrStepSz<int> image_sum_a,
int templ_sum_r,
int templ_sum_g,
int templ_sum_b,
int templ_sum_a,
PtrStepSzf result, cudaStream_t stream);
// "Prepared" CCOFF_NORMED variants (used by Match_CCOEFF_NORMED_8U): same
// inputs as the CCOFF family plus, per channel, the cuda::sqrIntegral table of
// the image and the cuda::sqrSum component of the template.
void matchTemplatePrepared_CCOFF_NORMED_8U(
int w, int h, const PtrStepSz<int> image_sum,
const PtrStepSz<double> image_sqsum,
int templ_sum, double templ_sqsum,
PtrStepSzf result, cudaStream_t stream);
void matchTemplatePrepared_CCOFF_NORMED_8UC2(
int w, int h,
const PtrStepSz<int> image_sum_r, const PtrStepSz<double> image_sqsum_r,
const PtrStepSz<int> image_sum_g, const PtrStepSz<double> image_sqsum_g,
int templ_sum_r, double templ_sqsum_r,
int templ_sum_g, double templ_sqsum_g,
PtrStepSzf result, cudaStream_t stream);
void matchTemplatePrepared_CCOFF_NORMED_8UC3(
int w, int h,
const PtrStepSz<int> image_sum_r, const PtrStepSz<double> image_sqsum_r,
const PtrStepSz<int> image_sum_g, const PtrStepSz<double> image_sqsum_g,
const PtrStepSz<int> image_sum_b, const PtrStepSz<double> image_sqsum_b,
int templ_sum_r, double templ_sqsum_r,
int templ_sum_g, double templ_sqsum_g,
int templ_sum_b, double templ_sqsum_b,
PtrStepSzf result, cudaStream_t stream);
void matchTemplatePrepared_CCOFF_NORMED_8UC4(
int w, int h,
const PtrStepSz<int> image_sum_r, const PtrStepSz<double> image_sqsum_r,
const PtrStepSz<int> image_sum_g, const PtrStepSz<double> image_sqsum_g,
const PtrStepSz<int> image_sum_b, const PtrStepSz<double> image_sqsum_b,
const PtrStepSz<int> image_sum_a, const PtrStepSz<double> image_sqsum_a,
int templ_sum_r, double templ_sqsum_r,
int templ_sum_g, double templ_sqsum_g,
int templ_sum_b, double templ_sqsum_b,
int templ_sum_a, double templ_sqsum_a,
PtrStepSzf result, cudaStream_t stream);
// Called by Match_CCORR_NORMED_8U after the CCORR pass, with the image's
// cuda::sqrIntegral table and the template's cuda::sqrSum value.
void normalize_8U(int w, int h, const PtrStepSz<double> image_sqsum,
double templ_sqsum, PtrStepSzf result, int cn, cudaStream_t stream);
// Called by Match_CCORR_32F for multi-channel inputs: `image` receives the
// convolution output buffer and `result` the CV_32FC1 output map.
void extractFirstChannel_32F(const PtrStepSzb image, PtrStepSzf result, int cn, cudaStream_t stream);
}
}}}
namespace
{
// Returns the template-area cutoff for a (method, depth) pair. Callers run the
// naive device routine when the template area is below the returned value and
// switch to the convolution-based path otherwise.
//
// Supported pairs: (TM_CCORR, CV_32F) -> 250, (TM_CCORR, CV_8U) -> 300,
// (TM_SQDIFF, CV_8U) -> 300. Any other combination raises Error::StsBadArg.
int getTemplateThreshold(int method, int depth)
{
    const bool is8U = (depth == CV_8U);

    if (method == TM_CCORR)
    {
        if (depth == CV_32F)
            return 250;
        if (is8U)
            return 300;
    }
    else if (method == TM_SQDIFF && is8U)
    {
        return 300;
    }

    CV_Error(Error::StsBadArg, "unsupported match template mode");
    return 0;
}
///////////////////////////////////////////////////////////////
// CCORR_32F
// Matcher returned by createTemplateMatching for CV_32F sources with TM_CCORR.
class Match_CCORR_32F : public TemplateMatching
{
public:
// Creates the convolution object from user_block_size (see the out-of-line definition).
explicit Match_CCORR_32F(Size user_block_size);
// Writes a CV_32FC1 map with (image.rows - templ.rows + 1) rows and
// (image.cols - templ.cols + 1) columns into result.
void match(InputArray image, InputArray templ, OutputArray result, Stream& stream = Stream::Null());
private:
// Convolution object used when the template area reaches getTemplateThreshold().
Ptr<cuda::Convolution> conv_;
// Receives the convolution output for multi-channel inputs before
// extractFirstChannel_32F copies from it into the caller's result.
GpuMat result_;
};
// Constructs the matcher; the convolution object is created up front from the
// caller-supplied block size.
Match_CCORR_32F::Match_CCORR_32F(Size user_block_size)
    : conv_(cuda::createConvolution(user_block_size))
{
}
// Fills _result with the TM_CCORR response of _templ over _image.
//
// _image, _templ : GpuMat-backed arrays; asserted to have depth CV_32F, equal
//                  types, and a template no larger than the image.
// _result        : (re)created as CV_32FC1 with (image.rows - templ.rows + 1)
//                  rows and (image.cols - templ.cols + 1) columns.
// _stream        : passed to the convolution; its raw handle is passed to the
//                  device routines.
void Match_CCORR_32F::match(InputArray _image, InputArray _templ, OutputArray _result, Stream& _stream)
{
    namespace mt = cv::cuda::device::match_template;

    const GpuMat image = _image.getGpuMat();
    const GpuMat templ = _templ.getGpuMat();

    CV_Assert( image.depth() == CV_32F );
    CV_Assert( image.type() == templ.type() );
    CV_Assert( image.cols >= templ.cols && image.rows >= templ.rows );

    cudaStream_t rawStream = StreamAccessor::getStream(_stream);

    const int outRows = image.rows - templ.rows + 1;
    const int outCols = image.cols - templ.cols + 1;
    _result.create(outRows, outCols, CV_32FC1);
    GpuMat response = _result.getGpuMat();

    const int cn = image.channels();
    const bool useNaive = templ.size().area() < getTemplateThreshold(TM_CCORR, CV_32F);

    if (useNaive)
    {
        // Small template: call the naive device routine directly.
        mt::matchTemplateNaive_CCORR_32F(image, templ, response, cn, rawStream);
    }
    else if (cn == 1)
    {
        // Single channel: the convolution writes straight into the output map.
        conv_->convolve(image.reshape(1), templ.reshape(1), response, true, _stream);
    }
    else
    {
        // Multi-channel: both inputs are viewed as single-channel via reshape(1),
        // convolved into the scratch buffer, and the output map is then derived
        // from that buffer by extractFirstChannel_32F.
        conv_->convolve(image.reshape(1), templ.reshape(1), result_, true, _stream);
        mt::extractFirstChannel_32F(result_, response, cn, rawStream);
    }
}
///////////////////////////////////////////////////////////////
// CCORR_8U
// Matcher returned by createTemplateMatching for CV_8U sources with TM_CCORR.
// Also embedded as a building block in the other 8U matchers of this file.
class Match_CCORR_8U : public TemplateMatching
{
public:
// Forwards user_block_size to the embedded CV_32F matcher.
explicit Match_CCORR_8U(Size user_block_size) : match32F_(user_block_size)
{
}
// Writes the CV_32FC1 cross-correlation map into result.
void match(InputArray image, InputArray templ, OutputArray result, Stream& stream = Stream::Null());
private:
// CV_32F conversions of the image and the template, filled by match() when
// the template area reaches getTemplateThreshold().
GpuMat imagef_, templf_;
// Matcher that processes the converted CV_32F data.
Match_CCORR_32F match32F_;
};
// Fills _result with the TM_CCORR response for CV_8U inputs.
//
// Inputs are asserted to have depth CV_8U, equal types, and a template no
// larger than the image. Templates whose area is below
// getTemplateThreshold(TM_CCORR, CV_8U) go through the naive device routine;
// otherwise both inputs are converted to CV_32F and handed to the embedded
// Match_CCORR_32F, which creates the output itself.
void Match_CCORR_8U::match(InputArray _image, InputArray _templ, OutputArray _result, Stream& stream)
{
    namespace mt = cv::cuda::device::match_template;

    const GpuMat image = _image.getGpuMat();
    const GpuMat templ = _templ.getGpuMat();

    CV_Assert( image.depth() == CV_8U );
    CV_Assert( image.type() == templ.type() );
    CV_Assert( image.cols >= templ.cols && image.rows >= templ.rows );

    const bool useNaive = templ.size().area() < getTemplateThreshold(TM_CCORR, CV_8U);

    if (useNaive)
    {
        const int outRows = image.rows - templ.rows + 1;
        const int outCols = image.cols - templ.cols + 1;
        _result.create(outRows, outCols, CV_32FC1);
        GpuMat response = _result.getGpuMat();

        mt::matchTemplateNaive_CCORR_8U(image, templ, response, image.channels(), StreamAccessor::getStream(stream));
    }
    else
    {
        image.convertTo(imagef_, CV_32F, stream);
        templ.convertTo(templf_, CV_32F, stream);
        match32F_.match(imagef_, templf_, _result, stream);
    }
}
///////////////////////////////////////////////////////////////
// CCORR_NORMED_8U
// Matcher returned by createTemplateMatching for CV_8U sources with TM_CCORR_NORMED.
class Match_CCORR_NORMED_8U : public TemplateMatching
{
public:
// Forwards user_block_size to the embedded CCORR matcher.
explicit Match_CCORR_NORMED_8U(Size user_block_size) : match_CCORR_(user_block_size)
{
}
// Runs the CCORR matcher into result, then passes result to normalize_8U.
void match(InputArray image, InputArray templ, OutputArray result, Stream& stream = Stream::Null());
private:
// Produces the cross-correlation map that normalize_8U then operates on.
Match_CCORR_8U match_CCORR_;
// Output of cuda::sqrIntegral on the image viewed as single-channel.
GpuMat image_sqsums_;
};
// Fills _result for TM_CCORR_NORMED on CV_8U inputs.
//
// Steps: (1) the embedded CCORR matcher writes the correlation map into _result,
// (2) the squared-integral table of the image and the squared sum of the
// template are computed on the single-channel views, (3) normalize_8U is run
// on the correlation map with those values.
void Match_CCORR_NORMED_8U::match(InputArray _image, InputArray _templ, OutputArray _result, Stream& stream)
{
    namespace mt = cv::cuda::device::match_template;

    const GpuMat image = _image.getGpuMat();
    const GpuMat templ = _templ.getGpuMat();

    CV_Assert( image.depth() == CV_8U );
    CV_Assert( image.type() == templ.type() );
    CV_Assert( image.cols >= templ.cols && image.rows >= templ.rows );

    // (1) correlation map
    match_CCORR_.match(image, templ, _result, stream);
    GpuMat response = _result.getGpuMat();

    // (2) normalisation inputs
    cuda::sqrIntegral(image.reshape(1), image_sqsums_, stream);
    const Scalar templSqSums = cuda::sqrSum(templ.reshape(1));
    const double templ_sqsum = templSqSums[0];

    // (3) normalisation step on the correlation map
    cudaStream_t rawStream = StreamAccessor::getStream(stream);
    mt::normalize_8U(templ.cols, templ.rows, image_sqsums_, templ_sqsum, response, image.channels(), rawStream);
}
///////////////////////////////////////////////////////////////
// SQDIFF_32F
// Matcher returned by createTemplateMatching for CV_32F sources with TM_SQDIFF.
// Holds no state; match() always calls the naive device routine.
class Match_SQDIFF_32F : public TemplateMatching
{
public:
// Writes the CV_32FC1 result map via matchTemplateNaive_SQDIFF_32F.
void match(InputArray image, InputArray templ, OutputArray result, Stream& stream = Stream::Null());
};
// Fills _result for TM_SQDIFF on CV_32F inputs using the naive device routine.
//
// Inputs are asserted to have depth CV_32F, equal types, and a template no
// larger than the image. _result is (re)created as CV_32FC1 with
// (image.rows - templ.rows + 1) rows and (image.cols - templ.cols + 1) columns.
void Match_SQDIFF_32F::match(InputArray _image, InputArray _templ, OutputArray _result, Stream& stream)
{
    namespace mt = cv::cuda::device::match_template;

    const GpuMat image = _image.getGpuMat();
    const GpuMat templ = _templ.getGpuMat();

    CV_Assert( image.depth() == CV_32F );
    CV_Assert( image.type() == templ.type() );
    CV_Assert( image.cols >= templ.cols && image.rows >= templ.rows );

    const int outRows = image.rows - templ.rows + 1;
    const int outCols = image.cols - templ.cols + 1;
    _result.create(outRows, outCols, CV_32FC1);
    GpuMat response = _result.getGpuMat();

    cudaStream_t rawStream = StreamAccessor::getStream(stream);
    mt::matchTemplateNaive_SQDIFF_32F(image, templ, response, image.channels(), rawStream);
}
///////////////////////////////////////////////////////////////
// SQDIFF_8U
// Matcher returned by createTemplateMatching for CV_8U sources with TM_SQDIFF.
class Match_SQDIFF_8U : public TemplateMatching
{
public:
// Forwards user_block_size to the embedded CCORR matcher.
explicit Match_SQDIFF_8U(Size user_block_size) : match_CCORR_(user_block_size)
{
}
// Uses the naive device routine for templates below getTemplateThreshold(),
// otherwise the CCORR matcher followed by matchTemplatePrepared_SQDIFF_8U.
void match(InputArray image, InputArray templ, OutputArray result, Stream& stream = Stream::Null());
private:
// Output of cuda::sqrIntegral on the image viewed as single-channel.
GpuMat image_sqsums_;
// Produces the cross-correlation map consumed by the prepared SQDIFF routine.
Match_CCORR_8U match_CCORR_;
};
// Fills _result for TM_SQDIFF on CV_8U inputs.
//
// Templates whose area is below getTemplateThreshold(TM_SQDIFF, CV_8U) are
// handled by the naive device routine. Larger ones use the squared-integral
// table of the image, the squared sum of the template and the CCORR map, which
// matchTemplatePrepared_SQDIFF_8U receives together.
void Match_SQDIFF_8U::match(InputArray _image, InputArray _templ, OutputArray _result, Stream& stream)
{
    namespace mt = cv::cuda::device::match_template;

    const GpuMat image = _image.getGpuMat();
    const GpuMat templ = _templ.getGpuMat();

    CV_Assert( image.depth() == CV_8U );
    CV_Assert( image.type() == templ.type() );
    CV_Assert( image.cols >= templ.cols && image.rows >= templ.rows );

    const bool useNaive = templ.size().area() < getTemplateThreshold(TM_SQDIFF, CV_8U);

    if (useNaive)
    {
        const int outRows = image.rows - templ.rows + 1;
        const int outCols = image.cols - templ.cols + 1;
        _result.create(outRows, outCols, CV_32FC1);
        GpuMat response = _result.getGpuMat();

        mt::matchTemplateNaive_SQDIFF_8U(image, templ, response, image.channels(), StreamAccessor::getStream(stream));
    }
    else
    {
        cuda::sqrIntegral(image.reshape(1), image_sqsums_, stream);
        const Scalar templSqSums = cuda::sqrSum(templ.reshape(1));
        const double templ_sqsum = templSqSums[0];

        // The CCORR matcher creates and fills the output map.
        match_CCORR_.match(image, templ, _result, stream);
        GpuMat response = _result.getGpuMat();

        cudaStream_t rawStream = StreamAccessor::getStream(stream);
        mt::matchTemplatePrepared_SQDIFF_8U(templ.cols, templ.rows, image_sqsums_, templ_sqsum, response, image.channels(), rawStream);
    }
}
///////////////////////////////////////////////////////////////
// SQDIFF_NORMED_8U
// Matcher returned by createTemplateMatching for CV_8U sources with TM_SQDIFF_NORMED.
class Match_SQDIFF_NORMED_8U : public TemplateMatching
{
public:
// Forwards user_block_size to the embedded CCORR matcher.
explicit Match_SQDIFF_NORMED_8U(Size user_block_size) : match_CCORR_(user_block_size)
{
}
// Runs the CCORR matcher into result, then matchTemplatePrepared_SQDIFF_NORMED_8U on it.
void match(InputArray image, InputArray templ, OutputArray result, Stream& stream = Stream::Null());
private:
// Output of cuda::sqrIntegral on the image viewed as single-channel.
GpuMat image_sqsums_;
// Produces the cross-correlation map consumed by the prepared routine.
Match_CCORR_8U match_CCORR_;
};
// Fills _result for TM_SQDIFF_NORMED on CV_8U inputs.
//
// The squared-integral table of the image and the squared sum of the template
// are computed first; the embedded CCORR matcher then writes the correlation
// map into _result, and matchTemplatePrepared_SQDIFF_NORMED_8U is run on that
// map with the two precomputed inputs.
void Match_SQDIFF_NORMED_8U::match(InputArray _image, InputArray _templ, OutputArray _result, Stream& stream)
{
    namespace mt = cv::cuda::device::match_template;

    const GpuMat image = _image.getGpuMat();
    const GpuMat templ = _templ.getGpuMat();

    CV_Assert( image.depth() == CV_8U );
    CV_Assert( image.type() == templ.type() );
    CV_Assert( image.cols >= templ.cols && image.rows >= templ.rows );

    cuda::sqrIntegral(image.reshape(1), image_sqsums_, stream);
    const Scalar templSqSums = cuda::sqrSum(templ.reshape(1));
    const double templ_sqsum = templSqSums[0];

    match_CCORR_.match(image, templ, _result, stream);
    GpuMat response = _result.getGpuMat();

    cudaStream_t rawStream = StreamAccessor::getStream(stream);
    mt::matchTemplatePrepared_SQDIFF_NORMED_8U(templ.cols, templ.rows, image_sqsums_, templ_sqsum, response, image.channels(), rawStream);
}
///////////////////////////////////////////////////////////////
// CCOEFF_8U
// Matcher returned by createTemplateMatching for CV_8U sources with TM_CCOEFF.
// (The device routines it calls are spelled "CCOFF".)
class Match_CCOEFF_8U : public TemplateMatching
{
public:
// Forwards user_block_size to the embedded CCORR matcher.
explicit Match_CCOEFF_8U(Size user_block_size) : match_CCORR_(user_block_size)
{
}
// Runs the CCORR matcher into result, then the matchTemplatePrepared_CCOFF_8U*
// routine that matches the channel count (1 to 4).
void match(InputArray image, InputArray templ, OutputArray result, Stream& stream = Stream::Null());
private:
// Per-channel planes produced by cuda::split for multi-channel images.
std::vector<GpuMat> images_;
// Per-channel outputs of cuda::integral.
std::vector<GpuMat> image_sums_;
// Produces the cross-correlation map consumed by the prepared routine.
Match_CCORR_8U match_CCORR_;
};
// Fills _result for TM_CCOEFF on CV_8U inputs with 1 to 4 channels.
//
// _image, _templ : GpuMat-backed arrays; asserted to have depth CV_8U, equal
//                  types, and a template no larger than the image.
// _result        : created and filled by the embedded CCORR matcher, then
//                  passed to the matchTemplatePrepared_CCOFF_8U* routine.
// stream         : stream on which the GPU work of this call is enqueued.
//
// Raises Error::StsBadArg when the channel count is not 1, 2, 3 or 4.
void Match_CCOEFF_8U::match(InputArray _image, InputArray _templ, OutputArray _result, Stream& stream)
{
    using namespace cv::cuda::device::match_template;

    GpuMat image = _image.getGpuMat();
    GpuMat templ = _templ.getGpuMat();

    CV_Assert( image.depth() == CV_8U );
    CV_Assert( image.type() == templ.type() );
    CV_Assert( image.cols >= templ.cols && image.rows >= templ.rows );

    // The cross-correlation map is produced first.
    match_CCORR_.match(image, templ, _result, stream);
    GpuMat result = _result.getGpuMat();

    const int cn = image.channels();
    cudaStream_t cuStream = StreamAccessor::getStream(stream);

    if (cn == 1)
    {
        image_sums_.resize(1);
        cuda::integral(image, image_sums_[0], stream);

        // NOTE(review): cuda::sum is called without a stream argument here, so it
        // is not enqueued on `stream` -- confirm the template is ready by then.
        int templ_sum = (int) cuda::sum(templ)[0];

        matchTemplatePrepared_CCOFF_8U(templ.cols, templ.rows, image_sums_[0], templ_sum, result, cuStream);
    }
    else
    {
        // Split on the caller's stream. The original call omitted the stream and
        // therefore ran on the null stream, while the integrals that read
        // images_ are enqueued on `stream`.
        cuda::split(image, images_, stream);

        image_sums_.resize(images_.size());
        for (int i = 0; i < cn; ++i)
            cuda::integral(images_[i], image_sums_[i], stream);

        Scalar templ_sum = cuda::sum(templ);

        switch (cn)
        {
        case 2:
            matchTemplatePrepared_CCOFF_8UC2(
                    templ.cols, templ.rows, image_sums_[0], image_sums_[1],
                    (int) templ_sum[0], (int) templ_sum[1],
                    result, cuStream);
            break;
        case 3:
            matchTemplatePrepared_CCOFF_8UC3(
                    templ.cols, templ.rows, image_sums_[0], image_sums_[1], image_sums_[2],
                    (int) templ_sum[0], (int) templ_sum[1], (int) templ_sum[2],
                    result, cuStream);
            break;
        case 4:
            matchTemplatePrepared_CCOFF_8UC4(
                    templ.cols, templ.rows, image_sums_[0], image_sums_[1], image_sums_[2], image_sums_[3],
                    (int) templ_sum[0], (int) templ_sum[1], (int) templ_sum[2], (int) templ_sum[3],
                    result, cuStream);
            break;
        default:
            CV_Error(Error::StsBadArg, "unsupported number of channels");
        }
    }
}
///////////////////////////////////////////////////////////////
// CCOEFF_NORMED_8U
// Matcher returned by createTemplateMatching for CV_8U sources with TM_CCOEFF_NORMED.
// (The device routines it calls are spelled "CCOFF_NORMED".)
class Match_CCOEFF_NORMED_8U : public TemplateMatching
{
public:
// Forwards user_block_size to the embedded CV_32F CCORR matcher.
explicit Match_CCOEFF_NORMED_8U(Size user_block_size) : match_CCORR_32F_(user_block_size)
{
}
// Converts the inputs to CV_32F, runs the CCORR matcher into result, then the
// matchTemplatePrepared_CCOFF_NORMED_8U* routine that matches the channel count (1 to 4).
void match(InputArray image, InputArray templ, OutputArray result, Stream& stream = Stream::Null());
private:
// CV_32F conversions of the image and the template.
GpuMat imagef_, templf_;
// Matcher that processes the converted CV_32F data.
Match_CCORR_32F match_CCORR_32F_;
// Per-channel planes produced by cuda::split for multi-channel images.
std::vector<GpuMat> images_;
// Per-channel outputs of cuda::integral.
std::vector<GpuMat> image_sums_;
// Per-channel outputs of cuda::sqrIntegral.
std::vector<GpuMat> image_sqsums_;
};
// Fills _result for TM_CCOEFF_NORMED on CV_8U inputs with 1 to 4 channels.
//
// _image, _templ : GpuMat-backed arrays; asserted to have depth CV_8U, equal
//                  types, and a template no larger than the image.
// _result        : created and filled by the embedded CV_32F CCORR matcher,
//                  then passed to the matchTemplatePrepared_CCOFF_NORMED_8U*
//                  routine.
// stream         : stream on which the GPU work of this call is enqueued.
//
// Raises Error::StsBadArg when the channel count is not 1, 2, 3 or 4.
void Match_CCOEFF_NORMED_8U::match(InputArray _image, InputArray _templ, OutputArray _result, Stream& stream)
{
    using namespace cv::cuda::device::match_template;

    GpuMat image = _image.getGpuMat();
    GpuMat templ = _templ.getGpuMat();

    CV_Assert( image.depth() == CV_8U );
    CV_Assert( image.type() == templ.type() );
    CV_Assert( image.cols >= templ.cols && image.rows >= templ.rows );

    // The cross-correlation map is computed on CV_32F copies of the inputs.
    image.convertTo(imagef_, CV_32F, stream);
    templ.convertTo(templf_, CV_32F, stream);

    match_CCORR_32F_.match(imagef_, templf_, _result, stream);
    GpuMat result = _result.getGpuMat();

    const int cn = image.channels();
    cudaStream_t cuStream = StreamAccessor::getStream(stream);

    if (cn == 1)
    {
        image_sums_.resize(1);
        cuda::integral(image, image_sums_[0], stream);

        image_sqsums_.resize(1);
        cuda::sqrIntegral(image, image_sqsums_[0], stream);

        // NOTE(review): cuda::sum / cuda::sqrSum are called without a stream
        // argument here, so they are not enqueued on `stream` -- confirm the
        // template is ready by then.
        int templ_sum = (int) cuda::sum(templ)[0];
        double templ_sqsum = cuda::sqrSum(templ)[0];

        matchTemplatePrepared_CCOFF_NORMED_8U(
                templ.cols, templ.rows, image_sums_[0], image_sqsums_[0],
                templ_sum, templ_sqsum, result, cuStream);
    }
    else
    {
        // Split on the caller's stream. The original call omitted the stream and
        // therefore ran on the null stream, while the integrals that read
        // images_ are enqueued on `stream`.
        cuda::split(image, images_, stream);

        image_sums_.resize(images_.size());
        image_sqsums_.resize(images_.size());
        for (int i = 0; i < cn; ++i)
        {
            cuda::integral(images_[i], image_sums_[i], stream);
            cuda::sqrIntegral(images_[i], image_sqsums_[i], stream);
        }

        Scalar templ_sum = cuda::sum(templ);
        Scalar templ_sqsum = cuda::sqrSum(templ);

        switch (cn)
        {
        case 2:
            matchTemplatePrepared_CCOFF_NORMED_8UC2(
                    templ.cols, templ.rows,
                    image_sums_[0], image_sqsums_[0],
                    image_sums_[1], image_sqsums_[1],
                    (int)templ_sum[0], templ_sqsum[0],
                    (int)templ_sum[1], templ_sqsum[1],
                    result, cuStream);
            break;
        case 3:
            matchTemplatePrepared_CCOFF_NORMED_8UC3(
                    templ.cols, templ.rows,
                    image_sums_[0], image_sqsums_[0],
                    image_sums_[1], image_sqsums_[1],
                    image_sums_[2], image_sqsums_[2],
                    (int)templ_sum[0], templ_sqsum[0],
                    (int)templ_sum[1], templ_sqsum[1],
                    (int)templ_sum[2], templ_sqsum[2],
                    result, cuStream);
            break;
        case 4:
            matchTemplatePrepared_CCOFF_NORMED_8UC4(
                    templ.cols, templ.rows,
                    image_sums_[0], image_sqsums_[0],
                    image_sums_[1], image_sqsums_[1],
                    image_sums_[2], image_sqsums_[2],
                    image_sums_[3], image_sqsums_[3],
                    (int)templ_sum[0], templ_sqsum[0],
                    (int)templ_sum[1], templ_sqsum[1],
                    (int)templ_sum[2], templ_sqsum[2],
                    (int)templ_sum[3], templ_sqsum[3],
                    result, cuStream);
            break;
        default:
            CV_Error(Error::StsBadArg, "unsupported number of channels");
        }
    }
}
}
// Creates the template-matching implementation for a source type and method.
//
// srcType         : matrix type of the images to be matched; only its depth is
//                   examined and it must be CV_8U or CV_32F (CV_Assert).
// method          : one of the TM_* constants. CV_32F supports TM_SQDIFF and
//                   TM_CCORR; CV_8U supports TM_SQDIFF, TM_SQDIFF_NORMED,
//                   TM_CCORR, TM_CCORR_NORMED, TM_CCOEFF and TM_CCOEFF_NORMED.
// user_block_size : forwarded to every matcher that takes it (all except the
//                   CV_32F TM_SQDIFF matcher).
//
// Raises Error::StsBadFlag for any other method.
Ptr<cuda::TemplateMatching> cv::cuda::createTemplateMatching(int srcType, int method, Size user_block_size)
{
    const int sdepth = CV_MAT_DEPTH(srcType);

    CV_Assert( sdepth == CV_8U || sdepth == CV_32F );

    if (sdepth == CV_32F)
    {
        switch (method)
        {
        case TM_SQDIFF:
            return makePtr<Match_SQDIFF_32F>();

        case TM_CCORR:
            return makePtr<Match_CCORR_32F>(user_block_size);

        default:
            // Spelling of the user-facing message corrected (was "Unsopported").
            CV_Error( Error::StsBadFlag, "Unsupported method" );
            return Ptr<cuda::TemplateMatching>();
        }
    }
    else
    {
        switch (method)
        {
        case TM_SQDIFF:
            return makePtr<Match_SQDIFF_8U>(user_block_size);

        case TM_SQDIFF_NORMED:
            return makePtr<Match_SQDIFF_NORMED_8U>(user_block_size);

        case TM_CCORR:
            return makePtr<Match_CCORR_8U>(user_block_size);

        case TM_CCORR_NORMED:
            return makePtr<Match_CCORR_NORMED_8U>(user_block_size);

        case TM_CCOEFF:
            return makePtr<Match_CCOEFF_8U>(user_block_size);

        case TM_CCOEFF_NORMED:
            return makePtr<Match_CCOEFF_NORMED_8U>(user_block_size);

        default:
            // Spelling of the user-facing message corrected (was "Unsopported").
            CV_Error( Error::StsBadFlag, "Unsupported method" );
            return Ptr<cuda::TemplateMatching>();
        }
    }
}
#endif