/*M///////////////////////////////////////////////////////////////////////////////////////
//
//  IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
//
//  By downloading, copying, installing or using the software you agree to this license.
//  If you do not agree to this license, do not download, install,
//  copy or use the software.
//
//
//                           License Agreement
//                For Open Source Computer Vision Library
//
// Copyright (C) 2000-2008, Intel Corporation, all rights reserved.
// Copyright (C) 2009, Willow Garage Inc., all rights reserved.
// Third party copyrights are property of their respective owners.
//
// Redistribution and use in source and binary forms, with or without modification,
// are permitted provided that the following conditions are met:
//
//   * Redistribution's of source code must retain the above copyright notice,
//     this list of conditions and the following disclaimer.
//
//   * Redistribution's in binary form must reproduce the above copyright notice,
//     this list of conditions and the following disclaimer in the documentation
//     and/or other materials provided with the distribution.
//
//   * The name of the copyright holders may not be used to endorse or promote products
//     derived from this software without specific prior written permission.
//
// This software is provided by the copyright holders and contributors "as is" and
// any express or implied warranties, including, but not limited to, the implied
// warranties of merchantability and fitness for a particular purpose are disclaimed.
// In no event shall the Intel Corporation or contributors be liable for any direct,
// indirect, incidental, special, exemplary, or consequential damages
// (including, but not limited to, procurement of substitute goods or services;
// loss of use, data, or profits; or business interruption) however caused
// and on any theory of liability, whether in contract, strict liability,
// or tort (including negligence or otherwise) arising in any way out of
// the use of this software, even if advised of the possibility of such damage.
//
//M*/

#include "precomp.hpp"

using namespace cv;
using namespace cv::cuda;

#if !defined (HAVE_CUDA) || !defined (HAVE_OPENCV_CUDAARITHM) || defined (CUDA_DISABLER)

// Stub used when CUDA (or the cudaarithm module) is unavailable: always throws.
Ptr<cuda::TemplateMatching> cv::cuda::createTemplateMatching(int, int, Size) { throw_no_cuda(); return Ptr<cuda::TemplateMatching>(); }

#else

// Prototypes of the device-side entry points used below. They are only declared
// in this file; their definitions live in another translation unit.
namespace cv { namespace cuda { namespace device
{
    namespace match_template
    {
        void matchTemplateNaive_CCORR_8U(const PtrStepSzb image, const PtrStepSzb templ, PtrStepSzf result, int cn, cudaStream_t stream);
        void matchTemplateNaive_CCORR_32F(const PtrStepSzb image, const PtrStepSzb templ, PtrStepSzf result, int cn, cudaStream_t stream);

        void matchTemplateNaive_SQDIFF_8U(const PtrStepSzb image, const PtrStepSzb templ, PtrStepSzf result, int cn, cudaStream_t stream);
        void matchTemplateNaive_SQDIFF_32F(const PtrStepSzb image, const PtrStepSzb templ, PtrStepSzf result, int cn, cudaStream_t stream);

        void matchTemplatePrepared_SQDIFF_8U(int w, int h, const PtrStepSz<double> image_sqsum, double templ_sqsum, PtrStepSzf result,
            int cn, cudaStream_t stream);

        void matchTemplatePrepared_SQDIFF_NORMED_8U(int w, int h, const PtrStepSz<double> image_sqsum, double templ_sqsum, PtrStepSzf result,
            int cn, cudaStream_t stream);

        void matchTemplatePrepared_CCOFF_8U(int w, int h, const PtrStepSz<int> image_sum, int templ_sum, PtrStepSzf result, cudaStream_t stream);
        void matchTemplatePrepared_CCOFF_8UC2(
            int w, int h,
            const PtrStepSz<int> image_sum_r,
            const PtrStepSz<int> image_sum_g,
            int templ_sum_r,
            int templ_sum_g,
            PtrStepSzf result, cudaStream_t stream);
        void matchTemplatePrepared_CCOFF_8UC3(
            int w, int h,
            const PtrStepSz<int> image_sum_r,
            const PtrStepSz<int> image_sum_g,
            const PtrStepSz<int> image_sum_b,
            int templ_sum_r,
            int templ_sum_g,
            int templ_sum_b,
            PtrStepSzf result, cudaStream_t stream);
        void matchTemplatePrepared_CCOFF_8UC4(
            int w, int h,
            const PtrStepSz<int> image_sum_r,
            const PtrStepSz<int> image_sum_g,
            const PtrStepSz<int> image_sum_b,
            const PtrStepSz<int> image_sum_a,
            int templ_sum_r,
            int templ_sum_g,
            int templ_sum_b,
            int templ_sum_a,
            PtrStepSzf result, cudaStream_t stream);

        void matchTemplatePrepared_CCOFF_NORMED_8U(
            int w, int h, const PtrStepSz<int> image_sum,
            const PtrStepSz<double> image_sqsum,
            int templ_sum, double templ_sqsum,
            PtrStepSzf result, cudaStream_t stream);
        void matchTemplatePrepared_CCOFF_NORMED_8UC2(
            int w, int h,
            const PtrStepSz<int> image_sum_r, const PtrStepSz<double> image_sqsum_r,
            const PtrStepSz<int> image_sum_g, const PtrStepSz<double> image_sqsum_g,
            int templ_sum_r, double templ_sqsum_r,
            int templ_sum_g, double templ_sqsum_g,
            PtrStepSzf result, cudaStream_t stream);
        void matchTemplatePrepared_CCOFF_NORMED_8UC3(
            int w, int h,
            const PtrStepSz<int> image_sum_r, const PtrStepSz<double> image_sqsum_r,
            const PtrStepSz<int> image_sum_g, const PtrStepSz<double> image_sqsum_g,
            const PtrStepSz<int> image_sum_b, const PtrStepSz<double> image_sqsum_b,
            int templ_sum_r, double templ_sqsum_r,
            int templ_sum_g, double templ_sqsum_g,
            int templ_sum_b, double templ_sqsum_b,
            PtrStepSzf result, cudaStream_t stream);
        void matchTemplatePrepared_CCOFF_NORMED_8UC4(
            int w, int h,
            const PtrStepSz<int> image_sum_r, const PtrStepSz<double> image_sqsum_r,
            const PtrStepSz<int> image_sum_g, const PtrStepSz<double> image_sqsum_g,
            const PtrStepSz<int> image_sum_b, const PtrStepSz<double> image_sqsum_b,
            const PtrStepSz<int> image_sum_a, const PtrStepSz<double> image_sqsum_a,
            int templ_sum_r, double templ_sqsum_r,
            int templ_sum_g, double templ_sqsum_g,
            int templ_sum_b, double templ_sqsum_b,
            int templ_sum_a, double templ_sqsum_a,
            PtrStepSzf result, cudaStream_t stream);

        void normalize_8U(int w, int h, const PtrStepSz<double> image_sqsum,
                          double templ_sqsum, PtrStepSzf result, int cn, cudaStream_t stream);

        void extractFirstChannel_32F(const PtrStepSzb image, PtrStepSzf result, int cn, cudaStream_t stream);
    }
}}}

namespace
{
    // Evaluates optimal template's area threshold. If
    // template's area is less than the threshold, we use naive match
    // template version, otherwise FFT-based (if available).
    //
    // Only (TM_CCORR, CV_32F), (TM_CCORR, CV_8U) and (TM_SQDIFF, CV_8U) are
    // handled; any other combination raises Error::StsBadArg.
    int getTemplateThreshold(int method, int depth)
    {
        switch (method)
        {
        case TM_CCORR:
            if (depth == CV_32F) return 250;
            if (depth == CV_8U) return 300;
            break;

        case TM_SQDIFF:
            if (depth == CV_8U) return 300;
            break;
        }

        CV_Error(Error::StsBadArg, "unsupported match template mode");
        return 0; // not reached when CV_Error throws; keeps compilers quiet
    }

    ///////////////////////////////////////////////////////////////
    // CCORR_32F

    // Cross-correlation matcher for CV_32F inputs. Small templates go through
    // the naive device routine, larger ones through cuda::Convolution.
    class Match_CCORR_32F : public TemplateMatching
    {
    public:
        explicit Match_CCORR_32F(Size user_block_size);

        void match(InputArray image, InputArray templ, OutputArray result, Stream& stream = Stream::Null());

    private:
        Ptr<cuda::Convolution> conv_;
        GpuMat result_; // intermediate convolution output for multi-channel input
    };

    Match_CCORR_32F::Match_CCORR_32F(Size user_block_size)
    {
        conv_ = cuda::createConvolution(user_block_size);
    }

    // Writes a CV_32FC1 map of size (image.rows - templ.rows + 1) x
    // (image.cols - templ.cols + 1) into _result. Asserts that image is CV_32F,
    // that both inputs share a type and that the template fits inside the image.
    void Match_CCORR_32F::match(InputArray _image, InputArray _templ, OutputArray _result, Stream& _stream)
    {
        using namespace cv::cuda::device::match_template;

        GpuMat image = _image.getGpuMat();
        GpuMat templ = _templ.getGpuMat();

        CV_Assert( image.depth() == CV_32F );
        CV_Assert( image.type() == templ.type() );
        CV_Assert( image.cols >= templ.cols && image.rows >= templ.rows );

        cudaStream_t stream = StreamAccessor::getStream(_stream);

        _result.create(image.rows - templ.rows + 1, image.cols - templ.cols + 1, CV_32FC1);
        GpuMat result = _result.getGpuMat();

        // Templates below the area threshold use the naive routine directly.
        if (templ.size().area() < getTemplateThreshold(TM_CCORR, CV_32F))
        {
            matchTemplateNaive_CCORR_32F(image, templ, result, image.channels(), stream);
            return;
        }

        // Both inputs are reshaped to one channel before being convolved.
        // NOTE(review): the `true` flag presumably selects cross-correlation
        // rather than convolution - confirm against cuda::Convolution::convolve.
        if (image.channels() == 1)
        {
            conv_->convolve(image.reshape(1), templ.reshape(1), result, true, _stream);
        }
        else
        {
            // Multi-channel input: convolve into the scratch buffer, then let
            // extractFirstChannel_32F produce the single-channel result.
            conv_->convolve(image.reshape(1), templ.reshape(1), result_, true, _stream);
            extractFirstChannel_32F(result_, result, image.channels(), stream);
        }
    }

    ///////////////////////////////////////////////////////////////
    // CCORR_8U

    // Cross-correlation matcher for CV_8U inputs. Small templates use the naive
    // 8U routine; otherwise the inputs are converted to CV_32F and delegated to
    // Match_CCORR_32F.
    class Match_CCORR_8U : public TemplateMatching
    {
    public:
        explicit Match_CCORR_8U(Size user_block_size) : match32F_(user_block_size)
        {
        }

        void match(InputArray image, InputArray templ, OutputArray result, Stream& stream = Stream::Null());

    private:
        GpuMat imagef_, templf_; // CV_32F copies of the inputs
        Match_CCORR_32F match32F_;
    };

    void Match_CCORR_8U::match(InputArray _image, InputArray _templ, OutputArray _result, Stream& stream)
    {
        using namespace cv::cuda::device::match_template;

        GpuMat image = _image.getGpuMat();
        GpuMat templ = _templ.getGpuMat();

        CV_Assert( image.depth() == CV_8U );
        CV_Assert( image.type() == templ.type() );
        CV_Assert( image.cols >= templ.cols && image.rows >= templ.rows );

        if (templ.size().area() < getTemplateThreshold(TM_CCORR, CV_8U))
        {
            _result.create(image.rows - templ.rows + 1, image.cols - templ.cols + 1, CV_32FC1);
            GpuMat result = _result.getGpuMat();

            matchTemplateNaive_CCORR_8U(image, templ, result, image.channels(), StreamAccessor::getStream(stream));
            return;
        }

        // Larger templates: convert to float and reuse the 32F matcher, which
        // also allocates _result.
        image.convertTo(imagef_, CV_32F, stream);
        templ.convertTo(templf_, CV_32F, stream);

        match32F_.match(imagef_, templf_, _result, stream);
    }

    ///////////////////////////////////////////////////////////////
    // CCORR_NORMED_8U

    // Normalized cross-correlation for CV_8U inputs: runs Match_CCORR_8U and then
    // normalize_8U on its output.
    class Match_CCORR_NORMED_8U : public TemplateMatching
    {
    public:
        explicit Match_CCORR_NORMED_8U(Size user_block_size) : match_CCORR_(user_block_size)
        {
        }

        void match(InputArray image, InputArray templ, OutputArray result, Stream& stream = Stream::Null());

    private:
        Match_CCORR_8U match_CCORR_;
        GpuMat image_sqsums_; // squared integral of the (single-channel view of the) image
    };

    void Match_CCORR_NORMED_8U::match(InputArray _image, InputArray _templ, OutputArray _result, Stream& stream)
    {
        using namespace cv::cuda::device::match_template;

        GpuMat image = _image.getGpuMat();
        GpuMat templ = _templ.getGpuMat();

        CV_Assert( image.depth() == CV_8U );
        CV_Assert( image.type() == templ.type() );
        CV_Assert( image.cols >= templ.cols && image.rows >= templ.rows );

        match_CCORR_.match(image, templ, _result, stream);
        GpuMat result = _result.getGpuMat();

        cuda::sqrIntegral(image.reshape(1), image_sqsums_, stream);

        // NOTE(review): sqrSum is called without a stream and returns a host
        // value, so it presumably blocks until the sum is available - confirm.
        double templ_sqsum = cuda::sqrSum(templ.reshape(1))[0];

        normalize_8U(templ.cols, templ.rows, image_sqsums_, templ_sqsum, result, image.channels(), StreamAccessor::getStream(stream));
    }

    ///////////////////////////////////////////////////////////////
    // SQDIFF_32F

    // Squared-difference matcher for CV_32F inputs; always uses the naive routine.
    class Match_SQDIFF_32F : public TemplateMatching
    {
    public:
        void match(InputArray image, InputArray templ, OutputArray result, Stream& stream = Stream::Null());
    };

    void Match_SQDIFF_32F::match(InputArray _image, InputArray _templ, OutputArray _result, Stream& stream)
    {
        using namespace cv::cuda::device::match_template;

        GpuMat image = _image.getGpuMat();
        GpuMat templ = _templ.getGpuMat();

        CV_Assert( image.depth() == CV_32F );
        CV_Assert( image.type() == templ.type() );
        CV_Assert( image.cols >= templ.cols && image.rows >= templ.rows );

        _result.create(image.rows - templ.rows + 1, image.cols - templ.cols + 1, CV_32FC1);
        GpuMat result = _result.getGpuMat();

        matchTemplateNaive_SQDIFF_32F(image, templ, result, image.channels(), StreamAccessor::getStream(stream));
    }

    ///////////////////////////////////////////////////////////////
    // SQDIFF_8U

    // Squared-difference matcher for CV_8U inputs. Small templates use the naive
    // routine; otherwise the CCORR result is post-processed by
    // matchTemplatePrepared_SQDIFF_8U using the image squared integral and the
    // template squared sum.
    class Match_SQDIFF_8U : public TemplateMatching
    {
    public:
        explicit Match_SQDIFF_8U(Size user_block_size) : match_CCORR_(user_block_size)
        {
        }

        void match(InputArray image, InputArray templ, OutputArray result, Stream& stream = Stream::Null());

    private:
        GpuMat image_sqsums_;
        Match_CCORR_8U match_CCORR_;
    };

    void Match_SQDIFF_8U::match(InputArray _image, InputArray _templ, OutputArray _result, Stream& stream)
    {
        using namespace cv::cuda::device::match_template;

        GpuMat image = _image.getGpuMat();
        GpuMat templ = _templ.getGpuMat();

        CV_Assert( image.depth() == CV_8U );
        CV_Assert( image.type() == templ.type() );
        CV_Assert( image.cols >= templ.cols && image.rows >= templ.rows );

        if (templ.size().area() < getTemplateThreshold(TM_SQDIFF, CV_8U))
        {
            _result.create(image.rows - templ.rows + 1, image.cols - templ.cols + 1, CV_32FC1);
            GpuMat result = _result.getGpuMat();

            matchTemplateNaive_SQDIFF_8U(image, templ, result, image.channels(), StreamAccessor::getStream(stream));
            return;
        }

        cuda::sqrIntegral(image.reshape(1), image_sqsums_, stream);

        double templ_sqsum = cuda::sqrSum(templ.reshape(1))[0];

        match_CCORR_.match(image, templ, _result, stream);
        GpuMat result = _result.getGpuMat();

        matchTemplatePrepared_SQDIFF_8U(templ.cols, templ.rows, image_sqsums_, templ_sqsum, result, image.channels(), StreamAccessor::getStream(stream));
    }

    ///////////////////////////////////////////////////////////////
    // SQDIFF_NORMED_8U

    // Normalized squared-difference matcher for CV_8U inputs: CCORR result
    // post-processed by matchTemplatePrepared_SQDIFF_NORMED_8U.
    class Match_SQDIFF_NORMED_8U : public TemplateMatching
    {
    public:
        explicit Match_SQDIFF_NORMED_8U(Size user_block_size) : match_CCORR_(user_block_size)
        {
        }

        void match(InputArray image, InputArray templ, OutputArray result, Stream& stream = Stream::Null());

    private:
        GpuMat image_sqsums_;
        Match_CCORR_8U match_CCORR_;
    };

    void Match_SQDIFF_NORMED_8U::match(InputArray _image, InputArray _templ, OutputArray _result, Stream& stream)
    {
        using namespace cv::cuda::device::match_template;

        GpuMat image = _image.getGpuMat();
        GpuMat templ = _templ.getGpuMat();

        CV_Assert( image.depth() == CV_8U );
        CV_Assert( image.type() == templ.type() );
        CV_Assert( image.cols >= templ.cols && image.rows >= templ.rows );

        cuda::sqrIntegral(image.reshape(1), image_sqsums_, stream);

        double templ_sqsum = cuda::sqrSum(templ.reshape(1))[0];

        match_CCORR_.match(image, templ, _result, stream);
        GpuMat result = _result.getGpuMat();

        matchTemplatePrepared_SQDIFF_NORMED_8U(templ.cols, templ.rows, image_sqsums_, templ_sqsum, result, image.channels(), StreamAccessor::getStream(stream));
    }

    ///////////////////////////////////////////////////////////////
    // CCOEFF_8U

    // Correlation-coefficient matcher for CV_8U inputs with 1 to 4 channels.
    // The CCORR result is corrected by a matchTemplatePrepared_CCOFF_8U* routine
    // using per-channel image integrals and template sums.
    class Match_CCOEFF_8U : public TemplateMatching
    {
    public:
        explicit Match_CCOEFF_8U(Size user_block_size) : match_CCORR_(user_block_size)
        {
        }

        void match(InputArray image, InputArray templ, OutputArray result, Stream& stream = Stream::Null());

    private:
        std::vector<GpuMat> images_;     // per-channel planes of the image
        std::vector<GpuMat> image_sums_; // per-channel integral images
        Match_CCORR_8U match_CCORR_;
    };

    // Raises Error::StsBadArg for channel counts other than 1, 2, 3 or 4.
    void Match_CCOEFF_8U::match(InputArray _image, InputArray _templ, OutputArray _result, Stream& stream)
    {
        using namespace cv::cuda::device::match_template;

        GpuMat image = _image.getGpuMat();
        GpuMat templ = _templ.getGpuMat();

        CV_Assert( image.depth() == CV_8U );
        CV_Assert( image.type() == templ.type() );
        CV_Assert( image.cols >= templ.cols && image.rows >= templ.rows );

        match_CCORR_.match(image, templ, _result, stream);
        GpuMat result = _result.getGpuMat();

        cudaStream_t cuStream = StreamAccessor::getStream(stream);

        if (image.channels() == 1)
        {
            image_sums_.resize(1);
            cuda::integral(image, image_sums_[0], stream);

            int templ_sum = (int) cuda::sum(templ)[0];

            matchTemplatePrepared_CCOFF_8U(templ.cols, templ.rows, image_sums_[0], templ_sum, result, cuStream);
        }
        else
        {
            // Enqueue the split on the caller's stream so that it is ordered
            // with the integral computations below that consume images_.
            cuda::split(image, images_, stream);

            image_sums_.resize(images_.size());
            for (int i = 0; i < image.channels(); ++i)
                cuda::integral(images_[i], image_sums_[i], stream);

            Scalar templ_sum = cuda::sum(templ);

            switch (image.channels())
            {
            case 2:
                matchTemplatePrepared_CCOFF_8UC2(
                        templ.cols, templ.rows, image_sums_[0], image_sums_[1],
                        (int) templ_sum[0], (int) templ_sum[1],
                        result, cuStream);
                break;
            case 3:
                matchTemplatePrepared_CCOFF_8UC3(
                        templ.cols, templ.rows, image_sums_[0], image_sums_[1], image_sums_[2],
                        (int) templ_sum[0], (int) templ_sum[1], (int) templ_sum[2],
                        result, cuStream);
                break;
            case 4:
                matchTemplatePrepared_CCOFF_8UC4(
                        templ.cols, templ.rows, image_sums_[0], image_sums_[1], image_sums_[2], image_sums_[3],
                        (int) templ_sum[0], (int) templ_sum[1], (int) templ_sum[2], (int) templ_sum[3],
                        result, cuStream);
                break;
            default:
                CV_Error(Error::StsBadArg, "unsupported number of channels");
            }
        }
    }

    ///////////////////////////////////////////////////////////////
    // CCOEFF_NORMED_8U

    // Normalized correlation-coefficient matcher for CV_8U inputs with 1 to 4
    // channels. The inputs are converted to CV_32F for the correlation step; the
    // result is then corrected by a matchTemplatePrepared_CCOFF_NORMED_8U*
    // routine using per-channel integrals, squared integrals and template sums.
    class Match_CCOEFF_NORMED_8U : public TemplateMatching
    {
    public:
        explicit Match_CCOEFF_NORMED_8U(Size user_block_size) : match_CCORR_32F_(user_block_size)
        {
        }

        void match(InputArray image, InputArray templ, OutputArray result, Stream& stream = Stream::Null());

    private:
        GpuMat imagef_, templf_; // CV_32F copies of the inputs
        Match_CCORR_32F match_CCORR_32F_;
        std::vector<GpuMat> images_;       // per-channel planes of the image
        std::vector<GpuMat> image_sums_;   // per-channel integral images
        std::vector<GpuMat> image_sqsums_; // per-channel squared integral images
    };

    // Raises Error::StsBadArg for channel counts other than 1, 2, 3 or 4.
    void Match_CCOEFF_NORMED_8U::match(InputArray _image, InputArray _templ, OutputArray _result, Stream& stream)
    {
        using namespace cv::cuda::device::match_template;

        GpuMat image = _image.getGpuMat();
        GpuMat templ = _templ.getGpuMat();

        CV_Assert( image.depth() == CV_8U );
        CV_Assert( image.type() == templ.type() );
        CV_Assert( image.cols >= templ.cols && image.rows >= templ.rows );

        image.convertTo(imagef_, CV_32F, stream);
        templ.convertTo(templf_, CV_32F, stream);

        match_CCORR_32F_.match(imagef_, templf_, _result, stream);
        GpuMat result = _result.getGpuMat();

        cudaStream_t cuStream = StreamAccessor::getStream(stream);

        if (image.channels() == 1)
        {
            image_sums_.resize(1);
            cuda::integral(image, image_sums_[0], stream);

            image_sqsums_.resize(1);
            cuda::sqrIntegral(image, image_sqsums_[0], stream);

            int templ_sum = (int) cuda::sum(templ)[0];
            double templ_sqsum = cuda::sqrSum(templ)[0];

            matchTemplatePrepared_CCOFF_NORMED_8U(
                    templ.cols, templ.rows, image_sums_[0], image_sqsums_[0],
                    templ_sum, templ_sqsum, result, cuStream);
        }
        else
        {
            // Enqueue the split on the caller's stream so that it is ordered
            // with the integral computations below that consume images_.
            cuda::split(image, images_, stream);

            image_sums_.resize(images_.size());
            image_sqsums_.resize(images_.size());
            for (int i = 0; i < image.channels(); ++i)
            {
                cuda::integral(images_[i], image_sums_[i], stream);
                cuda::sqrIntegral(images_[i], image_sqsums_[i], stream);
            }

            Scalar templ_sum = cuda::sum(templ);
            Scalar templ_sqsum = cuda::sqrSum(templ);

            switch (image.channels())
            {
            case 2:
                matchTemplatePrepared_CCOFF_NORMED_8UC2(
                        templ.cols, templ.rows,
                        image_sums_[0], image_sqsums_[0],
                        image_sums_[1], image_sqsums_[1],
                        (int)templ_sum[0], templ_sqsum[0],
                        (int)templ_sum[1], templ_sqsum[1],
                        result, cuStream);
                break;
            case 3:
                matchTemplatePrepared_CCOFF_NORMED_8UC3(
                        templ.cols, templ.rows,
                        image_sums_[0], image_sqsums_[0],
                        image_sums_[1], image_sqsums_[1],
                        image_sums_[2], image_sqsums_[2],
                        (int)templ_sum[0], templ_sqsum[0],
                        (int)templ_sum[1], templ_sqsum[1],
                        (int)templ_sum[2], templ_sqsum[2],
                        result, cuStream);
                break;
            case 4:
                matchTemplatePrepared_CCOFF_NORMED_8UC4(
                        templ.cols, templ.rows,
                        image_sums_[0], image_sqsums_[0],
                        image_sums_[1], image_sqsums_[1],
                        image_sums_[2], image_sqsums_[2],
                        image_sums_[3], image_sqsums_[3],
                        (int)templ_sum[0], templ_sqsum[0],
                        (int)templ_sum[1], templ_sqsum[1],
                        (int)templ_sum[2], templ_sqsum[2],
                        (int)templ_sum[3], templ_sqsum[3],
                        result, cuStream);
                break;
            default:
                CV_Error(Error::StsBadArg, "unsupported number of channels");
            }
        }
    }
}

// Factory for the matchers defined above.
//
// srcType          - source type; only its depth is inspected and it must be
//                    CV_8U or CV_32F (asserted).
// method           - comparison method. CV_32F supports TM_SQDIFF and TM_CCORR;
//                    CV_8U supports TM_SQDIFF, TM_SQDIFF_NORMED, TM_CCORR,
//                    TM_CCORR_NORMED, TM_CCOEFF and TM_CCOEFF_NORMED.
// user_block_size  - forwarded to the matcher (and from there to
//                    cuda::createConvolution); unused by the 32F SQDIFF matcher.
//
// Raises Error::StsBadFlag for any other method.
Ptr<cuda::TemplateMatching> cv::cuda::createTemplateMatching(int srcType, int method, Size user_block_size)
{
    const int sdepth = CV_MAT_DEPTH(srcType);

    CV_Assert( sdepth == CV_8U || sdepth == CV_32F );

    if (sdepth == CV_32F)
    {
        switch (method)
        {
        case TM_SQDIFF:
            return makePtr<Match_SQDIFF_32F>();

        case TM_CCORR:
            return makePtr<Match_CCORR_32F>(user_block_size);

        default:
            CV_Error( Error::StsBadFlag, "Unsupported method" );
            return Ptr<cuda::TemplateMatching>();
        }
    }
    else
    {
        switch (method)
        {
        case TM_SQDIFF:
            return makePtr<Match_SQDIFF_8U>(user_block_size);

        case TM_SQDIFF_NORMED:
            return makePtr<Match_SQDIFF_NORMED_8U>(user_block_size);

        case TM_CCORR:
            return makePtr<Match_CCORR_8U>(user_block_size);

        case TM_CCORR_NORMED:
            return makePtr<Match_CCORR_NORMED_8U>(user_block_size);

        case TM_CCOEFF:
            return makePtr<Match_CCOEFF_8U>(user_block_size);

        case TM_CCOEFF_NORMED:
            return makePtr<Match_CCOEFF_NORMED_8U>(user_block_size);

        default:
            CV_Error( Error::StsBadFlag, "Unsupported method" );
            return Ptr<cuda::TemplateMatching>();
        }
    }
}

#endif