// This file is part of Eigen, a lightweight C++ template library // for linear algebra. // // Copyright (C) 2008 Guillaume Saupin <guillaume.saupin@cea.fr> // // This Source Code Form is subject to the terms of the Mozilla // Public License v. 2.0. If a copy of the MPL was not distributed // with this file, You can obtain one at http://mozilla.org/MPL/2.0/. #ifndef EIGEN_SKYLINEINPLACELU_H #define EIGEN_SKYLINEINPLACELU_H namespace Eigen { /** \ingroup Skyline_Module * * \class SkylineInplaceLU * * \brief Inplace LU decomposition of a skyline matrix and associated features * * \param MatrixType the type of the matrix of which we are computing the LU factorization * */ template<typename MatrixType> class SkylineInplaceLU { protected: typedef typename MatrixType::Scalar Scalar; typedef typename MatrixType::Index Index; typedef typename NumTraits<typename MatrixType::Scalar>::Real RealScalar; public: /** Creates a LU object and compute the respective factorization of \a matrix using * flags \a flags. */ SkylineInplaceLU(MatrixType& matrix, int flags = 0) : /*m_matrix(matrix.rows(), matrix.cols()),*/ m_flags(flags), m_status(0), m_lu(matrix) { m_precision = RealScalar(0.1) * Eigen::dummy_precision<RealScalar > (); m_lu.IsRowMajor ? computeRowMajor() : compute(); } /** Sets the relative threshold value used to prune zero coefficients during the decomposition. * * Setting a value greater than zero speeds up computation, and yields to an imcomplete * factorization with fewer non zero coefficients. Such approximate factors are especially * useful to initialize an iterative solver. * * Note that the exact meaning of this parameter might depends on the actual * backend. Moreover, not all backends support this feature. * * \sa precision() */ void setPrecision(RealScalar v) { m_precision = v; } /** \returns the current precision. * * \sa setPrecision() */ RealScalar precision() const { return m_precision; } /** Sets the flags. Possible values are: * - CompleteFactorization * - IncompleteFactorization * - MemoryEfficient * - one of the ordering methods * - etc... * * \sa flags() */ void setFlags(int f) { m_flags = f; } /** \returns the current flags */ int flags() const { return m_flags; } void setOrderingMethod(int m) { m_flags = m; } int orderingMethod() const { return m_flags; } /** Computes/re-computes the LU factorization */ void compute(); void computeRowMajor(); /** \returns the lower triangular matrix L */ //inline const MatrixType& matrixL() const { return m_matrixL; } /** \returns the upper triangular matrix U */ //inline const MatrixType& matrixU() const { return m_matrixU; } template<typename BDerived, typename XDerived> bool solve(const MatrixBase<BDerived> &b, MatrixBase<XDerived>* x, const int transposed = 0) const; /** \returns true if the factorization succeeded */ inline bool succeeded(void) const { return m_succeeded; } protected: RealScalar m_precision; int m_flags; mutable int m_status; bool m_succeeded; MatrixType& m_lu; }; /** Computes / recomputes the in place LU decomposition of the SkylineInplaceLU. * using the default algorithm. */ template<typename MatrixType> //template<typename _Scalar> void SkylineInplaceLU<MatrixType>::compute() { const size_t rows = m_lu.rows(); const size_t cols = m_lu.cols(); eigen_assert(rows == cols && "We do not (yet) support rectangular LU."); eigen_assert(!m_lu.IsRowMajor && "LU decomposition does not work with rowMajor Storage"); for (Index row = 0; row < rows; row++) { const double pivot = m_lu.coeffDiag(row); //Lower matrix Columns update const Index& col = row; for (typename MatrixType::InnerLowerIterator lIt(m_lu, col); lIt; ++lIt) { lIt.valueRef() /= pivot; } //Upper matrix update -> contiguous memory access typename MatrixType::InnerLowerIterator lIt(m_lu, col); for (Index rrow = row + 1; rrow < m_lu.rows(); rrow++) { typename MatrixType::InnerUpperIterator uItPivot(m_lu, row); typename MatrixType::InnerUpperIterator uIt(m_lu, rrow); const double coef = lIt.value(); uItPivot += (rrow - row - 1); //update upper part -> contiguous memory access for (++uItPivot; uIt && uItPivot;) { uIt.valueRef() -= uItPivot.value() * coef; ++uIt; ++uItPivot; } ++lIt; } //Upper matrix update -> non contiguous memory access typename MatrixType::InnerLowerIterator lIt3(m_lu, col); for (Index rrow = row + 1; rrow < m_lu.rows(); rrow++) { typename MatrixType::InnerUpperIterator uItPivot(m_lu, row); const double coef = lIt3.value(); //update lower part -> non contiguous memory access for (Index i = 0; i < rrow - row - 1; i++) { m_lu.coeffRefLower(rrow, row + i + 1) -= uItPivot.value() * coef; ++uItPivot; } ++lIt3; } //update diag -> contiguous typename MatrixType::InnerLowerIterator lIt2(m_lu, col); for (Index rrow = row + 1; rrow < m_lu.rows(); rrow++) { typename MatrixType::InnerUpperIterator uItPivot(m_lu, row); typename MatrixType::InnerUpperIterator uIt(m_lu, rrow); const double coef = lIt2.value(); uItPivot += (rrow - row - 1); m_lu.coeffRefDiag(rrow) -= uItPivot.value() * coef; ++lIt2; } } } template<typename MatrixType> void SkylineInplaceLU<MatrixType>::computeRowMajor() { const size_t rows = m_lu.rows(); const size_t cols = m_lu.cols(); eigen_assert(rows == cols && "We do not (yet) support rectangular LU."); eigen_assert(m_lu.IsRowMajor && "You're trying to apply rowMajor decomposition on a ColMajor matrix !"); for (Index row = 0; row < rows; row++) { typename MatrixType::InnerLowerIterator llIt(m_lu, row); for (Index col = llIt.col(); col < row; col++) { if (m_lu.coeffExistLower(row, col)) { const double diag = m_lu.coeffDiag(col); typename MatrixType::InnerLowerIterator lIt(m_lu, row); typename MatrixType::InnerUpperIterator uIt(m_lu, col); const Index offset = lIt.col() - uIt.row(); Index stop = offset > 0 ? col - lIt.col() : col - uIt.row(); //#define VECTORIZE #ifdef VECTORIZE Map<VectorXd > rowVal(lIt.valuePtr() + (offset > 0 ? 0 : -offset), stop); Map<VectorXd > colVal(uIt.valuePtr() + (offset > 0 ? offset : 0), stop); Scalar newCoeff = m_lu.coeffLower(row, col) - rowVal.dot(colVal); #else if (offset > 0) //Skip zero value of lIt uIt += offset; else //Skip zero values of uIt lIt += -offset; Scalar newCoeff = m_lu.coeffLower(row, col); for (Index k = 0; k < stop; ++k) { const Scalar tmp = newCoeff; newCoeff = tmp - lIt.value() * uIt.value(); ++lIt; ++uIt; } #endif m_lu.coeffRefLower(row, col) = newCoeff / diag; } } //Upper matrix update const Index col = row; typename MatrixType::InnerUpperIterator uuIt(m_lu, col); for (Index rrow = uuIt.row(); rrow < col; rrow++) { typename MatrixType::InnerLowerIterator lIt(m_lu, rrow); typename MatrixType::InnerUpperIterator uIt(m_lu, col); const Index offset = lIt.col() - uIt.row(); Index stop = offset > 0 ? rrow - lIt.col() : rrow - uIt.row(); #ifdef VECTORIZE Map<VectorXd > rowVal(lIt.valuePtr() + (offset > 0 ? 0 : -offset), stop); Map<VectorXd > colVal(uIt.valuePtr() + (offset > 0 ? offset : 0), stop); Scalar newCoeff = m_lu.coeffUpper(rrow, col) - rowVal.dot(colVal); #else if (offset > 0) //Skip zero value of lIt uIt += offset; else //Skip zero values of uIt lIt += -offset; Scalar newCoeff = m_lu.coeffUpper(rrow, col); for (Index k = 0; k < stop; ++k) { const Scalar tmp = newCoeff; newCoeff = tmp - lIt.value() * uIt.value(); ++lIt; ++uIt; } #endif m_lu.coeffRefUpper(rrow, col) = newCoeff; } //Diag matrix update typename MatrixType::InnerLowerIterator lIt(m_lu, row); typename MatrixType::InnerUpperIterator uIt(m_lu, row); const Index offset = lIt.col() - uIt.row(); Index stop = offset > 0 ? lIt.size() : uIt.size(); #ifdef VECTORIZE Map<VectorXd > rowVal(lIt.valuePtr() + (offset > 0 ? 0 : -offset), stop); Map<VectorXd > colVal(uIt.valuePtr() + (offset > 0 ? offset : 0), stop); Scalar newCoeff = m_lu.coeffDiag(row) - rowVal.dot(colVal); #else if (offset > 0) //Skip zero value of lIt uIt += offset; else //Skip zero values of uIt lIt += -offset; Scalar newCoeff = m_lu.coeffDiag(row); for (Index k = 0; k < stop; ++k) { const Scalar tmp = newCoeff; newCoeff = tmp - lIt.value() * uIt.value(); ++lIt; ++uIt; } #endif m_lu.coeffRefDiag(row) = newCoeff; } } /** Computes *x = U^-1 L^-1 b * * If \a transpose is set to SvTranspose or SvAdjoint, the solution * of the transposed/adjoint system is computed instead. * * Not all backends implement the solution of the transposed or * adjoint system. */ template<typename MatrixType> template<typename BDerived, typename XDerived> bool SkylineInplaceLU<MatrixType>::solve(const MatrixBase<BDerived> &b, MatrixBase<XDerived>* x, const int transposed) const { const size_t rows = m_lu.rows(); const size_t cols = m_lu.cols(); for (Index row = 0; row < rows; row++) { x->coeffRef(row) = b.coeff(row); Scalar newVal = x->coeff(row); typename MatrixType::InnerLowerIterator lIt(m_lu, row); Index col = lIt.col(); while (lIt.col() < row) { newVal -= x->coeff(col++) * lIt.value(); ++lIt; } x->coeffRef(row) = newVal; } for (Index col = rows - 1; col > 0; col--) { x->coeffRef(col) = x->coeff(col) / m_lu.coeffDiag(col); const Scalar x_col = x->coeff(col); typename MatrixType::InnerUpperIterator uIt(m_lu, col); uIt += uIt.size()-1; while (uIt) { x->coeffRef(uIt.row()) -= x_col * uIt.value(); //TODO : introduce --operator uIt += -1; } } x->coeffRef(0) = x->coeff(0) / m_lu.coeffDiag(0); return true; } } // end namespace Eigen #endif // EIGEN_SKYLINELU_H