diff --git a/cmake/Config.cmake b/cmake/Config.cmake index cf8ff35b..49c5ae12 100644 --- a/cmake/Config.cmake +++ b/cmake/Config.cmake @@ -2,7 +2,7 @@ set( PREPROCESSOR_DEFINES UMPIRE CHAI CUDA - HIP + HIP TOTALVIEW_OUTPUT CALIPER ) diff --git a/host-configs/LLNL/dane-clang-14-mpm.cmake b/host-configs/LLNL/dane-clang-14-mpm.cmake new file mode 100644 index 00000000..2c17649d --- /dev/null +++ b/host-configs/LLNL/dane-clang-14-mpm.cmake @@ -0,0 +1,5 @@ +set(CONFIG_NAME "dane-clang-14-mpm" CACHE PATH "") + +set(GEOS_TPL_DIR /usr/WS1/crook5/thirdPartyLibs/install-dane-clang-14-mpm-release CACHE PATH "") + +include(${CMAKE_CURRENT_LIST_DIR}/llnl-cpu-clang-14.cmake) diff --git a/host-configs/LLNL/dane-gcc-12-mpm.cmake b/host-configs/LLNL/dane-gcc-12-mpm.cmake new file mode 100644 index 00000000..86adf6be --- /dev/null +++ b/host-configs/LLNL/dane-gcc-12-mpm.cmake @@ -0,0 +1,5 @@ +set(CONFIG_NAME "dane-gcc-12-mpm" CACHE PATH "") + +set(GEOS_TPL_DIR /usr/WS1/crook5/thirdPartyLibs/install-dane-gcc-12-mpm-release CACHE PATH "") + +include(${CMAKE_CURRENT_LIST_DIR}/llnl-cpu-gcc-12.cmake) diff --git a/src/ArrayOfArrays.hpp b/src/ArrayOfArrays.hpp index 61facade..c78109c9 100644 --- a/src/ArrayOfArrays.hpp +++ b/src/ArrayOfArrays.hpp @@ -484,4 +484,21 @@ class ArrayOfArrays : protected ArrayOfArraysView< T, INDEX_TYPE, false, BUFFER_ } }; +/** + * @brief True if the template type is an ArrayOfArrays. + */ +template< class > +constexpr bool isArrayOfArrays = false; + +/** + * @tparam T The type contained in the ArrayOfArrays. + * @tparam INDEX_TYPE The integral type used as an index. + * @tparam BUFFER_TYPE The type used to manage the underlying allocation. + * @brief Specialization of isArrayOfArrays for the ArrayOfArrays class. + */ +template< typename T, + typename INDEX_TYPE, + template< typename > class BUFFER_TYPE > +constexpr bool isArrayOfArrays< ArrayOfArrays< T, INDEX_TYPE, BUFFER_TYPE > > = true; + } /* namespace LvArray */ diff --git a/src/ArrayOfArraysView.hpp b/src/ArrayOfArraysView.hpp index f07e93d7..85ecfd13 100644 --- a/src/ArrayOfArraysView.hpp +++ b/src/ArrayOfArraysView.hpp @@ -249,7 +249,7 @@ class ArrayOfArraysView /** * @brief Move assignment operator.. - * @param src the SparsityPatternView to be moved from. + * @param src the ArrayOfArraysView to be moved from. * @return *this. */ LVARRAY_HOST_DEVICE @@ -1087,4 +1087,23 @@ class ArrayOfArraysView } }; +/** + * @brief True if the template type is an ArrayOfArraysView. + */ +template< class > +constexpr bool isArrayOfArraysView = false; + +/** + * @tparam T The type contained in the ArrayOfArraysView. + * @tparam INDEX_TYPE The integral type used as an index. + * @tparam CONST_SIZES True iff the size of each array is constant. + * @tparam BUFFER_TYPE The type used to manager the underlying allocation. + * @brief Specialization of isArrayOfArraysView for the ArrayOfArraysView class. + */ +template< typename T, + typename INDEX_TYPE, + bool CONST_SIZES, + template< typename > class BUFFER_TYPE > +constexpr bool isArrayOfArraysView< ArrayOfArraysView< T, INDEX_TYPE, CONST_SIZES, BUFFER_TYPE > > = true; + } /* namespace LvArray */ diff --git a/src/ChaiBuffer.hpp b/src/ChaiBuffer.hpp index 8716330e..19fe010c 100644 --- a/src/ChaiBuffer.hpp +++ b/src/ChaiBuffer.hpp @@ -340,7 +340,7 @@ class ChaiBuffer if( size > 0 ) { - LVARRAY_ERROR_IF_NE_MSG( space, MemorySpace::host, "Calling reallocate with a non-zero current size is not yet supporeted for the GPU." ); + LVARRAY_ERROR_IF_NE_MSG( space, MemorySpace::host, "Calling reallocate with a non-zero current size is not yet supported for the GPU." ); std::ptrdiff_t const overlapAmount = std::min( newCapacity, size ); arrayManipulation::uninitializedMove( newPointer, overlapAmount, m_pointer ); arrayManipulation::destroy( m_pointer, size ); diff --git a/src/fixedSizeSquareMatrixOps.hpp b/src/fixedSizeSquareMatrixOps.hpp index c61e4a9e..f39c3f15 100644 --- a/src/fixedSizeSquareMatrixOps.hpp +++ b/src/fixedSizeSquareMatrixOps.hpp @@ -38,6 +38,42 @@ LVARRAY_HOST_DEVICE CONSTEXPR_WITHOUT_BOUNDS_CHECK inline auto determinant( MATRIX const & matrix ) { return internal::SquareMatrixOps< M >::determinant( matrix ); } +/** + * @brief Compute the cofactor of the source matrix @p srcMatrix and store the result in @p dstMatrix. + * @tparam M The size of the matrices @p dstMatrix and @p srcMatrix. + * @tparam DST_MATRIX The type of @p dstMatrix. + * @tparam SRC_MATRIX The type of @p srcMatrix. + * @param dstMatrix The M x M matrix to write the cofactor to. + * @param srcMatrix The M x M matrix to take the cofactor of. + * @note @p srcMatrix can contain integers but @p dstMatrix must contain floating point values. + */ +template< std::ptrdiff_t M, typename DST_MATRIX, typename SRC_MATRIX > +LVARRAY_HOST_DEVICE CONSTEXPR_WITHOUT_BOUNDS_CHECK inline +auto cofactor( DST_MATRIX && LVARRAY_RESTRICT_REF dstMatrix, + SRC_MATRIX const & LVARRAY_RESTRICT_REF srcMatrix ) +{ + static_assert( std::is_floating_point< std::decay_t< decltype( dstMatrix[ 0 ][ 0 ] ) > >::value, + "The destination matrix must be contain floating point values." ); + internal::SquareMatrixOps< M >::cofactor( std::forward< DST_MATRIX >( dstMatrix ), srcMatrix ); +} + +/** + * @brief Compute the cofactor of the matrix @p matrix overwritting it. + * @tparam M The size of the matrix @p matrix. + * @tparam MATRIX The type of @p matrix. + * @param matrix The M x M matrix to take the cofactor of and overwrite. + * @return The determinant. + * @note @p matrix must contain floating point values. + */ +template< std::ptrdiff_t M, typename MATRIX > +LVARRAY_HOST_DEVICE CONSTEXPR_WITHOUT_BOUNDS_CHECK inline +auto cofactor( MATRIX && matrix ) +{ + static_assert( std::is_floating_point< std::decay_t< decltype( matrix[ 0 ][ 0 ] ) > >::value, + "The matrix must be contain floating point values." ); + internal::SquareMatrixOps< M >::cofactor( std::forward< MATRIX >( matrix ) ); +} + /** * @brief Invert the source matrix @p srcMatrix and store the result in @p dstMatrix. * @tparam M The size of the matrices @p dstMatrix and @p srcMatrix. @@ -354,6 +390,24 @@ void symmetricToDense( DST_MATRIX && dstMatrix, SRC_SYM_MATRIX const & srcSymMat srcSymMatrix ); } +/** + * @brief Determine the polar decomposition of the matrix @p srcMatrix + * @tparam M The size of @p R and @p srcMatrix. + * @tparam DST_MATRIX The type of @p R. + * @tparam MATRIX The type of @p srcMatrix. + * @param R The resultant rotation matrix. + * @param matrix The matrix to be decomposed. + * @details The polar decomposition returns a rotation matrix such that @p R . U = V . @p R = @p srcMatrix. + * This is done using Higham's iterative algorithm. + */ +template< std::ptrdiff_t M, typename DST_MATRIX, typename MATRIX > +LVARRAY_HOST_DEVICE constexpr inline +void polarDecomposition( DST_MATRIX && R, MATRIX const & srcMatrix ) +{ + return internal::SquareMatrixOps< M >::polarDecomposition( std::forward< DST_MATRIX >( R ), + srcMatrix ); +} + ///@} } // namespace tensorOps diff --git a/src/fixedSizeSquareMatrixOpsImpl.hpp b/src/fixedSizeSquareMatrixOpsImpl.hpp index f006d866..9c2533d2 100644 --- a/src/fixedSizeSquareMatrixOpsImpl.hpp +++ b/src/fixedSizeSquareMatrixOpsImpl.hpp @@ -13,6 +13,7 @@ #pragma once #include "genericTensorOps.hpp" +#include "limits.hpp" namespace LvArray { @@ -71,6 +72,65 @@ template< std::ptrdiff_t M > struct SquareMatrixOps {}; +/** + * @brief Determine the polar decomposition of @p matrix + * @tparam DST_MATRIX The type of @p R. + * @tparam MATRIX The type of @p matrix. + * @param R The resultant orthogonal matrix. + * @param matrix The matrix to be decomposed. + * @details The polar decomposition returns an orthogonal matrix such that @p R . U = V . @p R = @p matrix. + * This is done using Higham's iterative algorithm. + */ +template< std::ptrdiff_t M, typename DST_MATRIX, typename MATRIX > +LVARRAY_HOST_DEVICE inline +static void polarDecompositionBase( DST_MATRIX && LVARRAY_RESTRICT_REF R, + MATRIX const & LVARRAY_RESTRICT_REF matrix ) +{ + checkSizes< M, M >( R ); + checkSizes< M, M >( matrix ); + + using FloatingPoint = std::decay_t< decltype( R[0][0] ) >; + + // Initialize + copy< M, M >( R, matrix ); + FloatingPoint RInverse[M][M] = { {0} }, + RInverseTranspose[M][M] = { {0} }, + RRTMinusI[M][M] = { {0} }; + + // Higham Algorithm + FloatingPoint errorSquared = 1.0; + FloatingPoint tolerance = 10 * LvArray::NumericLimits< FloatingPoint >::epsilon; + int iter = 0; + while( errorSquared > tolerance * tolerance && iter < 100 ) + { + iter++; + errorSquared = 0.0; + + // Average the current R with its inverse tranpose + SquareMatrixOps< M >::invert( RInverse, R ); + transpose< M, M >( RInverseTranspose, RInverse ); + add< M, M >( R, RInverseTranspose ); + scale< M, M >( R, 0.5 ); + + // Determine how close R is to being orthogonal using L2Norm(R.R^T-I) + FloatingPoint copyR[M][M] = { { 0.0 } }; + copy< M, M >( copyR, R ); + Rij_eq_AikBjk< M, M, M >( RRTMinusI, R, copyR ); + addIdentity< M >( RRTMinusI, -1.0 ); + for( std::ptrdiff_t i = 0; i < M; i++ ) + { + for( std::ptrdiff_t j = 0; j < M; j++ ) + { + errorSquared += RRTMinusI[i][j] * RRTMinusI[i][j]; + } + } + } + if( iter == 100 ) + { + printf( "Polar decomposition did not converge in 100 iterations!" ); + } +} + /** * @struct SquareMatrixOps< 2 > * @brief Performs operations on 2x2 square matrices. @@ -91,6 +151,26 @@ struct SquareMatrixOps< 2 > return matrix[ 0 ][ 0 ] * matrix[ 1 ][ 1 ] - matrix[ 0 ][ 1 ] * matrix[ 1 ][ 0 ]; } + /** + * @return Compute the cofactor of the source matrix @p srcMatrix and store the result in @p dstMatrix + * @tparam DST_MATRIX The type of @p dstMatrix. + * @tparam SRC_MATRIX The type of @p srcMatrix. + * @param dstMatrix The 2x2 matrix to write the cofactor to. + * @param srcMatrix The 2x2 matrix to take the cofactor of. + */ + template< typename DST_MATRIX, typename SRC_MATRIX > + LVARRAY_HOST_DEVICE CONSTEXPR_WITHOUT_BOUNDS_CHECK inline + static auto cofactor( DST_MATRIX && LVARRAY_RESTRICT_REF dstMatrix, + SRC_MATRIX const & LVARRAY_RESTRICT_REF srcMatrix ) + { + checkSizes< 2, 2 >( dstMatrix ); + checkSizes< 2, 2 >( srcMatrix ); + dstMatrix[0][0] = srcMatrix[1][1]; + dstMatrix[1][1] = srcMatrix[0][0]; + dstMatrix[0][1] = -srcMatrix[1][0]; + dstMatrix[1][0] = -srcMatrix[0][1]; + } + /** * @brief Invert the source matrix @p srcMatrix and store the result in @p dstMatrix. * @tparam DST_MATRIX The type of @p dstMatrix. @@ -121,6 +201,24 @@ struct SquareMatrixOps< 2 > return det; } + /** + * @brief Compute the cofactor of the matrix @p srcMatrix overwritting it. + * @tparam MATRIX The type of @p matrix. + * @param matrix The 2x2 matrix to take the cofactor of and overwrite. + * @note @p matrix must contain floating point values. + */ + template< typename MATRIX > + LVARRAY_HOST_DEVICE CONSTEXPR_WITHOUT_BOUNDS_CHECK inline + static auto cofactor( MATRIX && matrix ) + { + checkSizes< 2, 2 >( matrix ); + + using realType = std::remove_reference_t< decltype( matrix[ 0 ][ 0 ] ) >; + realType temp[ 2 ][ 2 ]; + copy< 2, 2 >( temp, matrix ); + return cofactor( matrix, temp ); + } + /** * @brief Invert the matrix @p srcMatrix overwritting it. * @tparam MATRIX The type of @p matrix. @@ -529,8 +627,24 @@ struct SquareMatrixOps< 2 > dstMatrix[ 1 ][ 0 ] = srcSymMatrix[ 2 ]; } -private: + /** + * @brief Determine the polar decomposition of the 2x2 matrix @p matrix + * @tparam DST_MATRIX The type of @p R. + * @tparam MATRIX The type of @p matrix. + * @param R The resultant orthogonal matrix. + * @param matrix The matrix to be decomposed. + * @details The polar decomposition returns an orthogonal matrix such that @p R . U = V . @p R = @p matrix. + * This is done using Higham's iterative algorithm. + */ + template< typename DST_MATRIX, typename MATRIX > + LVARRAY_HOST_DEVICE CONSTEXPR_WITHOUT_BOUNDS_CHECK inline + static void polarDecomposition( DST_MATRIX && LVARRAY_RESTRICT_REF R, + MATRIX const & LVARRAY_RESTRICT_REF matrix ) + { + polarDecompositionBase< 2 >( R, matrix ); + } +private: /** * @brief Compute the eigenvalues of the 2x2 symmetric matrix @p matrix. * @tparam FloatingPoint A floating point type. @@ -579,6 +693,31 @@ struct SquareMatrixOps< 3 > matrix[ 2 ][ 0 ] * ( matrix[ 0 ][ 1 ] * matrix[ 1 ][ 2 ] - matrix[ 0 ][ 2 ] * matrix[ 1 ][ 1 ] ); } + /** + * @return Compute the cofactor of the source matrix @p srcMatrix and store the result in @p dstMatrix + * @tparam DST_MATRIX The type of @p dstMatrix. + * @tparam SRC_MATRIX The type of @p srcMatrix. + * @param dstMatrix The 3x3 matrix to write the cofactor to. + * @param srcMatrix The 3x3 matrix to take the cofactor of. + */ + template< typename DST_MATRIX, typename SRC_MATRIX > + LVARRAY_HOST_DEVICE CONSTEXPR_WITHOUT_BOUNDS_CHECK inline + static auto cofactor( DST_MATRIX && LVARRAY_RESTRICT_REF dstMatrix, + SRC_MATRIX const & LVARRAY_RESTRICT_REF srcMatrix ) + { + checkSizes< 3, 3 >( dstMatrix ); + checkSizes< 3, 3 >( srcMatrix ); + dstMatrix[0][0] = srcMatrix[1][1] * srcMatrix[2][2] - srcMatrix[1][2] * srcMatrix[2][1]; + dstMatrix[0][1] = srcMatrix[1][2] * srcMatrix[2][0] - srcMatrix[1][0] * srcMatrix[2][2]; + dstMatrix[0][2] = srcMatrix[1][0] * srcMatrix[2][1] - srcMatrix[1][1] * srcMatrix[2][0]; + dstMatrix[1][0] = srcMatrix[0][2] * srcMatrix[2][1] - srcMatrix[0][1] * srcMatrix[2][2]; + dstMatrix[1][1] = srcMatrix[0][0] * srcMatrix[2][2] - srcMatrix[0][2] * srcMatrix[2][0]; + dstMatrix[1][2] = srcMatrix[0][1] * srcMatrix[2][0] - srcMatrix[0][0] * srcMatrix[2][1]; + dstMatrix[2][0] = srcMatrix[0][1] * srcMatrix[1][2] - srcMatrix[0][2] * srcMatrix[1][1]; + dstMatrix[2][1] = srcMatrix[0][2] * srcMatrix[1][0] - srcMatrix[0][0] * srcMatrix[1][2]; + dstMatrix[2][2] = srcMatrix[0][0] * srcMatrix[1][1] - srcMatrix[0][1] * srcMatrix[1][0]; + } + /** * @brief Invert the source matrix @p srcMatrix and store the result in @p dstMatrix. * @tparam DST_MATRIX The type of @p dstMatrix. @@ -620,6 +759,22 @@ struct SquareMatrixOps< 3 > return det; } + /** + * @brief Compute the cofactor of the matrix @p srcMatrix overwritting it. + * @tparam MATRIX The type of @p matrix. + * @param matrix The 3x3 matrix to take the cofactor of and overwrite. + * @note @p srcMatrix must contain floating point values. + */ + template< typename MATRIX > + LVARRAY_HOST_DEVICE constexpr inline + static auto cofactor( MATRIX && matrix ) + { + using realType = std::remove_reference_t< decltype( matrix[ 0 ][ 0 ] ) >; + realType temp[ 3 ][ 3 ]; + copy< 3, 3 >( temp, matrix ); + return cofactor( matrix, temp ); + } + /** * @brief Invert the matrix @p srcMatrix overwritting it. * @tparam MATRIX The type of @p matrix. @@ -1162,6 +1317,23 @@ struct SquareMatrixOps< 3 > dstMatrix[ 2 ][ 1 ] = srcSymMatrix[ 3 ]; } + /** + * @brief Determine the polar decomposition of the 3x3 matrix @p matrix + * @tparam DST_MATRIX The type of @p R. + * @tparam MATRIX The type of @p matrix. + * @param R The resultant orthogonal matrix. + * @param matrix The matrix to be decomposed. + * @details The polar decomposition returns an orthogonal matrix such that @p R . U = V . @p R = @p matrix. + * This is done using Higham's iterative algorithm. + */ + template< typename DST_MATRIX, typename MATRIX > + LVARRAY_HOST_DEVICE CONSTEXPR_WITHOUT_BOUNDS_CHECK inline + static void polarDecomposition( DST_MATRIX && LVARRAY_RESTRICT_REF R, + MATRIX const & LVARRAY_RESTRICT_REF matrix ) + { + polarDecompositionBase< 3 >( R, matrix ); + } + private: /** * @brief Compute the eigenvalues of the 3x3 symmetric matrix @p matrix. @@ -1255,6 +1427,152 @@ struct SquareMatrixOps< 3 > } }; +/** + * @struct SquareMatrixOps< 4 > + * @brief Performs operations on 4x4 square matrices. + */ +template<> +struct SquareMatrixOps< 4 > +{ + /** + * @return Return the determinant of the matrix @p matrix. + * @tparam MATRIX The type of @p matrix. + * @param matrix The 4x4 matrix to get the determinant of. + */ + template< typename MATRIX > + LVARRAY_HOST_DEVICE CONSTEXPR_WITHOUT_BOUNDS_CHECK inline + static auto determinant( MATRIX const & matrix ) + { + checkSizes< 4, 4 >( matrix ); + + return matrix[0][0]*( + matrix[1][1]*matrix[2][2]*matrix[3][3] + matrix[1][2]*matrix[2][3]*matrix[3][1] + matrix[1][3]*matrix[2][1]*matrix[3][2] + - matrix[1][3]*matrix[2][2]*matrix[3][1] - matrix[1][1]*matrix[2][3]*matrix[3][2] - matrix[1][2]*matrix[2][1]*matrix[3][3] + ) + - matrix[0][1]*( + matrix[1][0]*matrix[2][2]*matrix[3][3] + matrix[1][2]*matrix[2][3]*matrix[3][0] + matrix[1][3]*matrix[2][0]*matrix[3][2] + - matrix[1][3]*matrix[2][2]*matrix[3][0] - matrix[1][0]*matrix[2][3]*matrix[3][2] - matrix[1][2]*matrix[2][0]*matrix[3][3] + ) + + matrix[0][2]*( + matrix[1][0]*matrix[2][1]*matrix[3][3] + matrix[1][1]*matrix[2][3]*matrix[3][0] + matrix[1][3]*matrix[2][0]*matrix[3][1] + - matrix[1][3]*matrix[2][1]*matrix[3][0] - matrix[1][0]*matrix[2][3]*matrix[3][1] - matrix[1][1]*matrix[2][0]*matrix[3][3] + ) + - matrix[0][3]*( + matrix[1][0]*matrix[2][1]*matrix[3][2] + matrix[1][1]*matrix[2][2]*matrix[3][0] + matrix[1][2]*matrix[2][0]*matrix[3][1] + - matrix[1][2]*matrix[2][1]*matrix[3][0] - matrix[1][0]*matrix[2][2]*matrix[3][1] - matrix[1][1]*matrix[2][0]*matrix[3][2] + ); + + // return matrix[0][0] * matrix[1][1] * matrix[2][2] * matrix[3][3] - + // matrix[0][0] * matrix[1][1] * matrix[2][3] * matrix[3][2] - + // matrix[0][0] * matrix[1][2] * matrix[2][1] * matrix[3][3] + + // matrix[0][0] * matrix[1][2] * matrix[2][3] * matrix[3][1] + + // matrix[0][0] * matrix[1][3] * matrix[2][1] * matrix[3][2] - + // matrix[0][0] * matrix[1][3] * matrix[2][2] * matrix[3][1] - + + // matrix[0][1] * matrix[1][0] * matrix[2][2] * matrix[3][3] + + // matrix[0][1] * matrix[1][0] * matrix[2][3] * matrix[3][2] + + // matrix[0][1] * matrix[1][2] * matrix[2][0] * matrix[3][3] - + // matrix[0][1] * matrix[1][2] * matrix[2][3] * matrix[3][0] - + // matrix[0][1] * matrix[1][3] * matrix[2][0] * matrix[3][2] + + // matrix[0][1] * matrix[1][3] * matrix[2][2] * matrix[3][0] + + + // matrix[0][2] * matrix[1][0] * matrix[2][1] * matrix[3][3] - + // matrix[0][2] * matrix[1][0] * matrix[2][3] * matrix[3][1] - + // matrix[0][2] * matrix[1][1] * matrix[2][0] * matrix[3][3] + + // matrix[0][2] * matrix[1][1] * matrix[2][3] * matrix[3][0] + + // matrix[0][2] * matrix[1][3] * matrix[2][0] * matrix[3][1] - + // matrix[0][2] * matrix[1][3] * matrix[2][1] * matrix[3][0] - + + // matrix[0][3] * matrix[1][0] * matrix[2][1] * matrix[3][2] + + // matrix[0][3] * matrix[1][0] * matrix[2][2] * matrix[3][1] + + // matrix[0][3] * matrix[1][1] * matrix[2][0] * matrix[3][2] - + // matrix[0][3] * matrix[1][1] * matrix[2][2] * matrix[3][0] - + // matrix[0][3] * matrix[1][2] * matrix[2][0] * matrix[3][1] + + // matrix[0][3] * matrix[1][2] * matrix[2][1] * matrix[3][0]; + } + + /** + * @brief Invert the source matrix @p srcMatrix and store the result in @p dstMatrix. + * @tparam DST_MATRIX The type of @p dstMatrix. + * @tparam SRC_MATRIX The type of @p srcMatrix. + * @param dstMatrix The 4x4 matrix to write the inverse to. + * @param srcMatrix The 4x4 matrix to take the inverse of. + * @return The determinant. + * @note @p srcMatrix can contain integers but @p dstMatrix must contain floating point values. + */ + template< typename DST_MATRIX, typename SRC_MATRIX > + LVARRAY_HOST_DEVICE CONSTEXPR_WITHOUT_BOUNDS_CHECK inline + static auto invert( DST_MATRIX && LVARRAY_RESTRICT_REF dstMatrix, + SRC_MATRIX const & LVARRAY_RESTRICT_REF srcMatrix ) + { + checkSizes< 4, 4 >( dstMatrix ); + checkSizes< 4, 4 >( srcMatrix ); + + using FloatingPoint = std::decay_t< decltype( dstMatrix[ 0 ][ 0 ] ) >; + + FloatingPoint const det = determinant( srcMatrix ); + FloatingPoint const invDet = FloatingPoint( 1 ) / det; + + dstMatrix[0][0] = srcMatrix[1][1]*srcMatrix[2][2]*srcMatrix[3][3] - srcMatrix[1][1]*srcMatrix[2][3]*srcMatrix[3][2] - srcMatrix[1][2]*srcMatrix[2][1]*srcMatrix[3][3] + srcMatrix[1][2]* + srcMatrix[2][3]*srcMatrix[3][1] + srcMatrix[1][3]*srcMatrix[2][1]*srcMatrix[3][2] - srcMatrix[1][3]*srcMatrix[2][2]*srcMatrix[3][1]; + dstMatrix[0][1] = srcMatrix[0][1]*srcMatrix[2][3]*srcMatrix[3][2] - srcMatrix[0][1]*srcMatrix[2][2]*srcMatrix[3][3] + srcMatrix[0][2]*srcMatrix[2][1]*srcMatrix[3][3] - srcMatrix[0][2]* + srcMatrix[2][3]*srcMatrix[3][1] - srcMatrix[0][3]*srcMatrix[2][1]*srcMatrix[3][2] + srcMatrix[0][3]*srcMatrix[2][2]*srcMatrix[3][1]; + dstMatrix[0][2] = srcMatrix[0][1]*srcMatrix[1][2]*srcMatrix[3][3] - srcMatrix[0][1]*srcMatrix[1][3]*srcMatrix[3][2] - srcMatrix[0][2]*srcMatrix[1][1]*srcMatrix[3][3] + srcMatrix[0][2]* + srcMatrix[1][3]*srcMatrix[3][1] + srcMatrix[0][3]*srcMatrix[1][1]*srcMatrix[3][2] - srcMatrix[0][3]*srcMatrix[1][2]*srcMatrix[3][1]; + dstMatrix[0][3] = srcMatrix[0][1]*srcMatrix[1][3]*srcMatrix[2][2] - srcMatrix[0][1]*srcMatrix[1][2]*srcMatrix[2][3] + srcMatrix[0][2]*srcMatrix[1][1]*srcMatrix[2][3] - srcMatrix[0][2]* + srcMatrix[1][3]*srcMatrix[2][1] - srcMatrix[0][3]*srcMatrix[1][1]*srcMatrix[2][2] + srcMatrix[0][3]*srcMatrix[1][2]*srcMatrix[2][1]; + + dstMatrix[1][0] = srcMatrix[1][0]*srcMatrix[2][3]*srcMatrix[3][2] - srcMatrix[1][0]*srcMatrix[2][2]*srcMatrix[3][3] + srcMatrix[1][2]*srcMatrix[2][0]*srcMatrix[3][3] - srcMatrix[1][2]* + srcMatrix[2][3]*srcMatrix[3][0] - srcMatrix[1][3]*srcMatrix[2][0]*srcMatrix[3][2] + srcMatrix[1][3]*srcMatrix[2][2]*srcMatrix[3][0]; + dstMatrix[1][1] = srcMatrix[0][0]*srcMatrix[2][2]*srcMatrix[3][3] - srcMatrix[0][0]*srcMatrix[2][3]*srcMatrix[3][2] - srcMatrix[0][2]*srcMatrix[2][0]*srcMatrix[3][3] + srcMatrix[0][2]* + srcMatrix[2][3]*srcMatrix[3][0] + srcMatrix[0][3]*srcMatrix[2][0]*srcMatrix[3][2] - srcMatrix[0][3]*srcMatrix[2][2]*srcMatrix[3][0]; + dstMatrix[1][2] = srcMatrix[0][0]*srcMatrix[1][3]*srcMatrix[3][2] - srcMatrix[0][0]*srcMatrix[1][2]*srcMatrix[3][3] + srcMatrix[0][2]*srcMatrix[1][0]*srcMatrix[3][3] - srcMatrix[0][2]* + srcMatrix[1][3]*srcMatrix[3][0] - srcMatrix[0][3]*srcMatrix[1][0]*srcMatrix[3][2] + srcMatrix[0][3]*srcMatrix[1][2]*srcMatrix[3][0]; + dstMatrix[1][3] = srcMatrix[0][0]*srcMatrix[1][2]*srcMatrix[2][3] - srcMatrix[0][0]*srcMatrix[1][3]*srcMatrix[2][2] - srcMatrix[0][2]*srcMatrix[1][0]*srcMatrix[2][3] + srcMatrix[0][2]* + srcMatrix[1][3]*srcMatrix[2][0] + srcMatrix[0][3]*srcMatrix[1][0]*srcMatrix[2][2] - srcMatrix[0][3]*srcMatrix[1][2]*srcMatrix[2][0]; + + dstMatrix[2][0] = srcMatrix[1][0]*srcMatrix[2][1]*srcMatrix[3][3] - srcMatrix[1][0]*srcMatrix[2][3]*srcMatrix[3][1] - srcMatrix[1][1]*srcMatrix[2][0]*srcMatrix[3][3] + srcMatrix[1][1]* + srcMatrix[2][3]*srcMatrix[3][0] + srcMatrix[1][3]*srcMatrix[2][0]*srcMatrix[3][1] - srcMatrix[1][3]*srcMatrix[2][1]*srcMatrix[3][0]; + dstMatrix[2][1] = srcMatrix[0][0]*srcMatrix[2][3]*srcMatrix[3][1] - srcMatrix[0][0]*srcMatrix[2][1]*srcMatrix[3][3] + srcMatrix[0][1]*srcMatrix[2][0]*srcMatrix[3][3] - srcMatrix[0][1]* + srcMatrix[2][3]*srcMatrix[3][0] - srcMatrix[0][3]*srcMatrix[2][0]*srcMatrix[3][1] + srcMatrix[0][3]*srcMatrix[2][1]*srcMatrix[3][0]; + dstMatrix[2][2] = srcMatrix[0][0]*srcMatrix[1][1]*srcMatrix[3][3] - srcMatrix[0][0]*srcMatrix[1][3]*srcMatrix[3][1] - srcMatrix[0][1]*srcMatrix[1][0]*srcMatrix[3][3] + srcMatrix[0][1]* + srcMatrix[1][3]*srcMatrix[3][0] + srcMatrix[0][3]*srcMatrix[1][0]*srcMatrix[3][1] - srcMatrix[0][3]*srcMatrix[1][1]*srcMatrix[3][0]; + dstMatrix[2][3] = srcMatrix[0][0]*srcMatrix[1][3]*srcMatrix[2][1] - srcMatrix[0][0]*srcMatrix[1][1]*srcMatrix[2][3] + srcMatrix[0][1]*srcMatrix[1][0]*srcMatrix[2][3] - srcMatrix[0][1]* + srcMatrix[1][3]*srcMatrix[2][0] - srcMatrix[0][3]*srcMatrix[1][0]*srcMatrix[2][1] + srcMatrix[0][3]*srcMatrix[1][1]*srcMatrix[2][0]; + + dstMatrix[3][0] = srcMatrix[1][0]*srcMatrix[2][2]*srcMatrix[3][1] - srcMatrix[1][0]*srcMatrix[2][1]*srcMatrix[3][2] + srcMatrix[1][1]*srcMatrix[2][0]*srcMatrix[3][2] - srcMatrix[1][1]* + srcMatrix[2][2]*srcMatrix[3][0] - srcMatrix[1][2]*srcMatrix[2][0]*srcMatrix[3][1] + srcMatrix[1][2]*srcMatrix[2][1]*srcMatrix[3][0]; + dstMatrix[3][1] = srcMatrix[0][0]*srcMatrix[2][1]*srcMatrix[3][2] - srcMatrix[0][0]*srcMatrix[2][2]*srcMatrix[3][1] - srcMatrix[0][1]*srcMatrix[2][0]*srcMatrix[3][2] + srcMatrix[0][1]* + srcMatrix[2][2]*srcMatrix[3][0] + srcMatrix[0][2]*srcMatrix[2][0]*srcMatrix[3][1] - srcMatrix[0][2]*srcMatrix[2][1]*srcMatrix[3][0]; + dstMatrix[3][2] = srcMatrix[0][0]*srcMatrix[1][2]*srcMatrix[3][1] - srcMatrix[0][0]*srcMatrix[1][1]*srcMatrix[3][2] + srcMatrix[0][1]*srcMatrix[1][0]*srcMatrix[3][2] - srcMatrix[0][1]* + srcMatrix[1][2]*srcMatrix[3][0] - srcMatrix[0][2]*srcMatrix[1][0]*srcMatrix[3][1] + srcMatrix[0][2]*srcMatrix[1][1]*srcMatrix[3][0]; + dstMatrix[3][3] = srcMatrix[0][0]*srcMatrix[1][1]*srcMatrix[2][2] - srcMatrix[0][0]*srcMatrix[1][2]*srcMatrix[2][1] - srcMatrix[0][1]*srcMatrix[1][0]*srcMatrix[2][2] + srcMatrix[0][1]* + srcMatrix[1][2]*srcMatrix[2][0] + srcMatrix[0][2]*srcMatrix[1][0]*srcMatrix[2][1] - srcMatrix[0][2]*srcMatrix[1][1]*srcMatrix[2][0]; + + scale< 4, 4 >( dstMatrix, invDet ); + + return det; + } + + /** + * @brief Invert the matrix @p srcMatrix overwritting it. + * @tparam MATRIX The type of @p matrix. + * @param matrix The 4x4 matrix to take the inverse of and overwrite. + * @return The determinant. + * @note @p srcMatrix must contain floating point values. + */ + template< typename MATRIX > + LVARRAY_HOST_DEVICE constexpr inline + static auto invert( MATRIX && matrix ) + { + using realType = std::remove_reference_t< decltype( matrix[ 0 ][ 0 ] ) >; + + realType temp[ 4 ][ 4 ]; + copy< 4, 4 >( temp, matrix ); + return invert( matrix, temp ); + } +}; + } // namespace internal } // namespace tensorOps } // namespace LvArray diff --git a/src/math.hpp b/src/math.hpp index 0d358b32..b60fa466 100644 --- a/src/math.hpp +++ b/src/math.hpp @@ -440,6 +440,159 @@ __half2 abs( __half2 const x ) #endif + +/** + * @return The round value of @p x. + * @param x The number to get the round value of. + * @note This set of overloads is valid for any numeric type. + */ +LVARRAY_HOST_DEVICE LVARRAY_FORCE_INLINE +float round( float const x ) +{ +#if defined(LVARRAY_DEVICE_COMPILE) + return ::roundf( x ); +#else + return std::round( x ); +#endif +} + +template< typename T > +LVARRAY_HOST_DEVICE LVARRAY_FORCE_INLINE constexpr +double round( T const x ) +{ +#if defined(LVARRAY_DEVICE_COMPILE) + return ::round( double ( x ) ); +#else + return std::round( x ); +#endif +} + +#if defined( LVARRAY_USE_DEVICE ) + +/// @copydoc round( T ) +LVARRAY_DEVICE LVARRAY_FORCE_INLINE +__half round( __half const x ) +{ + return round( x ); +} + +/// @copydoc round( T ) +LVARRAY_DEVICE LVARRAY_FORCE_INLINE +__half2 round( __half2 const x ) +{ + return round( x ); +} + +#endif + + +/** + * @return The ceiling value of @p x. + * @param x The number to get the ceiling value of. + * @note This set of overloads is valid for any numeric type. + */ +LVARRAY_HOST_DEVICE LVARRAY_FORCE_INLINE +float ceil( float const x ) +{ +#if defined(LVARRAY_DEVICE_COMPILE) + return ::ceilf( x ); +#else + return std::ceil( x ); +#endif +} + +template< typename T > +LVARRAY_HOST_DEVICE LVARRAY_FORCE_INLINE constexpr +double ceil( T const x ) +{ +#if defined(LVARRAY_DEVICE_COMPILE) + return ::ceil( double ( x ) ); +#else + return std::ceil( x ); +#endif +} + +#if defined( LVARRAY_USE_DEVICE ) + +/// @copydoc ceil( T ) +LVARRAY_DEVICE LVARRAY_FORCE_INLINE +__half ceil( __half const x ) +{ +#if CUDART_VERSION > 11000 + return hceil( x ); +#else + return x > __half( 0 ) ? x : -x; +#endif +} + +/// @copydoc ceil( T ) +LVARRAY_DEVICE LVARRAY_FORCE_INLINE +__half2 ceil( __half2 const x ) +{ +#if CUDART_VERSION > 11000 + return h2ceil( x ); +#else + return LVARRAY_THROW( "h2ceil is not implemented for host", std::runtime_error ); // This is wrong, copied from other function used to + // mimic +#endif +} + +#endif + + +/** + * @return The floor value of @p x. + * @param x The number to get the floor value of. + * @note This set of overloads is valid for any numeric type. + */ +LVARRAY_HOST_DEVICE LVARRAY_FORCE_INLINE +float floor( float const x ) +{ +#if defined(LVARRAY_DEVICE_COMPILE) + return ::floorf( x ); +#else + return std::floor( x ); +#endif +} + +template< typename T > +LVARRAY_HOST_DEVICE LVARRAY_FORCE_INLINE constexpr +double floor( T const x ) +{ +#if defined(LVARRAY_DEVICE_COMPILE) + return ::floor( double ( x ) ); +#else + return std::floor( x ); +#endif +} + +#if defined( LVARRAY_USE_DEVICE ) + +/// @copydoc floor( T ) +LVARRAY_DEVICE LVARRAY_FORCE_INLINE +__half floor( __half const x ) +{ +#if CUDART_VERSION > 11000 + return hfloor( x ); +#else + return x > __half( 0 ) ? x : -x; +#endif +} + +/// @copydoc floor( T ) +LVARRAY_DEVICE LVARRAY_FORCE_INLINE +__half2 floor( __half2 const x ) +{ +#if CUDART_VERSION > 11000 + return h2floor( x ); +#else + return LVARRAY_THROW( "h2floor is not implemented for host", std::runtime_error ); +#endif +} + +#endif + + /** * @return @code x * x @endcode. * @tparam T The typeof @p x. @@ -452,6 +605,44 @@ T square( T const x ) ///@} + +/** + * @name Power. + */ +///@{ + +/** + * @return The power of @p x. + * @param x The number to get the power of. + * @param n The exponent. + * @note This set of overloads is valid for any numeric type. If @p x is integral it is converted to @c double + * and the return type is @c double. + */ +LVARRAY_HOST_DEVICE LVARRAY_FORCE_INLINE +float pow( float const x, float const n ) +{ +#if defined(LVARRAY_DEVICE_COMPILE) + return ::powf( x, n ); +#else + return std::pow( x, n ); +#endif +} + +/// @copydoc pow( float ) +template< typename T > +LVARRAY_HOST_DEVICE LVARRAY_FORCE_INLINE +double pow( T const x, T const n ) +{ +#if defined(LVARRAY_DEVICE_COMPILE) + return ::pow( double( x ), double( n ) ); +#else + return std::pow( x, n ); +#endif +} + +///@} + + /** * @name Square root and inverse square root. */ diff --git a/unitTests/CMakeLists.txt b/unitTests/CMakeLists.txt index 3f5bb961..625a1209 100644 --- a/unitTests/CMakeLists.txt +++ b/unitTests/CMakeLists.txt @@ -70,8 +70,11 @@ set( testSources testStackTrace.cpp testTensorOpsDeterminant.cpp testTensorOpsEigen.cpp + testTensorOpsCofactorOneArg.cpp + testTensorOpsCofactorTwoArgs.cpp testTensorOpsInverseOneArg.cpp testTensorOpsInverseTwoArgs.cpp + testTensorOpsPolarDecomposition.cpp testTensorOpsSymDeterminant.cpp testTensorOpsSymInverseOneArg.cpp testTensorOpsSymInverseTwoArgs.cpp diff --git a/unitTests/testTensorOpsCofactorOneArg.cpp b/unitTests/testTensorOpsCofactorOneArg.cpp new file mode 100644 index 00000000..7af3c287 --- /dev/null +++ b/unitTests/testTensorOpsCofactorOneArg.cpp @@ -0,0 +1,29 @@ +/* + * Copyright (c) 2021, Lawrence Livermore National Security, LLC and LvArray contributors. + * All rights reserved. + * See the LICENSE file for details. + * SPDX-License-Identifier: (BSD-3-Clause) + */ + +#include "testTensorOpsInverse.hpp" + +namespace LvArray +{ +namespace testing +{ + +TYPED_TEST( InverseFloatOnlyTest, cofactorOneArg ) +{ + this->cofactorOneArg(); +} + +} // namespace testing +} // namespace LvArray + +// This is the default gtest main method. It is included for ease of debugging. +int main( int argc, char * * argv ) +{ + ::testing::InitGoogleTest( &argc, argv ); + int const result = RUN_ALL_TESTS(); + return result; +} diff --git a/unitTests/testTensorOpsCofactorTwoArgs.cpp b/unitTests/testTensorOpsCofactorTwoArgs.cpp new file mode 100644 index 00000000..21f9992a --- /dev/null +++ b/unitTests/testTensorOpsCofactorTwoArgs.cpp @@ -0,0 +1,29 @@ +/* + * Copyright (c) 2021, Lawrence Livermore National Security, LLC and LvArray contributors. + * All rights reserved. + * See the LICENSE file for details. + * SPDX-License-Identifier: (BSD-3-Clause) + */ + +#include "testTensorOpsInverse.hpp" + +namespace LvArray +{ +namespace testing +{ + +TYPED_TEST( InverseTest, cofactorTwoArgs ) +{ + this->cofactorTwoArgs(); +} + +} // namespace testing +} // namespace LvArray + +// This is the default gtest main method. It is included for ease of debugging. +int main( int argc, char * * argv ) +{ + ::testing::InitGoogleTest( &argc, argv ); + int const result = RUN_ALL_TESTS(); + return result; +} diff --git a/unitTests/testTensorOpsInverse.hpp b/unitTests/testTensorOpsInverse.hpp index 9edfa950..1e1461f6 100644 --- a/unitTests/testTensorOpsInverse.hpp +++ b/unitTests/testTensorOpsInverse.hpp @@ -90,6 +90,84 @@ class InverseTest : public ::testing::Test } ); } + void cofactorTwoArgs() + { + ArrayViewT< T, 3, 2 > const srcMatrix_IJK = m_srcMatrix_IJK.toView(); + ArrayViewT< T, 3, 1 > const srcMatrix_IKJ = m_srcMatrix_IKJ.toView(); + ArrayViewT< T, 3, 0 > const srcMatrix_KJI = m_srcMatrix_KJI.toView(); + + ArrayViewT< FLOAT, 3, 2 > const dstMatrix_IJK = m_dstMatrix_IJK.toView(); + ArrayViewT< FLOAT, 3, 1 > const dstMatrix_IKJ = m_dstMatrix_IKJ.toView(); + ArrayViewT< FLOAT, 3, 0 > const dstMatrix_KJI = m_dstMatrix_KJI.toView(); + + ArrayT< T, RAJA::PERM_IJK > arrayOfMatrices( numMatrices, M, M ); + + T scale = 100; + for( T & value : arrayOfMatrices ) + { value = randomValue( scale, m_gen ); } + + ArrayViewT< T const, 3, 2 > const matrices = arrayOfMatrices.toViewConst(); + + forall< POLICY >( matrices.size( 0 ), [=] LVARRAY_HOST_DEVICE ( INDEX_TYPE const i ) + { + T srcLocal[ M ][ M ]; + FLOAT dstLocal[ M ][ M ]; + + #define _TEST( output, input ) \ + tensorOps::copy< M, M >( input, matrices[ i ] ); \ + tensorOps::cofactor< M >( output, input ); \ + checkCofactor( scale, output, input ) + + #define _TEST_PERMS( input, output0, output1, output2, output3 ) \ + _TEST( output0, input ); \ + _TEST( output1, input ); \ + _TEST( output2, input ); \ + _TEST( output3, input ) + + _TEST_PERMS( srcMatrix_IJK[ i ], dstMatrix_IJK[ i ], dstMatrix_IKJ[ i ], dstMatrix_KJI[ i ], dstLocal ); + _TEST_PERMS( srcMatrix_IKJ[ i ], dstMatrix_IJK[ i ], dstMatrix_IKJ[ i ], dstMatrix_KJI[ i ], dstLocal ); + _TEST_PERMS( srcMatrix_KJI[ i ], dstMatrix_IJK[ i ], dstMatrix_IKJ[ i ], dstMatrix_KJI[ i ], dstLocal ); + _TEST_PERMS( srcLocal, dstMatrix_IJK[ i ], dstMatrix_IKJ[ i ], dstMatrix_KJI[ i ], dstLocal ); + + #undef _TEST_PERMS + #undef _TEST + } ); + } + + void cofactorOneArg() + { + ArrayViewT< T, 3, 2 > const srcMatrix_IJK = m_srcMatrix_IJK.toView(); + ArrayViewT< T, 3, 1 > const srcMatrix_IKJ = m_srcMatrix_IKJ.toView(); + ArrayViewT< T, 3, 0 > const srcMatrix_KJI = m_srcMatrix_KJI.toView(); + + ArrayT< T, RAJA::PERM_IJK > arrayOfMatrices( numMatrices, M, M ); + + T scale = 100; + for( T & value : arrayOfMatrices ) + { value = randomValue( scale, m_gen ); } + + ArrayViewT< T const, 3, 2 > const matrices = arrayOfMatrices.toViewConst(); + + forall< POLICY >( matrices.size( 0 ), [=] LVARRAY_HOST_DEVICE ( INDEX_TYPE const i ) + { + T initialMatrix[ M ][ M ]; + T srcLocal[ M ][ M ]; + + #define _TEST( matrix ) \ + tensorOps::copy< M, M >( initialMatrix, matrices[ i ] ); \ + tensorOps::copy< M, M >( matrix, initialMatrix ); \ + tensorOps::cofactor< M >( matrix ); \ + checkCofactor( scale, matrix, initialMatrix ) + + _TEST( srcMatrix_IJK[ i ] ); + _TEST( srcMatrix_IKJ[ i ] ); + _TEST( srcMatrix_KJI[ i ] ); + _TEST( srcLocal ); + + #undef _TEST + } ); + } + void inverseTwoArgs() { ArrayViewT< T, 3, 2 > const srcMatrix_IJK = m_srcMatrix_IJK.toView(); @@ -244,6 +322,52 @@ class InverseTest : public ::testing::Test } ); } + void polarDecomposition() + { + ArrayViewT< T, 3, 2 > const srcMatrix_IJK = m_srcMatrix_IJK.toView(); + ArrayViewT< T, 3, 1 > const srcMatrix_IKJ = m_srcMatrix_IKJ.toView(); + ArrayViewT< T, 3, 0 > const srcMatrix_KJI = m_srcMatrix_KJI.toView(); + + ArrayViewT< FLOAT, 3, 2 > const dstMatrix_IJK = m_dstMatrix_IJK.toView(); + ArrayViewT< FLOAT, 3, 1 > const dstMatrix_IKJ = m_dstMatrix_IKJ.toView(); + ArrayViewT< FLOAT, 3, 0 > const dstMatrix_KJI = m_dstMatrix_KJI.toView(); + + ArrayT< T, RAJA::PERM_IJK > arrayOfMatrices( numMatrices, M, M ); + + T scale = 100; + for( T & value : arrayOfMatrices ) + { + value = randomValue( scale, m_gen ); + } + + ArrayViewT< T const, 3, 2 > const matrices = arrayOfMatrices.toViewConst(); + + forall< POLICY >( matrices.size( 0 ), [=] LVARRAY_HOST_DEVICE ( INDEX_TYPE const i ) + { + T srcLocal[ M ][ M ]; + FLOAT dstLocal[ M ][ M ]; + + #define _TEST( output, input ) \ + tensorOps::copy< M, M >( input, matrices[ i ] ); \ + tensorOps::polarDecomposition< M >( output, input ); \ + checkPolarDecomposition( output, input ) + + #define _TEST_PERMS( input, output0, output1, output2, output3 ) \ + _TEST( output0, input ); \ + _TEST( output1, input ); \ + _TEST( output2, input ); \ + _TEST( output3, input ) + + _TEST_PERMS( srcMatrix_IJK[ i ], dstMatrix_IJK[ i ], dstMatrix_IKJ[ i ], dstMatrix_KJI[ i ], dstLocal ); + _TEST_PERMS( srcMatrix_IKJ[ i ], dstMatrix_IJK[ i ], dstMatrix_IKJ[ i ], dstMatrix_KJI[ i ], dstLocal ); + _TEST_PERMS( srcMatrix_KJI[ i ], dstMatrix_IJK[ i ], dstMatrix_IKJ[ i ], dstMatrix_KJI[ i ], dstLocal ); + _TEST_PERMS( srcLocal, dstMatrix_IJK[ i ], dstMatrix_IKJ[ i ], dstMatrix_KJI[ i ], dstLocal ); + + #undef _TEST_PERMS + #undef _TEST + } ); + } + private: @@ -294,6 +418,24 @@ class InverseTest : public ::testing::Test 3 * scale * scale * scale * epsilon ) } + + template< typename MATRIX_A, typename MATRIX_B > + static void LVARRAY_HOST_DEVICE checkCofactor( T const scale, MATRIX_A && cofactor, MATRIX_B && source ) + { + FLOAT const epsilon = NumericLimitsNC< FLOAT >{}.epsilon; + + FLOAT inverse[ M ][ M ]; + T det = tensorOps::invert< M >( inverse, source ); + tensorOps::transpose< M >( inverse ); + tensorOps::scale< M, M >( inverse, det ); + + for( int i = 0; i < M; ++i ) + { + for( int j = 0; j < M; ++j ) + { PORTABLE_EXPECT_NEAR( cofactor[ i ][ j ], inverse[ i ][ j ], 5 * scale * epsilon ); } + } + } + template< typename MATRIX_A, typename MATRIX_B > static void LVARRAY_HOST_DEVICE checkInverse( T const scale, T const det, @@ -347,6 +489,53 @@ class InverseTest : public ::testing::Test } } + template< typename MATRIX_A, typename MATRIX_B > + static void LVARRAY_HOST_DEVICE checkPolarDecomposition( MATRIX_A const & orthogonalMatrix, MATRIX_B const & sourceMatrix ) + { + // Error tolerance (depends on the implementation!) + using FloatingPoint = std::decay_t< decltype( orthogonalMatrix[0][0] ) >; + FloatingPoint tolerance = 10 * LvArray::NumericLimits< FloatingPoint >::epsilon; + FloatingPoint sqrtTolerance = math::sqrt( tolerance ); + + if( M == 2 ) + { + // Check if orthogonalMatrix is actually orthogonal + PORTABLE_EXPECT_NEAR( orthogonalMatrix[0][0] * orthogonalMatrix[0][0] + orthogonalMatrix[0][1] * orthogonalMatrix[0][1], 1.0, tolerance ); + PORTABLE_EXPECT_NEAR( orthogonalMatrix[0][0] * orthogonalMatrix[0][1] + orthogonalMatrix[1][0] * orthogonalMatrix[1][1], 0.0, tolerance ); + PORTABLE_EXPECT_NEAR( orthogonalMatrix[0][0] * orthogonalMatrix[1][0] + orthogonalMatrix[0][1] * orthogonalMatrix[1][1], 0.0, tolerance ); + PORTABLE_EXPECT_NEAR( orthogonalMatrix[1][0] * orthogonalMatrix[1][0] + orthogonalMatrix[1][1] * orthogonalMatrix[1][1], 1.0, tolerance ); + + // Check if sourceMatrix.(orthogonalMatrix^T) yields a symmetric matrix + PORTABLE_EXPECT_NEAR( orthogonalMatrix[1][0] * sourceMatrix[0][0] + orthogonalMatrix[1][1] * sourceMatrix[0][1], + orthogonalMatrix[0][0] * sourceMatrix[1][0] + orthogonalMatrix[0][1] * sourceMatrix[1][1], + sqrtTolerance ); + } + else if( M == 3 ) + { + // Check if orthogonalMatrix is actually orthogonal + PORTABLE_EXPECT_NEAR( orthogonalMatrix[0][0] * orthogonalMatrix[0][0] + orthogonalMatrix[0][1] * orthogonalMatrix[0][1] + orthogonalMatrix[0][2] * orthogonalMatrix[0][2], 1.0, tolerance ); + PORTABLE_EXPECT_NEAR( orthogonalMatrix[0][0] * orthogonalMatrix[1][0] + orthogonalMatrix[0][1] * orthogonalMatrix[1][1] + orthogonalMatrix[0][2] * orthogonalMatrix[1][2], 0.0, tolerance ); + PORTABLE_EXPECT_NEAR( orthogonalMatrix[0][0] * orthogonalMatrix[2][0] + orthogonalMatrix[0][1] * orthogonalMatrix[2][1] + orthogonalMatrix[0][2] * orthogonalMatrix[2][2], 0.0, tolerance ); + PORTABLE_EXPECT_NEAR( orthogonalMatrix[0][0] * orthogonalMatrix[1][0] + orthogonalMatrix[0][1] * orthogonalMatrix[1][1] + orthogonalMatrix[0][2] * orthogonalMatrix[1][2], 0.0, tolerance ); + PORTABLE_EXPECT_NEAR( orthogonalMatrix[1][0] * orthogonalMatrix[1][0] + orthogonalMatrix[1][1] * orthogonalMatrix[1][1] + orthogonalMatrix[1][2] * orthogonalMatrix[1][2], 1.0, tolerance ); + PORTABLE_EXPECT_NEAR( orthogonalMatrix[1][0] * orthogonalMatrix[2][0] + orthogonalMatrix[1][1] * orthogonalMatrix[2][1] + orthogonalMatrix[1][2] * orthogonalMatrix[2][2], 0.0, tolerance ); + PORTABLE_EXPECT_NEAR( orthogonalMatrix[0][0] * orthogonalMatrix[2][0] + orthogonalMatrix[0][1] * orthogonalMatrix[2][1] + orthogonalMatrix[0][2] * orthogonalMatrix[2][2], 0.0, tolerance ); + PORTABLE_EXPECT_NEAR( orthogonalMatrix[1][0] * orthogonalMatrix[2][0] + orthogonalMatrix[1][1] * orthogonalMatrix[2][1] + orthogonalMatrix[1][2] * orthogonalMatrix[2][2], 0.0, tolerance ); + PORTABLE_EXPECT_NEAR( orthogonalMatrix[2][0] * orthogonalMatrix[2][0] + orthogonalMatrix[2][1] * orthogonalMatrix[2][1] + orthogonalMatrix[2][2] * orthogonalMatrix[2][2], 1.0, tolerance ); + + // Check if sourceMatrix.(orthogonalMatrix^T) yields a symmetric matrix + PORTABLE_EXPECT_NEAR( orthogonalMatrix[1][0] * sourceMatrix[0][0] + orthogonalMatrix[1][1] * sourceMatrix[0][1] + orthogonalMatrix[1][2] * sourceMatrix[0][2], + orthogonalMatrix[0][0] * sourceMatrix[1][0] + orthogonalMatrix[0][1] * sourceMatrix[1][1] + orthogonalMatrix[0][2] * sourceMatrix[1][2], + sqrtTolerance ); + PORTABLE_EXPECT_NEAR( orthogonalMatrix[2][0] * sourceMatrix[0][0] + orthogonalMatrix[2][1] * sourceMatrix[0][1] + orthogonalMatrix[2][2] * sourceMatrix[0][2], + orthogonalMatrix[0][0] * sourceMatrix[2][0] + orthogonalMatrix[0][1] * sourceMatrix[2][1] + orthogonalMatrix[0][2] * sourceMatrix[2][2], + sqrtTolerance ); + PORTABLE_EXPECT_NEAR( orthogonalMatrix[2][0] * sourceMatrix[1][0] + orthogonalMatrix[2][1] * sourceMatrix[1][1] + orthogonalMatrix[2][2] * sourceMatrix[1][2], + orthogonalMatrix[1][0] * sourceMatrix[2][0] + orthogonalMatrix[1][1] * sourceMatrix[2][1] + orthogonalMatrix[1][2] * sourceMatrix[2][2], + sqrtTolerance ); + } + } + std::mt19937_64 m_gen; diff --git a/unitTests/testTensorOpsPolarDecomposition.cpp b/unitTests/testTensorOpsPolarDecomposition.cpp new file mode 100644 index 00000000..29c48dc0 --- /dev/null +++ b/unitTests/testTensorOpsPolarDecomposition.cpp @@ -0,0 +1,29 @@ +/* + * Copyright (c) 2021, Lawrence Livermore National Security, LLC and LvArray contributors. + * All rights reserved. + * See the LICENSE file for details. + * SPDX-License-Identifier: (BSD-3-Clause) + */ + +#include "testTensorOpsInverse.hpp" + +namespace LvArray +{ +namespace testing +{ + +TYPED_TEST( InverseTest, polarDecomposition ) +{ + this->polarDecomposition(); +} + +} // namespace testing +} // namespace LvArray + +// This is the default gtest main method. It is included for ease of debugging. +int main( int argc, char * * argv ) +{ + ::testing::InitGoogleTest( &argc, argv ); + int const result = RUN_ALL_TESTS(); + return result; +}