//===-- duals/dual_eigen - wrapp dual number type for Eigen -----*- C++ -*-===// // // Part of the cppduals project. // https://gitlab.com/tesch1/cppduals // // See https://gitlab.com/tesch1/cppduals/blob/master/LICENSE.txt for // license information. // // (c)2019 Michael Tesch. tesch1@gmail.com // // This Source Code Form is subject to the terms of the Mozilla // Public License v. 2.0. If a copy of the MPL was not distributed // with this file, You can obtain one at http://mozilla.org/MPL/2.0/. // // Some code fragments are adapted from Eigen's Complex.h files, which // carry the following license: // // Copyright (C) 2014 Benoit Steiner (benoit.steiner.goog@gmail.com) // // This Source Code Form is subject to the terms of the Mozilla // Public License v. 2.0. If a copy of the MPL was not distributed // with this file, You can obtain one at http://mozilla.org/MPL/2.0/. #ifndef CPPDUALS_DUAL_EIGEN #define CPPDUALS_DUAL_EIGEN #include "dual" #ifndef PARSED_BY_DOXYGEN #include #include #endif #if !EIGEN_VERSION_AT_LEAST(3, 3, 0) #error "Eigen too old for cppduals. Upgrade." #endif /** \file dual_eigen \brief Nestable, vectorizable Dual numbers for Eigen Include this file to enable use of the `duals::dual<>` class as a scalar type in Eigen. Some optimizations are performed using Eigen's vectorization facilities, which is particularly noticeable for multiplication and division. In certain cases the vectorization can be worse than the compiler's code, thus it can be disabled by `#define CPPDUALS_DONT_VECTORIZE`. There is some also vectorization for dual-ized complex's (`std::complex>`), which can be disabled by `#define CPPDUALS_DONT_VECTORIZE_CDUAL`. The same type promotion that exists for `duals::dual`: an operation between a 'scalar' T and dual is promoted to dual. This is enabled for Eigen matrices by default: multiplying an `Eigen::Matrix` by `1_ef` results in an expression with a basic POD type of `Eigen::Matrix,..>`. This type of type promotion can be disabled (ie for correctness checking or to speed compilation) by #define `CPPDUALS_NO_EIGEN_PROMOTION`. */ namespace duals { /** template unary functor to get the real part of a matrix * use it like this: m2 = m1.unaryExpr(CwiseRpartOp()); * or just call m2 = rpart(m1); */ template struct CwiseRpartOp { typedef decltype(rpart(ScalarSrc())) ScalarDst; typedef ScalarDst result_type; EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE ScalarDst operator()(const ScalarSrc & x) const { return rpart(x); } }; /** template unary functor to get the dual part of a matrix * use it like this: m2 = m1.unaryExpr(CwiseDpartOp()); * or just call m2 = dpart(m1); */ template struct CwiseDpartOp { typedef decltype(dpart(ScalarSrc())) ScalarDst; typedef ScalarDst result_type; EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE ScalarDst operator()(const ScalarSrc & x) const { return dpart(x); } }; /** template unary functor to dual-conjugate a matrix of duals * use it like this: m2 = m1.unaryExpr(CwiseDconjOp()); * or just call m2 = dconj(m1); */ template struct CwiseDconjOp { EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Scalar operator()(const Scalar & x) const { return dconj(x); } }; /// Extract the "real part" of a dual-valued matrix. #ifdef PARSED_BY_DOXYGEN template const RealType #else template const Eigen::CwiseUnaryOp, const XprType > #endif rpart(const Eigen::EigenBase & x) { return x.derived().unaryExpr(CwiseRpartOp()); } /// Extract the "dual part" of a dual-valued matrix. #ifdef PARSED_BY_DOXYGEN template const RealType #else template const Eigen::CwiseUnaryOp, const XprType > #endif dpart(const Eigen::EigenBase & x) { return x.derived().unaryExpr(CwiseDpartOp()); } /// Dual-conjugate a dual-valued matrix. #ifdef PARSED_BY_DOXYGEN template const XprType #else template const Eigen::CwiseUnaryOp, const XprType > #endif dconj(const Eigen::EigenBase & x) { return x.derived().unaryExpr(CwiseDconjOp()); } } // namespace duals namespace Eigen { template struct NumTraits > : GenericNumTraits { typedef duals::dual Real; typedef duals::dual Literal; typedef duals::dual Nested; enum { IsInteger = NumTraits::IsInteger, IsSigned = NumTraits::IsSigned, IsComplex = 0, RequireInitialization = NumTraits::RequireInitialization, ReadCost = 2 * NumTraits::ReadCost, AddCost = 2 * NumTraits::AddCost, MulCost = 3 * NumTraits::MulCost + 1 * NumTraits::AddCost }; EIGEN_DEVICE_FUNC static inline Real epsilon() { return Real(NumTraits::epsilon()); } EIGEN_DEVICE_FUNC static inline Real dummy_precision() { return NumTraits::dummy_precision(); } EIGEN_DEVICE_FUNC static inline Real highest() { return NumTraits::highest(); } EIGEN_DEVICE_FUNC static inline Real lowest() { return NumTraits::lowest(); } EIGEN_DEVICE_FUNC static inline int digits10() { return NumTraits::digits10(); } }; #if !defined(CPPDUALS_NO_EIGEN_PROMOTION) template struct ScalarBinaryOpTraits,duals::dual,BinaryOp> : public duals::can_promote,duals::dual>::wrap {}; template struct ScalarBinaryOpTraits,T,BinaryOp> : public duals::can_promote,T>::wrap {}; template struct ScalarBinaryOpTraits,BinaryOp> : public duals::can_promote,T>::wrap {}; #if 0 // until Eigen doesnt make assumptions about complex return types :P template struct ScalarBinaryOpTraits,duals::dual,BinaryOp> : public duals::can_promote,duals::dual>::wrap {}; template struct ScalarBinaryOpTraits,std::complex,BinaryOp> : public duals::can_promote,duals::dual>::wrap {}; #endif #ifndef PARSED_BY_DOXYGEN // Special cases for nested complex> and reals #define CPPDUALS_CD_SBOTS_REALS(U) \ template \ struct ScalarBinaryOpTraits>,U,BinaryOp> \ : public duals::can_promote>,U>::wrap {}; \ template \ struct ScalarBinaryOpTraits>,BinaryOp> \ : public duals::can_promote>,U>::wrap {} CPPDUALS_CD_SBOTS_REALS(int); CPPDUALS_CD_SBOTS_REALS(float); CPPDUALS_CD_SBOTS_REALS(double); CPPDUALS_CD_SBOTS_REALS(std::complex); #endif // PARSED_BY_DOXYGEN #endif // CPPDUALS_NO_EIGEN_PROMOTION #ifndef PARSED_BY_DOXYGEN namespace numext { using duals::rpart; using duals::dpart; using duals::dconj; } namespace internal { #if 0 // For MatrixExponential.h to treat duals::dual as a known type and compile it template struct is_exp_known_type; template struct matrix_exp_computeUV; template struct is_exp_known_type> : is_exp_known_type {}; template struct matrix_exp_computeUV > : matrix_exp_computeUV { typedef typename NumTraits::Scalar>::Real RealScalar; }; #endif #if 1 // this is used by the packet math for SSE to copy raw duals around. template struct real_impl > { typedef T RealScalar; EIGEN_DEVICE_FUNC static inline T run(const duals::dual& x) { return x.rpart(); } }; template struct real_ref_impl> { typedef typename NumTraits::Real RealScalar; EIGEN_DEVICE_FUNC static inline RealScalar & run(duals::dual & x) { return reinterpret_cast(&x)[0]; } EIGEN_DEVICE_FUNC static inline const RealScalar & run(const duals::dual & x) { return reinterpret_cast(&x)[0]; } }; template struct real_ref_retval> { typedef typename NumTraits::Real & type; }; #else //// template struct real_impl > { typedef duals::dual RealScalar; EIGEN_DEVICE_FUNC static inline RealScalar run(const duals::dual& x) { return x; } }; template struct real_ref_impl> { typedef typename NumTraits >::Real RealScalar; EIGEN_DEVICE_FUNC static inline RealScalar & run(duals::dual & x) { return reinterpret_cast(&x)[0]; } EIGEN_DEVICE_FUNC static inline const RealScalar & run(const duals::dual & x) { return reinterpret_cast(&x)[0]; } }; template struct real_ref_retval> { typedef typename NumTraits>::Real & type; }; #endif /// 0 template struct scalar_random_op> { EIGEN_EMPTY_STRUCT_CTOR(scalar_random_op) inline const duals::dual operator() () const { return duals::random>(); } }; // // stuff for gebp_* template EIGEN_DEVICE_FUNC inline Packet pdconj(const Packet& a) { return numext::dconj(a); } template struct dconj_if; template<> struct dconj_if { template inline T operator()(const T& x) const { return numext::dconj(x); } template inline T pdconj(const T& x) const { return internal::pdconj(x); } }; template<> struct dconj_if { template inline const T& operator()(const T& x) const { return x; } template inline const T& pdconj(const T& x) const { return x; } }; // Generic implementation for custom dual types. template struct dconj_helper { typedef typename ScalarBinaryOpTraits::ReturnType Scalar; EIGEN_STRONG_INLINE Scalar pmadd(const LhsScalar& x, const RhsScalar& y, const Scalar& c) const { return padd(c, pmul(x,y)); } EIGEN_STRONG_INLINE Scalar pmul(const LhsScalar& x, const RhsScalar& y) const { return dconj_if()(x) * dconj_if()(y); } }; template struct dconj_helper { EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Scalar pmadd(const Scalar& x, const Scalar& y, const Scalar& c) const { return internal::pmadd(x,y,c); } EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Scalar pmul(const Scalar& x, const Scalar& y) const { return internal::pmul(x,y); } }; template struct dconj_helper, duals::dual, false,true> { typedef duals::dual Scalar; EIGEN_STRONG_INLINE Scalar pmadd(const Scalar& x, const Scalar& y, const Scalar& c) const { return c + pmul(x,y); } EIGEN_STRONG_INLINE Scalar pmul(const Scalar& x, const Scalar& y) const { return Scalar(numext::real(x)*numext::real(y) + numext::imag(x)*numext::imag(y), numext::imag(x)*numext::real(y) - numext::real(x)*numext::imag(y)); } }; template struct dconj_helper, duals::dual, true,false> { typedef duals::dual Scalar; EIGEN_STRONG_INLINE Scalar pmadd(const Scalar& x, const Scalar& y, const Scalar& c) const { return c + pmul(x,y); } EIGEN_STRONG_INLINE Scalar pmul(const Scalar& x, const Scalar& y) const { return Scalar(numext::real(x)*numext::real(y) + numext::imag(x)*numext::imag(y), numext::real(x)*numext::imag(y) - numext::imag(x)*numext::real(y)); } }; template struct dconj_helper, duals::dual, true,true> { typedef duals::dual Scalar; EIGEN_STRONG_INLINE Scalar pmadd(const Scalar& x, const Scalar& y, const Scalar& c) const { return c + pmul(x,y); } EIGEN_STRONG_INLINE Scalar pmul(const Scalar& x, const Scalar& y) const { return Scalar(numext::real(x)*numext::real(y) - numext::imag(x)*numext::imag(y), -numext::real(x)*numext::imag(y) - numext::imag(x)*numext::real(y)); } }; template struct dconj_helper, RealScalar, Conj,false> { typedef duals::dual Scalar; EIGEN_STRONG_INLINE Scalar pmadd(const Scalar& x, const RealScalar& y, const Scalar& c) const { return padd(c, pmul(x,y)); } EIGEN_STRONG_INLINE Scalar pmul(const Scalar& x, const RealScalar& y) const { return dconj_if()(x)*y; } }; template struct dconj_helper, false,Conj> { typedef duals::dual Scalar; EIGEN_STRONG_INLINE Scalar pmadd(const RealScalar& x, const Scalar& y, const Scalar& c) const { return padd(c, pmul(x,y)); } EIGEN_STRONG_INLINE Scalar pmul(const RealScalar& x, const Scalar& y) const { return x*dconj_if()(y); } }; } // namespace internal #endif // PARSED_BY_DOXYGEN } // namespace Eigen #if !defined(CPPDUALS_DONT_VECTORIZE) #ifndef PARSED_BY_DOXYGEN // first thing Eigen does: stop the compiler from committing suicide #include "Eigen/src/Core/util/DisableStupidWarnings.h" #define EIGEN_MAKE_DCONJ_HELPER_DUAL_REAL(PACKET_DUAL, PACKET_REAL) \ template<> struct dconj_helper { \ EIGEN_STRONG_INLINE PACKET_DUAL pmadd(const PACKET_REAL& x, const PACKET_DUAL& y, const PACKET_DUAL& c) const \ { return padd(c, pmul(x,y)); } \ EIGEN_STRONG_INLINE PACKET_DUAL pmul(const PACKET_REAL& x, const PACKET_DUAL& y) const \ { return PACKET_DUAL(Eigen::internal::pmul(x, PACKET_REAL(y.v))); } \ }; \ \ template<> struct dconj_helper { \ EIGEN_STRONG_INLINE PACKET_DUAL pmadd(const PACKET_DUAL& x, const PACKET_REAL& y, const PACKET_DUAL& c) const \ { return padd(c, pmul(x,y)); } \ EIGEN_STRONG_INLINE PACKET_DUAL pmul(const PACKET_DUAL& x, const PACKET_REAL& y) const \ { return PACKET_DUAL(Eigen::internal::pmul(PACKET_REAL(x.v), y)); } \ }; #if defined EIGEN_VECTORIZE_AVX512 #include "duals/arch/SSE/Dual.h" #include "duals/arch/AVX/Dual.h" #include "duals/arch/SSE/ComplexDual.h" #include "duals/arch/AVX/ComplexDual.h" #elif defined EIGEN_VECTORIZE_AVX #include "duals/arch/SSE/Dual.h" #include "duals/arch/AVX/Dual.h" #include "duals/arch/SSE/ComplexDual.h" #include "duals/arch/AVX/ComplexDual.h" #elif defined EIGEN_VECTORIZE_SSE #include "duals/arch/SSE/Dual.h" #include "duals/arch/SSE/ComplexDual.h" #elif defined(EIGEN_VECTORIZE_ALTIVEC) || defined(EIGEN_VECTORIZE_VSX) // #include "duals/arch/AltiVec/Dual.h" // TODO #elif defined EIGEN_VECTORIZE_NEON #include "duals/arch/NEON/Dual.h" #include "duals/arch/NEON/ComplexDual.h" #elif defined EIGEN_VECTORIZE_ZVECTOR // #include "duals/arch/ZVector/Dual.h" // TODO #endif #undef EIGEN_MAKE_DCONJ_HELPER_DUAL_REAL // reallow compiler seppuku #include "Eigen/src/Core/util/ReenableStupidWarnings.h" #endif // PARSED_BY_DOXYGEN //////// gepb for duals::dual #if 0 // TODO namespace Eigen { namespace internal { template class gebp_traits, RealScalar, _ConjLhs, false> { public: typedef duals::dual LhsScalar; typedef RealScalar RhsScalar; typedef typename ScalarBinaryOpTraits::ReturnType ResScalar; enum { ConjLhs = _ConjLhs, ConjRhs = false, Vectorizable = packet_traits::Vectorizable && packet_traits::Vectorizable, LhsPacketSize = Vectorizable ? packet_traits::size : 1, RhsPacketSize = Vectorizable ? packet_traits::size : 1, ResPacketSize = Vectorizable ? packet_traits::size : 1, NumberOfRegisters = EIGEN_ARCH_DEFAULT_NUMBER_OF_REGISTERS, nr = 4, #if defined(EIGEN_HAS_SINGLE_INSTRUCTION_MADD) && !defined(EIGEN_VECTORIZE_ALTIVEC) && !defined(EIGEN_VECTORIZE_VSX) // we assume 16 registers mr = 3*LhsPacketSize, #else mr = (EIGEN_PLAIN_ENUM_MIN(16,NumberOfRegisters)/2/nr)*LhsPacketSize, #endif LhsProgress = LhsPacketSize, RhsProgress = 1 }; typedef typename packet_traits::type _LhsPacket; typedef typename packet_traits::type _RhsPacket; typedef typename packet_traits::type _ResPacket; typedef typename conditional::type LhsPacket; typedef typename conditional::type RhsPacket; typedef typename conditional::type ResPacket; typedef ResPacket AccPacket; EIGEN_STRONG_INLINE void initAcc(AccPacket& p) { p = pset1(ResScalar(0)); } EIGEN_STRONG_INLINE void loadRhs(const RhsScalar* b, RhsPacket& dest) const { dest = pset1(*b); } EIGEN_STRONG_INLINE void loadRhsQuad(const RhsScalar* b, RhsPacket& dest) const { dest = pset1(*b); } EIGEN_STRONG_INLINE void loadLhs(const LhsScalar* a, LhsPacket& dest) const { dest = pload(a); } EIGEN_STRONG_INLINE void loadLhsUnaligned(const LhsScalar* a, LhsPacket& dest) const { dest = ploadu(a); } EIGEN_STRONG_INLINE void broadcastRhs(const RhsScalar* b, RhsPacket& b0, RhsPacket& b1, RhsPacket& b2, RhsPacket& b3) { pbroadcast4(b, b0, b1, b2, b3); } // EIGEN_STRONG_INLINE void broadcastRhs(const RhsScalar* b, RhsPacket& b0, RhsPacket& b1) // { // pbroadcast2(b, b0, b1); // } EIGEN_STRONG_INLINE void madd(const LhsPacket& a, const RhsPacket& b, AccPacket& c, RhsPacket& tmp) const { madd_impl(a, b, c, tmp, typename conditional::type()); } EIGEN_STRONG_INLINE void madd_impl(const LhsPacket& a, const RhsPacket& b, AccPacket& c, RhsPacket& tmp, const true_type&) const { //std::cerr << CYELLOW "%" << CRESET; //duals::dual, RealScalar // accpacket=lhspacket #ifdef EIGEN_HAS_SINGLE_INSTRUCTION_MADD EIGEN_UNUSED_VARIABLE(tmp); c.v = pmadd(a.v,b,c.v); #else tmp = b; tmp = pmul(RhsPacket(a.v),tmp); c = AccPacket(padd(RhsPacket(c.v),tmp)); #endif } EIGEN_STRONG_INLINE void madd_impl(const LhsScalar& a, const RhsScalar& b, ResScalar& c, RhsScalar& /*tmp*/, const false_type&) const { c += a * b; } EIGEN_STRONG_INLINE void acc(const AccPacket& c, const ResPacket& alpha, ResPacket& r) const { r = cj.pmadd(c,alpha,r); } protected: dconj_helper cj; }; template class gebp_traits, duals::dual, _ConjLhs, _ConjRhs > { public: typedef duals::dual Scalar; typedef duals::dual LhsScalar; typedef duals::dual RhsScalar; typedef duals::dual ResScalar; enum { ConjLhs = _ConjLhs, ConjRhs = _ConjRhs, Vectorizable = packet_traits::Vectorizable && packet_traits::Vectorizable, RealPacketSize = Vectorizable ? packet_traits::size : 1, ResPacketSize = Vectorizable ? packet_traits::size : 1, LhsPacketSize = Vectorizable ? packet_traits::size : 1, RhsPacketSize = Vectorizable ? packet_traits::size : 1, // FIXME: should depend on NumberOfRegisters nr = 4, mr = ResPacketSize, LhsProgress = ResPacketSize, RhsProgress = 1 }; typedef typename packet_traits::type RealPacket; typedef typename packet_traits::type ScalarPacket; typedef DoublePacket DoublePacketType; typedef typename conditional::type LhsPacket; typedef typename conditional::type RhsPacket; typedef typename conditional::type ResPacket; typedef typename conditional::type AccPacket; EIGEN_STRONG_INLINE void initAcc(Scalar& p) { p = Scalar(0); } EIGEN_STRONG_INLINE void initAcc(DoublePacketType& p) { p.first = pset1(RealScalar(0)); p.second = pset1(RealScalar(0)); } // Scalar path EIGEN_STRONG_INLINE void loadRhs(const RhsScalar* b, ResPacket& dest) const { dest = pset1(*b); } // Vectorized path EIGEN_STRONG_INLINE void loadRhs(const RhsScalar* b, DoublePacketType& dest) const { dest.first = pset1(rpart(*b)); dest.second = pset1(dpart(*b)); } EIGEN_STRONG_INLINE void loadRhsQuad(const RhsScalar* b, ResPacket& dest) const { loadRhs(b,dest); } EIGEN_STRONG_INLINE void loadRhsQuad(const RhsScalar* b, DoublePacketType& dest) const { eigen_internal_assert(unpacket_traits::size<=4); loadRhs(b,dest); } EIGEN_STRONG_INLINE void broadcastRhs(const RhsScalar* b, RhsPacket& b0, RhsPacket& b1, RhsPacket& b2, RhsPacket& b3) { // FIXME not sure that's the best way to implement it! loadRhs(b+0, b0); loadRhs(b+1, b1); loadRhs(b+2, b2); loadRhs(b+3, b3); } // Vectorized path EIGEN_STRONG_INLINE void broadcastRhs(const RhsScalar* b, DoublePacketType& b0, DoublePacketType& b1) { // FIXME not sure that's the best way to implement it! loadRhs(b+0, b0); loadRhs(b+1, b1); } // Scalar path EIGEN_STRONG_INLINE void broadcastRhs(const RhsScalar* b, RhsScalar& b0, RhsScalar& b1) { // FIXME not sure that's the best way to implement it! loadRhs(b+0, b0); loadRhs(b+1, b1); } // nothing special here EIGEN_STRONG_INLINE void loadLhs(const LhsScalar* a, LhsPacket& dest) const { dest = pload((const typename unpacket_traits::type*)(a)); } EIGEN_STRONG_INLINE void loadLhsUnaligned(const LhsScalar* a, LhsPacket& dest) const { dest = ploadu((const typename unpacket_traits::type*)(a)); } EIGEN_STRONG_INLINE void madd(const LhsPacket& a, const RhsPacket& b, DoublePacketType& c, RhsPacket& /*tmp*/) const { c.first = padd(pmul(a,b.first), c.first); c.second = padd(pmul(a,b.second),c.second); } EIGEN_STRONG_INLINE void madd(const LhsPacket& a, const RhsPacket& b, ResPacket& c, RhsPacket& /*tmp*/) const { c = cj.pmadd(a,b,c); } EIGEN_STRONG_INLINE void acc(const Scalar& c, const Scalar& alpha, Scalar& r) const { r += alpha * c; } EIGEN_STRONG_INLINE void acc(const DoublePacketType& c, const ResPacket& alpha, ResPacket& r) const { // assemble c ResPacket tmp; if((!ConjLhs)&&(!ConjRhs)) { tmp = pcplxflip(pconj(ResPacket(c.second))); tmp = padd(ResPacket(c.first),tmp); } else if((!ConjLhs)&&(ConjRhs)) { tmp = pconj(pcplxflip(ResPacket(c.second))); tmp = padd(ResPacket(c.first),tmp); } else if((ConjLhs)&&(!ConjRhs)) { tmp = pcplxflip(ResPacket(c.second)); tmp = padd(pconj(ResPacket(c.first)),tmp); } else if((ConjLhs)&&(ConjRhs)) { tmp = pcplxflip(ResPacket(c.second)); tmp = psub(pconj(ResPacket(c.first)),tmp); } r = pmadd(tmp,alpha,r); } protected: dconj_helper cj; }; template class gebp_traits, false, _ConjRhs > { public: typedef duals::dual Scalar; typedef RealScalar LhsScalar; typedef Scalar RhsScalar; typedef Scalar ResScalar; enum { ConjLhs = false, ConjRhs = _ConjRhs, Vectorizable = packet_traits::Vectorizable && packet_traits::Vectorizable, LhsPacketSize = Vectorizable ? packet_traits::size : 1, RhsPacketSize = Vectorizable ? packet_traits::size : 1, ResPacketSize = Vectorizable ? packet_traits::size : 1, NumberOfRegisters = EIGEN_ARCH_DEFAULT_NUMBER_OF_REGISTERS, // FIXME: should depend on NumberOfRegisters nr = 4, mr = (EIGEN_PLAIN_ENUM_MIN(16,NumberOfRegisters)/2/nr)*ResPacketSize, LhsProgress = ResPacketSize, RhsProgress = 1 }; typedef typename packet_traits::type _LhsPacket; typedef typename packet_traits::type _RhsPacket; typedef typename packet_traits::type _ResPacket; typedef typename conditional::type LhsPacket; typedef typename conditional::type RhsPacket; typedef typename conditional::type ResPacket; typedef ResPacket AccPacket; EIGEN_STRONG_INLINE void initAcc(AccPacket& p) { p = pset1(ResScalar(0)); } EIGEN_STRONG_INLINE void loadRhs(const RhsScalar* b, RhsPacket& dest) const { dest = pset1(*b); } void broadcastRhs(const RhsScalar* b, RhsPacket& b0, RhsPacket& b1, RhsPacket& b2, RhsPacket& b3) { pbroadcast4(b, b0, b1, b2, b3); } // EIGEN_STRONG_INLINE void broadcastRhs(const RhsScalar* b, RhsPacket& b0, RhsPacket& b1) // { // // FIXME not sure that's the best way to implement it! // b0 = pload1(b+0); // b1 = pload1(b+1); // } EIGEN_STRONG_INLINE void loadLhs(const LhsScalar* a, LhsPacket& dest) const { dest = ploaddup(a); } EIGEN_STRONG_INLINE void loadRhsQuad(const RhsScalar* b, RhsPacket& dest) const { eigen_internal_assert(unpacket_traits::size<=4); loadRhs(b,dest); } EIGEN_STRONG_INLINE void loadLhsUnaligned(const LhsScalar* a, LhsPacket& dest) const { dest = ploaddup(a); } EIGEN_STRONG_INLINE void madd(const LhsPacket& a, const RhsPacket& b, AccPacket& c, RhsPacket& tmp) const { madd_impl(a, b, c, tmp, typename conditional::type()); } EIGEN_STRONG_INLINE void madd_impl(const LhsPacket& a, const RhsPacket& b, AccPacket& c, RhsPacket& tmp, const true_type&) const { //std::cerr << CYELLOW "*" << CRESET; //RealScalar, duals::dual #ifdef EIGEN_HAS_SINGLE_INSTRUCTION_MADD EIGEN_UNUSED_VARIABLE(tmp); c.v = pmadd(a,b.v,c.v); #else tmp = b; tmp = RhsPacket(pmul(a,LhsPacket(tmp.v))); c = padd(c,tmp); #endif } EIGEN_STRONG_INLINE void madd_impl(const LhsScalar& a, const RhsScalar& b, ResScalar& c, RhsScalar& /*tmp*/, const false_type&) const { c += a * b; } EIGEN_STRONG_INLINE void acc(const AccPacket& c, const ResPacket& alpha, ResPacket& r) const { r = cj.pmadd(alpha,c,r); } protected: dconj_helper cj; }; } } // Eigen::internal #endif // gebp_traits //////// #endif // CPPDUALS_DONT_VECTORIZE #endif // CPPDUALS_DUAL_EIGEN