// NOTE(review): this copy of the file appears to have been damaged in
// extraction. Line breaks were collapsed (so `//` comments now swallow the
// code that follows them on the same physical line) and text between '<' and
// '>' seems to have been dropped: the `#include` directives have no header
// name, `template` keywords have no parameter list, `std::complex` /
// `MatrixX` / `type_num` have no arguments, and the loop in B_MatMatCXX is
// cut off at `for(int i=0; i` and fused into the definition of B_MatVec.
// Restore the file from the upstream source before building; do not
// hand-reconstruct the missing template arguments.
//
// What the visible text shows the file does: it registers Google Benchmark
// cases (via BENCHMARK_TEMPLATE and the V_RANGE argument lists) that time
// Eigen's `C.noalias() = A * B` (B_MatMat), a cblas_{s,d,c,z}gemm call
// wrapped by matrix_multiplcation (B_MatMatBLAS, which also checks the
// result against Eigen's A*B and calls SkipWithError when the relative
// error exceeds 1e-6), and a matrix-vector product (B_MatVec). main()
// prints OPT_FLAGS and Eigen::SimdInstructionSetsInUse() and then runs the
// benchmarks; it refuses to compile when EIGEN_VECTORIZE is undefined or
// NDEBUG is not defined.
//
// NOTE(review): matrix_multiplcation passes CblasColMajor together with
// `lda = tA ? m : k`, `ldb = tB ? k : n` and an ldc of `n`. Those look like
// row-major leading dimensions; they coincide with the column-major ones only
// when m == n == k, which is the only case B_MatMatBLAS exercises (N x N
// operands). Confirm against the CBLAS gemm documentation before reusing
// the helper with non-square inputs.
//===-- bench_gemm - benchmark dual m*m -----------------------*- C++ -*-===// // // Part of the cppduals Project // See https://llvm.org/LICENSE.txt for license information. // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception // // (c)2019 Michael Tesch. tesch1@gmail.com // #if defined(__APPLE__) && defined(__clang__) #include #else #ifdef EIGEN_LAPACKE #include #else #include #endif extern "C" { //#include //#include #include CBLAS_HEADER } #endif // defined(__APPLE__) && defined(__clang__) #include #include #include #include #include "type_name.hpp" #include #include #include #include "benchmark/benchmark.h" using namespace duals; template< class T > struct type_identity { typedef T type; }; #include /* encode the type into an integer for benchmark output */ template struct type_num { /* should fail */ }; template<> struct type_num { static constexpr int id = 1; }; template<> struct type_num { static constexpr int id = 2; }; template<> struct type_num { static constexpr int id = 3; }; template struct type_num> { static constexpr int id = 10 + type_num::id; }; template struct type_num> { static constexpr int id = 100 + type_num::id; }; using duals::dualf; using duals::duald; typedef std::complex complexd; typedef std::complex complexf; typedef std::complex cduald; typedef std::complex cdualf; template using MatrixX = Eigen::Matrix; #if 0 #define V_RANGE(V,NF) ->Arg(V*4/NF)->Arg(V*32/NF)->Arg(V*256/NF)->Arg(V*2048/NF)->Arg(V*1)->Complexity() #else #define V_RANGE(V,NF) ->Arg(V*64/NF)->Arg(V*128/NF)->Arg(V*256/NF)->Arg(V*512/NF)->Arg(V*1024/NF) ->Arg(V*2048/NF) #endif // measure Eigen's matrix-matrix multiplication template void B_MatMat(benchmark::State& state) { int N = state.range(0); typedef typename Eigen::ScalarBinaryOpTraits::ReturnType R; MatrixX A = MatrixX::Random(N, N); MatrixX B = MatrixX::Random(N, N); MatrixX C = MatrixX::Random(N, N); for (auto _ : state) { C.noalias() = A * B; benchmark::ClobberMemory(); // Force c to be written to memory. 
} state.SetComplexityN(state.range(0)); } template ::value>::type* = nullptr> void matrix_multiplcation(T *A, int Awidth, int Aheight, T *B, int Bwidth, int Bheight, T *AB, bool tA, bool tB, typename type_identity::type beta) { int A_height = tA ? Awidth : Aheight; int A_width = tA ? Aheight : Awidth; #ifndef NDEBUG int B_height = tB ? Bwidth : Bheight; #endif int B_width = tB ? Bheight : Bwidth; int m = A_height; int n = B_width; int k = A_width; // Error, width and height should match! assert(A_width == B_height); int lda = tA ? m : k; int ldb = tB ? k : n; #define TRANSPOSE(X) ((X) ? CblasTrans : CblasNoTrans) // http://www.netlib.org/lapack/explore-html/d7/d2b/dgemm_8f.html if (!is_complex::value) { if (sizeof(T) == sizeof(float)) cblas_sgemm(CblasColMajor, TRANSPOSE(tA), TRANSPOSE(tB), m, n, k, 1.0, (float *)A, lda, (float *)B, ldb, std::real(beta), (float *)AB, n); else cblas_dgemm(CblasColMajor, TRANSPOSE(tA), TRANSPOSE(tB), m, n, k, 1.0, (double *)A, lda, (double *)B, ldb, std::real(beta), (double *)AB, n); } else { std::complex alphaf(1,0); std::complex alpha(1,0); if (Eigen::NumTraits::digits10() < 10) cblas_cgemm(CblasColMajor, TRANSPOSE(tA), TRANSPOSE(tB), m, n, k, &alphaf, A, lda, B, ldb, &beta, AB, n); else cblas_zgemm(CblasColMajor, TRANSPOSE(tA), TRANSPOSE(tB), m, n, k, &alpha, A, lda, B, ldb, &beta, AB, n); } #undef TRANSPOSE } template ::value>::type* = nullptr> void matrix_multiplcation(T *A, int Awidth, int Aheight, T *B, int Bwidth, int Bheight, T *AB, bool tA, bool tB, typename type_identity::type beta) { /* nothing */ } // measure BLAS matrix-matrix multiplication template void B_MatMatBLAS(benchmark::State& state) { int N = state.range(0); MatrixX A = MatrixX::Random(N, N); MatrixX B = MatrixX::Random(N, N); MatrixX C = MatrixX::Random(N, N); MatrixX D = A*B; for (auto _ : state) { matrix_multiplcation(A.data(), A.cols(), A.rows(), B.data(), B.cols(), B.rows(), C.data(), false, false, (Rt)0.); benchmark::ClobberMemory(); // Force a to be 
written to memory. } double err = (double)rpart((D - C).norm() / D.norm()); if (err > 1e-6) state.SkipWithError("BLAS matmat error"); state.SetComplexityN(state.range(0)); } // measure compiler's matrix-matrix multiplication template void B_MatMatCXX(benchmark::State& state) { int N = state.range(0); std::vector a(N*N); std::vector b(N*N); std::vector c(N*N); for (auto _ : state) { state.PauseTiming(); a.assign(N*N,1.1); b.assign(N*N,2.2); c.assign(N*N,0.); state.ResumeTiming(); for(int i=0; i void B_MatVec(benchmark::State& state) { int N = state.range(0); MatrixX A = MatrixX::Random(N, N); MatrixX b = MatrixX::Random(N, 1); MatrixX c = MatrixX::Random(N, 1); for (auto _ : state) { c = A * b; benchmark::ClobberMemory(); } state.counters["type"] = type_num::id; state.SetComplexityN(state.range(0)); } #define MAKE_BM_SIMPLE(TYPE1,TYPE2,NF) \ BENCHMARK_TEMPLATE(B_MatMat, TYPE1,TYPE2) V_RANGE(1,NF) #define MAKE_BENCHMARKS(TYPE1,TYPE2,NF) \ MAKE_BM_SIMPLE(TYPE1,TYPE2,NF); \ BENCHMARK_TEMPLATE(B_MatMatBLAS, TYPE1) V_RANGE(1,NF) // BENCHMARK_TEMPLATE(B_MatMatBLAS, TYPE1) V_RANGE(1,2*NF); // BENCHMARK_TEMPLATE(B_VecVecMulCXX, TYPE1,TYPE2) V_RANGE(4,NF); // BENCHMARK_TEMPLATE(B_MatMatCXX, TYPE1,TYPE2) V_RANGE(1,NF); MAKE_BENCHMARKS(float, float, 1); MAKE_BENCHMARKS(complexf, complexf,2); //MAKE_BM_SIMPLE(dualf, float,2); TODO MAKE_BM_SIMPLE(dualf, dualf,2); //MAKE_BM_SIMPLE(cdualf, cdualf,2); MAKE_BM_SIMPLE(cdualf, cdualf,4); #if HAVE_BOOST #include MAKE_BM_SIMPLE(audi::gdual,2); #endif // novelty: //MAKE_BM_SIMPLE(float, complexf,2); //MAKE_BM_SIMPLE(complexf, float,2); MAKE_BENCHMARKS(double, double, 1); MAKE_BENCHMARKS(complexd, complexd,2); MAKE_BM_SIMPLE(duald, duald,2); MAKE_BM_SIMPLE(cduald, cduald,4); #define QUOTE(...) STRFY(__VA_ARGS__) #define STRFY(...) 
#__VA_ARGS__ int main(int argc, char** argv) { #ifndef EIGEN_VECTORIZE static_assert(false, "no vectorization?"); #endif #ifndef NDEBUG static_assert(false, "NDEBUG to benchmark?"); #endif std::cout << "OPT_FLAGS=" << QUOTE(OPT_FLAGS) << "\n"; std::cout << "INSTRUCTIONSET=" << Eigen::SimdInstructionSetsInUse() << "\n"; ::benchmark::Initialize(&argc, argv); ::benchmark::RunSpecifiedBenchmarks(); }