From 3be22c9a011d480ca714a97e32fa157344409d21 Mon Sep 17 00:00:00 2001 From: William Tambellini Date: Fri, 21 Dec 2018 11:31:50 -0800 Subject: [PATCH] Update nvidia gemm_bench.cu for mixed precision f16 to f32 --- code/nvidia/gemm_bench.cu | 39 ++++++++++++++++++++++++++++----------- 1 file changed, 28 insertions(+), 11 deletions(-) diff --git a/code/nvidia/gemm_bench.cu b/code/nvidia/gemm_bench.cu index ddb5f55..7f24f4d 100644 --- a/code/nvidia/gemm_bench.cu +++ b/code/nvidia/gemm_bench.cu @@ -9,6 +9,7 @@ #include #include +#include #include #include @@ -54,7 +55,7 @@ Supported precision types: For Maxwell GPUS: float for training and inference -For Pascal GPUS: +For Pascal/Volta GPUS: float, half for training float, half, int8 for inference @@ -85,11 +86,22 @@ int time_gemm(Tensor A, Tensor B, Tensor C, bool a_t, bool b_t, cubl cudaDataType_t compute_type = CUDA_R_32F; cublasGemmAlgo_t algo; - if (std::is_same::value) { + if (std::is_same::value) { A_type = CUDA_R_16F; B_type = CUDA_R_16F; - C_type = CUDA_R_16F; - compute_type = CUDA_R_16F; + } + + if (std::is_same::value) { + C_type = CUDA_R_32F; + compute_type = CUDA_R_32F; + } else if (std::is_same::value) { + C_type = CUDA_R_16F; + compute_type = CUDA_R_16F; + } else if (std::is_same::value) { + compute_type = CUDA_R_32I; + } else { + std::cerr << "Unsuported T2 (output) type" << std::endl; + exit(1); } if (std::is_same::value) { @@ -219,8 +231,7 @@ int main(int argc, char **argv) { if (status != CUBLAS_STATUS_SUCCESS) { std::cout << "CUBLAS math mode failed" << std::endl; - } - + } else std::cout << "CUBALS_TENSOR_OP_MATH ON" << std::endl; curandGenerator_t curand_gen; @@ -290,18 +301,24 @@ int main(int argc, char **argv) { if (!skip_kernel) time_ms = time_gemm(a, b, c, a_t, b_t, cublas_handle); } else if (precision == "half") { - auto a = rand({a_t ? k : m, a_t ? m : k}, curand_gen); - auto b = rand({b_t ? n : k, b_t ? k : n}, curand_gen); - auto c = zeros({m, n}); + auto a = rand<__half>({a_t ? k : m, a_t ? m : k}, curand_gen); + auto b = rand<__half>({b_t ? n : k, b_t ? k : n}, curand_gen); + auto c = zeros<__half>({m, n}); std::cout << std::setw(13) << precision; - time_ms = time_gemm(a, b, c, a_t, b_t, cublas_handle); + time_ms = time_gemm<__half, __half>(a, b, c, a_t, b_t, cublas_handle); } else if (precision == "float") { auto a = rand({a_t ? k : m, a_t ? m : k}, curand_gen); auto b = rand({b_t ? n : k, b_t ? k : n}, curand_gen); auto c = zeros({m, n}); std::cout << std::setw(13) << precision; time_ms = time_gemm(a, b, c, a_t, b_t, cublas_handle); - } else { + } else if (precision == "mixed") { // f16 x f16 to f32 + auto a = rand<__half>({a_t ? k : m, a_t ? m : k}, curand_gen); + auto b = rand<__half>({b_t ? n : k, b_t ? k : n}, curand_gen); + auto c = zeros({m, n}); + std::cout << std::setw(13) << precision; + time_ms = time_gemm<__half, float>(a, b, c, a_t, b_t, cublas_handle); + } else { throw std::runtime_error(ss.str()); } #else