![]() |
CUTLASS
CUDA Templates for Linear Algebra Subroutines and Solvers
|
Reference implementation for GEMM in device-side code. More...
#include "cutlass/coord.h"
#include "cutlass/matrix_traits.h"
#include "cutlass/tensor_view.h"
#include "cutlass/gemm/gemm.h"
#include "cutlass/util/reference/device/thread/gemm.h"

Go to the source code of this file.
Namespaces | |
| cutlass | |
| cutlass::reference | |
| cutlass::reference::device | |
| cutlass::reference::device::kernel | |
Functions | |
| template<typename TensorRefA , typename TensorRefB , typename TensorRefC , typename ScalarType , typename AccumulatorType , typename OutputTile , typename InnerProductOp , typename ConvertOp > | |
| __global__ void | cutlass::reference::device::kernel::Gemm (gemm::GemmCoord problem_size, ScalarType alpha, TensorRefA tensor_a, TensorRefB tensor_b, ScalarType beta, TensorRefC tensor_c, TensorRefC tensor_d, AccumulatorType initial_accum) |
| template<typename TensorRefCollectionA , typename TensorRefCollectionB , typename TensorRefCollectionC , typename ScalarType , typename AccumulatorType , typename OutputTile , typename InnerProductOp , typename ConvertOp > | |
| __global__ void | cutlass::reference::device::kernel::BatchedGemm (gemm::GemmCoord problem_size, ScalarType alpha, TensorRefCollectionA tensor_collection_a, TensorRefCollectionB tensor_collection_b, ScalarType beta, TensorRefCollectionC tensor_collection_c, AccumulatorType initial_accum) |
1.8.11