Updated docs
This commit is contained in:
parent
924dbd7d11
commit
29b96b3308
6
Macros.h
6
Macros.h
@ -37,6 +37,12 @@
|
||||
*/
|
||||
#define HD
|
||||
|
||||
/**
|
||||
* \def DEVICE_FUNC
|
||||
* Place this in front of a function to mark it as callable only on the CUDA device.
|
||||
*/
|
||||
#define DEVICE_FUNC
|
||||
|
||||
/**
|
||||
* \def SHARED
|
||||
* Mark a variable as static shared memory.
|
||||
|
||||
30
Types.h
30
Types.h
@ -18,15 +18,15 @@ using real64 = double; /**< Type alias for 64-bit floating point datatype. */
|
||||
|
||||
#ifdef CUDACC
|
||||
|
||||
using real16 = __half;
|
||||
using realb16 = __nv_bfloat16;
|
||||
using real16 = __half; /**< Type alias for 16-bit floating point datatype, when using GPU.
|
||||
Otherwise, defaults to float. */
|
||||
using realb16 = __nv_bfloat16; /**< Type alias for the 16-bit bfloat datatype, when using GPU.
|
||||
Otherwise, defaults to float. */
|
||||
|
||||
#else
|
||||
|
||||
using real16 = float; /**< Type alias for 16-bit floating point datatype, when using GPU. Otherwise,
|
||||
defaults to float. */
|
||||
using realb16 = float; /**< Type alias for the 16-bit bfloat datatype, when using GPU. Otherwise,
|
||||
defaults to float. */
|
||||
using real16 = float;
|
||||
using realb16 = float;
|
||||
|
||||
#endif // CUDACC
|
||||
|
||||
@ -127,17 +127,17 @@ template complex<real32> operator/<real32>(const real32, const complex<real32>);
|
||||
template complex<real64> operator/<real64>(const real64, const complex<real64>);
|
||||
|
||||
#ifdef CUDACC
|
||||
using complex64 = complex<real32>;
|
||||
using complex128 = complex<real64>;
|
||||
using complex64 = complex<real32>; /**< Type alias for 64-bit complex floating point datatype.
|
||||
* This adapts depending on the CUDA compilation flag, and
|
||||
* will automatically switch to std::complex<real32>. */
|
||||
|
||||
using complex128 = complex<real64>; /**< Type alias for 128-bit complex floating point datatype.
|
||||
* This adapts depending on the CUDA compilation flag, and will
|
||||
* automatically switch to std::complex<real64>. */
|
||||
|
||||
#else
|
||||
using complex64 = std::complex<real32>; /**< Type alias for 64-bit complex floating point datatype.
|
||||
* This adapts depending on the CUDA compilation flag, and
|
||||
* will automatically switch CudaTools::complex<real32>. */
|
||||
using complex128 =
|
||||
std::complex<real64>; /**< Type alias for 128-bit complex floating point datatype. This adapts
|
||||
* depending on the CUDA compilation flag, and will automatically switch
|
||||
* CudaTools::complex<real64>. */
|
||||
using complex64 = std::complex<real32>;
|
||||
using complex128 = std::complex<real64>;
|
||||
#endif
|
||||
|
||||
/** Type aliases and lots of metaprogramming definitions, primarily dealing with
|
||||
|
||||
@ -9,10 +9,21 @@ several classes to enable the usage of CUDA streams, kernels, and graphs.
|
||||
Types
|
||||
=====
|
||||
|
||||
.. doxygentypedef:: real32
|
||||
.. doxygentypedef:: real64
|
||||
.. doxygentypedef:: complex64
|
||||
.. doxygentypedef:: complex128
|
||||
These numeric types are defined to facilitate the special types used for CUDA,
|
||||
and it is *necessary* to use them for functions to work properly. It is recommended
|
||||
to bring them into the global namespace if possible, by writing ``using namespace CudaTools::Types;``.
|
||||
|
||||
.. doxygentypedef:: CudaTools::Types::real32
|
||||
.. doxygentypedef:: CudaTools::Types::real64
|
||||
.. doxygentypedef:: CudaTools::Types::complex64
|
||||
.. doxygentypedef:: CudaTools::Types::complex128
|
||||
|
||||
These are types provided by the CUDA Math API, which cannot be easily used as computational
|
||||
types on host code. Take care when transferring these back to host functions, as further
|
||||
processing may require a type conversion.
|
||||
|
||||
.. doxygentypedef:: CudaTools::Types::real16
|
||||
.. doxygentypedef:: CudaTools::Types::realb16
|
||||
|
||||
Macro Definitions
|
||||
=================
|
||||
@ -25,6 +36,7 @@ Device Indicators
|
||||
Host-Device Automation
|
||||
----------------------
|
||||
.. doxygendefine:: HD
|
||||
.. doxygendefine:: DEVICE_FUNC
|
||||
.. doxygendefine:: SHARED
|
||||
|
||||
Compilation Options
|
||||
|
||||
@ -27,7 +27,7 @@ on a device. A kernel is a specific function that the host can call to be run on
|
||||
|
||||
Core Examples
|
||||
=============
|
||||
This file mainly introduces compiler macros and a few classes that are used to improve the
|
||||
This ``Core.h`` file mainly introduces compiler macros and a few classes that are used to improve the
|
||||
syntax between host and device code. To define and call a kernel, there are a few
|
||||
macros provided. For example,
|
||||
|
||||
|
||||
Loading…
x
Reference in New Issue
Block a user