Updated docs
This commit is contained in:
parent
924dbd7d11
commit
29b96b3308
6
Macros.h
6
Macros.h
@ -37,6 +37,12 @@
|
|||||||
*/
|
*/
|
||||||
#define HD
|
#define HD
|
||||||
|
|
||||||
|
/**
|
||||||
|
* \def DEVICE_FUNC
|
||||||
|
* Prefix a function with this macro if it is only callable on the CUDA device.
|
||||||
|
*/
|
||||||
|
#define DEVICE_FUNC
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* \def SHARED
|
* \def SHARED
|
||||||
* Mark a variable as static shared memory.
|
* Mark a variable as static shared memory.
|
||||||
|
|||||||
30
Types.h
30
Types.h
@ -18,15 +18,15 @@ using real64 = double; /**< Type alias for 64-bit floating point datatype. */
|
|||||||
|
|
||||||
#ifdef CUDACC
|
#ifdef CUDACC
|
||||||
|
|
||||||
using real16 = __half;
|
using real16 = __half; /**< Type alias for 16-bit floating point datatype, when using GPU.
|
||||||
using realb16 = __nv_bfloat16;
|
Otherwise, defaults to float. */
|
||||||
|
using realb16 = __nv_bfloat16; /**< Type alias for the 16-bit bfloat datatype, when using GPU.
|
||||||
|
Otherwise, defaults to float. */
|
||||||
|
|
||||||
#else
|
#else
|
||||||
|
|
||||||
using real16 = float; /**< Type alias for 16-bit floating point datatype, when using GPU. Otherwise,
|
using real16 = float;
|
||||||
defaults to float. */
|
using realb16 = float;
|
||||||
using realb16 = float; /**< Type alias for the 16-bit bfloat datatype, when using GPU. Otherwise,
|
|
||||||
defaults to float. */
|
|
||||||
|
|
||||||
#endif // CUDACC
|
#endif // CUDACC
|
||||||
|
|
||||||
@ -127,17 +127,17 @@ template complex<real32> operator/<real32>(const real32, const complex<real32>);
|
|||||||
template complex<real64> operator/<real64>(const real64, const complex<real64>);
|
template complex<real64> operator/<real64>(const real64, const complex<real64>);
|
||||||
|
|
||||||
#ifdef CUDACC
|
#ifdef CUDACC
|
||||||
using complex64 = complex<real32>;
|
using complex64 = complex<real32>; /**< Type alias for 64-bit complex floating point datatype.
|
||||||
using complex128 = complex<real64>;
|
* This adapts depending on the CUDA compilation flag, and
|
||||||
|
* will automatically switch to std::complex<real32>. */
|
||||||
|
|
||||||
|
using complex128 = complex<real64>; /**< Type alias for 128-bit complex floating point datatype.
|
||||||
|
* This adapts depending on the CUDA compilation flag, and will
|
||||||
|
* automatically switch to std::complex<real64>. */
|
||||||
|
|
||||||
#else
|
#else
|
||||||
using complex64 = std::complex<real32>; /**< Type alias for 64-bit complex floating point datatype.
|
using complex64 = std::complex<real32>;
|
||||||
* This adapts depending on the CUDA compilation flag, and
|
using complex128 = std::complex<real64>;
|
||||||
* will automatically switch to CudaTools::complex<real32>. */
|
|
||||||
using complex128 =
|
|
||||||
std::complex<real64>; /**< Type alias for 128-bit complex floating point datatype. This adapts
|
|
||||||
* depending on the CUDA compilation flag, and will automatically switch
|
|
||||||
* to CudaTools::complex<real64>. */
|
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
/** Type aliases and lots of metaprogramming definitions, primarily dealing with
|
/** Type aliases and lots of metaprogramming definitions, primarily dealing with
|
||||||
|
|||||||
@ -9,10 +9,21 @@ several classes to enable the usage of CUDA streams, kernels, and graphs.
|
|||||||
Types
|
Types
|
||||||
=====
|
=====
|
||||||
|
|
||||||
.. doxygentypedef:: real32
|
These numeric types are defined to facilitate the special types used for CUDA,
|
||||||
.. doxygentypedef:: real64
|
and it is *necessary* to use them for functions to work properly. It is recommended
|
||||||
.. doxygentypedef:: complex64
|
to bring them into the global namespace if possible, by writing ``using namespace CudaTools::Types;``.
|
||||||
.. doxygentypedef:: complex128
|
|
||||||
|
.. doxygentypedef:: CudaTools::Types::real32
|
||||||
|
.. doxygentypedef:: CudaTools::Types::real64
|
||||||
|
.. doxygentypedef:: CudaTools::Types::complex64
|
||||||
|
.. doxygentypedef:: CudaTools::Types::complex128
|
||||||
|
|
||||||
|
These are types provided by the CUDA Math API, which cannot be easily used as computational
|
||||||
|
types on host code. Take care when transferring these back to host functions, as further
|
||||||
|
processing may require a type conversion.
|
||||||
|
|
||||||
|
.. doxygentypedef:: CudaTools::Types::real16
|
||||||
|
.. doxygentypedef:: CudaTools::Types::realb16
|
||||||
|
|
||||||
Macro Definitions
|
Macro Definitions
|
||||||
=================
|
=================
|
||||||
@ -25,6 +36,7 @@ Device Indicators
|
|||||||
Host-Device Automation
|
Host-Device Automation
|
||||||
----------------------
|
----------------------
|
||||||
.. doxygendefine:: HD
|
.. doxygendefine:: HD
|
||||||
|
.. doxygendefine:: DEVICE_FUNC
|
||||||
.. doxygendefine:: SHARED
|
.. doxygendefine:: SHARED
|
||||||
|
|
||||||
Compilation Options
|
Compilation Options
|
||||||
|
|||||||
@ -27,7 +27,7 @@ on a device. A kernel is a specific function that the host can call to be run on
|
|||||||
|
|
||||||
Core Examples
|
Core Examples
|
||||||
=============
|
=============
|
||||||
This file mainly introduces compiler macros and a few classes that are used to improve the
|
This ``Core.h`` file mainly introduces compiler macros and a few classes that are used to improve the
|
||||||
syntax between host and device code. To define and call a kernel, there are a few
|
syntax between host and device code. To define and call a kernel, there are a few
|
||||||
macros provided. For example,
|
macros provided. For example,
|
||||||
|
|
||||||
|
|||||||
Loading…
x
Reference in New Issue
Block a user