Corrected examples for Array
This commit is contained in:
parent
e8e8dcc073
commit
2161c62608
26
Array.h
26
Array.h
@ -450,10 +450,7 @@ template <typename T> class Array {
|
||||
HD Array reshaped(const Shape& new_shape) const {
|
||||
CT_ERROR_IF(shape().items(), !=, new_shape.items(),
|
||||
"New shape cannot have a different number of terms");
|
||||
if (mIsSlice) {
|
||||
Array<T> arr = this->copy();
|
||||
return arr.reshaped(new_shape);
|
||||
}
|
||||
CT_ERROR(mIsSlice, "Cannot reshape slice, a new array must be made. (Try copy first)")
|
||||
Array<T> arr = view();
|
||||
arr.mShape = new_shape;
|
||||
return arr;
|
||||
@ -462,7 +459,7 @@ template <typename T> class Array {
|
||||
HD void reshape(const Shape& new_shape) {
|
||||
CT_ERROR_IF(shape().items(), !=, new_shape.items(),
|
||||
"New shape cannot have a different number of terms");
|
||||
CT_ERROR(mIsSlice, "Cannot reshape slice, a new array must be made. (Try reshaped instead)")
|
||||
CT_ERROR(mIsSlice, "Cannot reshape slice, a new array must be made. (Try copy first)")
|
||||
mShape = new_shape;
|
||||
};
|
||||
|
||||
@ -471,13 +468,26 @@ template <typename T> class Array {
|
||||
* single vectors to their 2D counterparts.
|
||||
*/
|
||||
HD Array atLeast2D() const {
|
||||
return (shape().axes() == 1) ? Array(*this, {shape().length(), 1}) : view();
|
||||
return (shape().axes() == 1) ? reshaped({shape().length(), 1}) : view();
|
||||
};
|
||||
|
||||
/**
|
||||
* Flattens the Array into one dimension.
|
||||
* Reshapes this array, making it at least 2D. Useful for promoting
|
||||
* single vectors to their 2D counterparts.
|
||||
*/
|
||||
HD Array flatten() const { return reshape({mShape.mItems}); };
|
||||
HD void asAtLeast2D() {
    // Anything that is not single-axis is left untouched.
    if (shape().axes() != 1) return;
    // Promote the single-axis shape to two axes: {length, 1}.
    reshape({shape().length(), 1});
};
|
||||
|
||||
/**
|
||||
* Returns a view of this Array that has been flattened into one dimension.
|
||||
*/
|
||||
HD Array flattened() const {
    // One axis holding every item; reshaped() returns the result instead of
    // modifying this Array.
    const Shape oneAxis = {mShape.mItems};
    return reshaped(oneAxis);
};
|
||||
|
||||
/**
|
||||
* Flattens this Array into one dimension.
|
||||
*/
|
||||
HD void flatten() {
    // One axis holding every item; reshape() assigns the new shape to this Array.
    const Shape oneAxis = {mShape.mItems};
    reshape(oneAxis);
};
|
||||
|
||||
/**
|
||||
* Returns the Eigen::Map of this Array.
|
||||
|
||||
@ -136,38 +136,53 @@ We can demonstrate a few here.
|
||||
|
||||
.. code-block:: cpp
|
||||
|
||||
DEFINE_KERNEL(times2, const CudaTools::Array<int>& arr) {
|
||||
BASIC_LOOP(arr.shape().items()) {
|
||||
arr[iThread] *= 2;
|
||||
}
|
||||
DEFINE_KERNEL(times2, const CudaTools::Array<int> arr) {
|
||||
CudaTools::Array<int> flat = arr.flattened();
|
||||
BASIC_LOOP(arr.shape().items()) { flat[iThread] *= 2; }
|
||||
}
|
||||
|
||||
DEFINE_KERNEL(times2double, const CudaTools::Array<double> arr) {
    // Work through a flattened handle so one linear index covers every element.
    CudaTools::Array<double> linear = arr.flattened();
    BASIC_LOOP(arr.shape().items()) {
        // iThread is not declared here; presumably BASIC_LOOP supplies it as the loop index.
        linear[iThread] *= 2;
    }
}
|
||||
|
||||
int main() {
|
||||
CudaTools::Array<int> arrRange = CudaTools::Array<int>::range(0, 10);
|
||||
CudaTools::Array<int> arrConst = CudaTools::Array<int>::constant(1);
|
||||
CudaTools::Array<double> arrLinspace = CudaTools::Array<int>::linspace(0, 5, 10);
|
||||
CudaTools::Array<int> arrConst = CudaTools::Array<int>::constant({10}, 1);
|
||||
CudaTools::Array<double> arrLinspace = CudaTools::Array<double>::linspace(0, 5, 10);
|
||||
CudaTools::Array<int> arrComma({2, 2}); // 2x2 array.
|
||||
arrComma << 1, 2, 3, 4; // Comma initializer if needed.
|
||||
std::cout << arrRange << "\n" << arrConst << "\n" << arrLinspace << "\n" << arrComma "\n";
|
||||
arrComma << 1, 2, 3, 4; // Comma initializer if needed.
|
||||
|
||||
arrRange.updateDevice();
|
||||
arrConst.updateDevice();
|
||||
arrLinspace.updateDevice();
|
||||
arrComma.updateDevice().wait();
|
||||
|
||||
std::cout << "Before Kernel:\n";
|
||||
std::cout << arrRange << "\n" << arrConst << "\n" << arrLinspace << "\n" << arrComma << "\n";
|
||||
|
||||
// Call the kernel multiple times asynchronously. Note: since they share the same
|
||||
// stream, they are not run in parallel, just queued on the device.
|
||||
KERNEL(times2, CudaTools::Kernel::basic(arrRange.shape().items()), arrRange);
|
||||
KERNEL(times2, CudaTools::Kernel::basic(arrConst.shape().items()), arrRange);
|
||||
KERNEL(times2, CudaTools::Kernel::basic(arrLinspace.shape().items()), arrRange).wait();
|
||||
KERNEL(times2, CudaTools::Kernel::basic(arrComma.shape().items()), arrRange).wait();
|
||||
// NOTE: Notice that a view is passed into the kernel, not the Array itself.
|
||||
KERNEL(times2, CudaTools::Kernel::basic(arrRange.shape().items()), arrRange.view());
|
||||
KERNEL(times2, CudaTools::Kernel::basic(arrConst.shape().items()), arrConst.view());
|
||||
KERNEL(times2double, CudaTools::Kernel::basic(arrLinspace.shape().items()), arrLinspace.view());
|
||||
KERNEL(times2, CudaTools::Kernel::basic(arrComma.shape().items()), arrComma.view()).wait();
|
||||
arrRange.updateHost();
|
||||
arrConst.updateHost();
|
||||
arrLinspace.updateHost();
|
||||
arrComma.updateHost().wait(); // Only need to wait for the last one, since they have the same stream.
|
||||
arrComma.updateHost().wait(); // Same stream, so you should wait for the last call.
|
||||
|
||||
std::cout << arrRange << "\n" << arrConst << "\n" << arrLinspace << "\n" << arrComma "\n";
|
||||
std::cout << "After Kernel:\n";
|
||||
std::cout << arrRange << "\n" << arrConst << "\n" << arrLinspace << "\n" << arrComma << "\n";
|
||||
return 0;
|
||||
}
|
||||
|
||||
In this example, we show a few ways to initialize an ``Array`` through some static functions.
|
||||
It is templated, so it can (theoretically) support any type. Additionally, you can initialize an
|
||||
empty ``Array`` by providing its ``Shape`` with an initializer list (ex: ``{2, 2}``). For more details,
|
||||
empty ``Array`` by providing its ``Shape`` with an initializer list (ex: ``{2, 2}``). Many of these
|
||||
array functions and initializers have view-returning and self-assigning versions. For instance,
|
||||
``.flattened()`` returns a flattened view of an ``Array`` and does not modify the original. For more details,
|
||||
see :ref:`here <CudaTools::Array<T>>`.
|
||||
|
||||
We also note the use of ``BASIC_LOOP(N)``, which is a macro for generating the loop automatically
|
||||
@ -175,28 +190,32 @@ on the kernel given the number of threads. It is intended to be used only for "e
|
||||
situations and with the ``CudaTools::Kernel::basic()`` launch parameters. If compiling for CPU, it will
|
||||
mark the loop with ``#pragma omp parallel for`` and attempt to use OpenMP for parallelism.
|
||||
|
||||
.. warning::
|
||||
Notice that a view must be passed to the kernel, and not the original object. This
|
||||
|
||||
The ``Array`` also supports multi-dimensional indexing, slicing, and
|
||||
a few other helpful functions.
|
||||
|
||||
.. code-block:: cpp
|
||||
|
||||
int main() {
|
||||
CudaTools::Array<int> arr = CudaTools::Array<int>::constant(0);
|
||||
CudaTools::Array<int> arr = CudaTools::Array<int>::constant({100}, 0);
|
||||
arr.reshape({4, 5, 5}); // Creates a three dimensional array.
|
||||
|
||||
arr[0][0][0] = 1; // Axis by axis indexing.
|
||||
arr[0][0][0] = 1; // Axis by axis indexing.
|
||||
arr[{1, 0, 0}] = 100; // Specific 'coordinate' indexing.
|
||||
std::cout << arr << "\n";
|
||||
|
||||
CudaTools::Array<int> arrRange = CudaTools::Array<int>::range(18);
|
||||
auto arrSlice = arr.slice({{1, 2}, {1, 4}, {1, 4}}). // Takes a slice of the center.
|
||||
CudaTools::Array<int> arrRange = CudaTools::Array<int>::range(0, 18);
|
||||
auto arrSlice = arr.slice({{1, 3}, {1, 4}, {1, 4}}); // Takes a slice of the center.
|
||||
std::cout << "Before Copy:\n" << arrSlice << "\n";
|
||||
arrSlice = arrRange; // Copies arrRange into arrSlice. (Does NOT replace!)
|
||||
std::cout << "After Copy:\n" << arrSlice << "\n";
|
||||
|
||||
std::cout << "Modified: \n" << arr << "\n"; // The original array is modified, since a slice does not copy.
|
||||
std::cout << "Modified: \n"
|
||||
<< arr << "\n"; // The original array is modified, since a slice does not copy.
|
||||
|
||||
CudaTools::Array<int> newArr = arr.copy(); // Copies the original Array.
|
||||
CudaTools::Array<int> newArr = arr.copy(); // Copies the original Array.
|
||||
for (auto it = newArr.begin(); it != newArr.end(); ++it) { // Iterate through the array.
|
||||
*it = 1;
|
||||
}
|
||||
|
||||
@ -1,34 +1,45 @@
|
||||
#define CUDATOOLS_IMPLEMENTATION
|
||||
#include <Core.h>
|
||||
#include <Array.h>
|
||||
#include <Core.h>
|
||||
|
||||
DEFINE_KERNEL(times2, const CudaTools::Array<int>& arr) {
|
||||
BASIC_LOOP(arr.shape().items()) {
|
||||
arr[iThread] *= 2;
|
||||
}
|
||||
DEFINE_KERNEL(times2, const CudaTools::Array<int> arr) {
|
||||
CudaTools::Array<int> flat = arr.flattened();
|
||||
BASIC_LOOP(arr.shape().items()) { flat[iThread] *= 2; }
|
||||
}
|
||||
|
||||
DEFINE_KERNEL(times2double, const CudaTools::Array<double> arr) {
    // Work through a flattened handle so one linear index covers every element.
    CudaTools::Array<double> linear = arr.flattened();
    BASIC_LOOP(arr.shape().items()) {
        // iThread is not declared here; presumably BASIC_LOOP supplies it as the loop index.
        linear[iThread] *= 2;
    }
}
|
||||
|
||||
int main() {
|
||||
CudaTools::Array<int> arrRange = CudaTools::Array<int>::range(0, 10);
|
||||
CudaTools::Array<int> arrConst = CudaTools::Array<int>::constant(1);
|
||||
CudaTools::Array<double> arrLinspace = CudaTools::Array<int>::linspace(0, 5, 10);
|
||||
CudaTools::Array<int> arrConst = CudaTools::Array<int>::constant({10}, 1);
|
||||
CudaTools::Array<double> arrLinspace = CudaTools::Array<double>::linspace(0, 5, 10);
|
||||
CudaTools::Array<int> arrComma({2, 2}); // 2x2 array.
|
||||
arrComma << 1, 2, 3, 4; // Comma initializer if needed.
|
||||
std::cout << arrRange << "\n" << arrConst << "\n" << arrLinspace << "\n" << arrComma "\n";
|
||||
arrComma << 1, 2, 3, 4; // Comma initializer if needed.
|
||||
|
||||
arrRange.updateDevice();
|
||||
arrConst.updateDevice();
|
||||
arrLinspace.updateDevice();
|
||||
arrComma.updateDevice().wait();
|
||||
|
||||
std::cout << "Before Kernel:\n";
|
||||
std::cout << arrRange << "\n" << arrConst << "\n" << arrLinspace << "\n" << arrComma << "\n";
|
||||
|
||||
// Call the kernel multiple times asynchronously. Note: since they share the same
|
||||
// stream, they are not run in parallel, just queued on the device.
|
||||
KERNEL(times2, CudaTools::Kernel::basic(arrRange.shape().items()), arrRange);
|
||||
KERNEL(times2, CudaTools::Kernel::basic(arrConst.shape().items()), arrRange);
|
||||
KERNEL(times2, CudaTools::Kernel::basic(arrLinspace.shape().items()), arrRange).wait();
|
||||
KERNEL(times2, CudaTools::Kernel::basic(arrComma.shape().items()), arrRange).wait();
|
||||
// NOTE: Notice that a view is passed into the kernel, not the Array itself.
|
||||
KERNEL(times2, CudaTools::Kernel::basic(arrRange.shape().items()), arrRange.view());
|
||||
KERNEL(times2, CudaTools::Kernel::basic(arrConst.shape().items()), arrConst.view());
|
||||
KERNEL(times2double, CudaTools::Kernel::basic(arrLinspace.shape().items()), arrLinspace.view());
|
||||
KERNEL(times2, CudaTools::Kernel::basic(arrComma.shape().items()), arrComma.view()).wait();
|
||||
arrRange.updateHost();
|
||||
arrConst.updateHost();
|
||||
arrLinspace.updateHost();
|
||||
arrComma.updateHost().wait(); // Only need to wait for the last one, since they have the same stream.
|
||||
arrComma.updateHost().wait(); // Same stream, so you should wait for the last call.
|
||||
|
||||
std::cout << arrRange << "\n" << arrConst << "\n" << arrLinspace << "\n" << arrComma "\n";
|
||||
std::cout << "After Kernel:\n";
|
||||
std::cout << arrRange << "\n" << arrConst << "\n" << arrLinspace << "\n" << arrComma << "\n";
|
||||
return 0;
|
||||
}
|
||||
|
||||
|
||||
|
||||
@ -14,30 +14,7 @@ SRC_DIR = .
|
||||
BUILD_DIR = build
|
||||
|
||||
# Should not need to modify below.
|
||||
int main() {
|
||||
CudaTools::Array<int> arr = CudaTools::Array<int>::constant(0);
|
||||
arr.reshape({4, 5, 5}); // Creates a three dimensional array.
|
||||
|
||||
arr[0][0][0] = 1; // Axis by axis indexing.
|
||||
arr[{1, 0, 0}] = 100; // Specific 'coordinate' indexing.
|
||||
std::cout << arr << "\n";
|
||||
|
||||
CudaTools::Array<int> arrRange = CudaTools::Array<int>::range(18);
|
||||
auto arrSlice = arr.slice({{1, 2}, {1, 4}, {1, 4}}). // Takes a slice of the center.
|
||||
std::cout << "Before Copy:\n" << arrSlice << "\n";
|
||||
arrSlice = arrRange; // Copies arrRange into arrSlice. (Does NOT replace!)
|
||||
std::cout << "After Copy:\n" << arrSlice << "\n";
|
||||
|
||||
std::cout << "Modified: \n" << arr << "\n"; // The original array is modified, since a slice does not copy.
|
||||
|
||||
CudaTools::Array<int> newArr = arr.copy(); // Copies the original Array.
|
||||
for (auto it = newArr.begin(); it != newArr.end(); ++it) { // Iterate through the array.
|
||||
*it = 1;
|
||||
}
|
||||
std::cout << "Modified New Array:\n" << newArr << "\n";
|
||||
std::cout << "Old Array:\n" << arr << "\n"; // The original array was not modified after a copy.
|
||||
return 0;
|
||||
}
|
||||
CPU_BUILD_DIR = $(BUILD_DIR)/cpu
|
||||
GPU_BUILD_DIR = $(BUILD_DIR)/gpu
|
||||
|
||||
|
||||
@ -1,24 +1,25 @@
|
||||
#define CUDATOOLS_IMPLEMENTATION
|
||||
#include <Core.h>
|
||||
#include <Array.h>
|
||||
#include <Core.h>
|
||||
|
||||
int main() {
|
||||
CudaTools::Array<int> arr = CudaTools::Array<int>::constant(0);
|
||||
CudaTools::Array<int> arr = CudaTools::Array<int>::constant({100}, 0);
|
||||
arr.reshape({4, 5, 5}); // Creates a three dimensional array.
|
||||
|
||||
arr[0][0][0] = 1; // Axis by axis indexing.
|
||||
arr[0][0][0] = 1; // Axis by axis indexing.
|
||||
arr[{1, 0, 0}] = 100; // Specific 'coordinate' indexing.
|
||||
std::cout << arr << "\n";
|
||||
|
||||
CudaTools::Array<int> arrRange = CudaTools::Array<int>::range(18);
|
||||
auto arrSlice = arr.slice({{1, 2}, {1, 4}, {1, 4}}). // Takes a slice of the center.
|
||||
CudaTools::Array<int> arrRange = CudaTools::Array<int>::range(0, 18);
|
||||
auto arrSlice = arr.slice({{1, 3}, {1, 4}, {1, 4}}); // Takes a slice of the center.
|
||||
std::cout << "Before Copy:\n" << arrSlice << "\n";
|
||||
arrSlice = arrRange; // Copies arrRange into arrSlice. (Does NOT replace!)
|
||||
std::cout << "After Copy:\n" << arrSlice << "\n";
|
||||
|
||||
std::cout << "Modified: \n" << arr << "\n"; // The original array is modified, since a slice does not copy.
|
||||
std::cout << "Modified: \n"
|
||||
<< arr << "\n"; // The original array is modified, since a slice does not copy.
|
||||
|
||||
CudaTools::Array<int> newArr = arr.copy(); // Copies the original Array.
|
||||
CudaTools::Array<int> newArr = arr.copy(); // Copies the original Array.
|
||||
for (auto it = newArr.begin(); it != newArr.end(); ++it) { // Iterate through the array.
|
||||
*it = 1;
|
||||
}
|
||||
@ -26,5 +27,3 @@ int main() {
|
||||
std::cout << "Old Array:\n" << arr << "\n"; // The original array was not modified after a copy.
|
||||
return 0;
|
||||
}
|
||||
|
||||
|
||||
|
||||
Loading…
x
Reference in New Issue
Block a user