Corrected examples for Array

2023-04-21 15:39:14 -05:00 · 2023-04-21 15:39:14 -05:00 · 2161c62608
commit 2161c62608
parent e8e8dcc073
5 changed files with 94 additions and 78 deletions
--- a/Array.h
+++ b/Array.h
@ -450,10 +450,7 @@ template <typename T> class Array {
    HD Array reshaped(const Shape& new_shape) const {
        CT_ERROR_IF(shape().items(), !=, new_shape.items(),
                    "New shape cannot have a different number of terms");
-        if (mIsSlice) {
-            Array<T> arr = this->copy();
-            return arr.reshaped(new_shape);
-        }
+        CT_ERROR(mIsSlice, "Cannot reshape slice, a new array must be made. (Try copy first)")
        Array<T> arr = view();
        arr.mShape = new_shape;
        return arr;
@ -462,7 +459,7 @@ template <typename T> class Array {
    HD void reshape(const Shape& new_shape) {
        CT_ERROR_IF(shape().items(), !=, new_shape.items(),
                    "New shape cannot have a different number of terms");
-        CT_ERROR(mIsSlice, "Cannot reshape slice, a new array must be made. (Try reshaped instead)")
+        CT_ERROR(mIsSlice, "Cannot reshape slice, a new array must be made. (Try copy first)")
        mShape = new_shape;
    };

@ -471,13 +468,26 @@ template <typename T> class Array {
     * single vectors to their 2D counterparts.
     */
    HD Array atLeast2D() const {
-        return (shape().axes() == 1) ? Array(*this, {shape().length(), 1}) : view();
+        return (shape().axes() == 1) ? reshaped({shape().length(), 1}) : view();
    };

    /**
-     * Flattens the Array into one dimension.
+     * Reshapes this array, making it at least 2D. Useful for promoting
+     * single vectors to their 2D counterparts.
     */
-    HD Array flatten() const { return reshape({mShape.mItems}); };
+    HD void asAtLeast2D() {
+        if (shape().axes() == 1) reshape({shape().length(), 1});
+    };
+
+    /**
+     * Returns a view of this Array that has been flattened into one dimension.
+     */
+    HD Array flattened() const { return reshaped({mShape.mItems}); };
+
+    /**
+     * Flattens this Array into one dimension.
+     */
+    HD void flatten() { reshape({mShape.mItems}); };

    /**
     * Returns the Eigen::Map of this Array.
--- a/docs/source/usage.rst
+++ b/docs/source/usage.rst
@ -136,38 +136,53 @@ We can demonstrate a few here.

 .. code-block:: cpp

-    DEFINE_KERNEL(times2, const CudaTools::Array<int>& arr) {
-        BASIC_LOOP(arr.shape().items()) {
-            arr[iThread] *= 2;
-        }
+    DEFINE_KERNEL(times2, const CudaTools::Array<int> arr) {
+        CudaTools::Array<int> flat = arr.flattened();
+        BASIC_LOOP(arr.shape().items()) { flat[iThread] *= 2; }
+    }
+
+    DEFINE_KERNEL(times2double, const CudaTools::Array<double> arr) {
+        CudaTools::Array<double> flat = arr.flattened();
+        BASIC_LOOP(arr.shape().items()) { flat[iThread] *= 2; }
    }

    int main() {
        CudaTools::Array<int> arrRange = CudaTools::Array<int>::range(0, 10);
-        CudaTools::Array<int> arrConst = CudaTools::Array<int>::constant(1);
-        CudaTools::Array<double> arrLinspace = CudaTools::Array<int>::linspace(0, 5, 10);
+        CudaTools::Array<int> arrConst = CudaTools::Array<int>::constant({10}, 1);
+        CudaTools::Array<double> arrLinspace = CudaTools::Array<double>::linspace(0, 5, 10);
        CudaTools::Array<int> arrComma({2, 2}); // 2x2 array.
-        arrComma << 1, 2, 3, 4; // Comma initializer if needed.
-        std::cout << arrRange << "\n" << arrConst << "\n" << arrLinspace << "\n" << arrComma "\n";
+        arrComma << 1, 2, 3, 4;                 // Comma initializer if needed.
+
+        arrRange.updateDevice();
+        arrConst.updateDevice();
+        arrLinspace.updateDevice();
+        arrComma.updateDevice().wait();
+
+        std::cout << "Before Kernel:\n";
+        std::cout << arrRange << "\n" << arrConst << "\n" << arrLinspace << "\n" << arrComma << "\n";

        // Call the kernel multiple times asynchronously. Note: since they share same
        // stream, they are not run in parallel, just queued on the device.
-        KERNEL(times2, CudaTools::Kernel::basic(arrRange.shape().items()), arrRange);
-        KERNEL(times2, CudaTools::Kernel::basic(arrConst.shape().items()), arrRange);
-        KERNEL(times2, CudaTools::Kernel::basic(arrLinspace.shape().items()), arrRange).wait();
-        KERNEL(times2, CudaTools::Kernel::basic(arrComma.shape().items()), arrRange).wait();
+        // NOTE: Notice that a view is passed into the kernel, not the Array itself.
+        KERNEL(times2, CudaTools::Kernel::basic(arrRange.shape().items()), arrRange.view());
+        KERNEL(times2, CudaTools::Kernel::basic(arrConst.shape().items()), arrConst.view());
+        KERNEL(times2double, CudaTools::Kernel::basic(arrLinspace.shape().items()), arrLinspace.view());
+        KERNEL(times2, CudaTools::Kernel::basic(arrComma.shape().items()), arrComma.view()).wait();
        arrRange.updateHost();
        arrConst.updateHost();
        arrLinspace.updateHost();
-        arrComma.updateHost().wait(); // Only need to wait for the last one, since they have the same stream.
+        arrComma.updateHost().wait(); // Same stream, so you should wait for the last call.

-        std::cout << arrRange << "\n" << arrConst << "\n" << arrLinspace << "\n" << arrComma "\n";
+        std::cout << "After Kernel:\n";
+        std::cout << arrRange << "\n" << arrConst << "\n" << arrLinspace << "\n" << arrComma << "\n";
        return 0;
    }

 In this example, we show a few ways to initialize an ``Array`` through some static functions.
 It is templated, so it can (theoretically) support any type. Additionally, you can initialize an
-empty ``Array`` by providing its ``Shape`` with an initializer list (ex: ``{2, 2}``). For more details,
+empty ``Array`` by providing its ``Shape`` with an initializer list (ex: ``{2, 2}``). Many of these
+array functions and initializers have view-returning and in-place (self-modifying) versions. For instance,
+``.flattened()`` returns a flattened view of an ``Array`` and does not modify the original, while ``.flatten()`` flattens the ``Array`` itself. For more details,
 see :ref:`here <CudaTools::Array<T>>`.

 We also note the use of ``BASIC_LOOP(N)``, which is a macro for generating the loop automatically
@ -175,28 +190,32 @@ on the kernel given the number of threads. It is intended to be used only for "e
 situations and with the ``CudaTools::Kernel::basic()`` launch parameters. If compiling for CPU, it will
 mark the loop with ``#pragma omp parallel for`` and attempt to use OpenMP for parallelism.

+.. warning::
+   A view (obtained with ``.view()``) must be passed to the kernel, not the original ``Array`` object.
+
 The ``Array`` also supports other helpful features, such as multi-dimensional indexing, slicing,
 copying, and iteration, among others.

 .. code-block:: cpp

    int main() {
-        CudaTools::Array<int> arr = CudaTools::Array<int>::constant(0);
+        CudaTools::Array<int> arr = CudaTools::Array<int>::constant({100}, 0);
        arr.reshape({4, 5, 5}); // Creates a three dimensional array.

-        arr[0][0][0] = 1; // Axis by axis indexing.
+        arr[0][0][0] = 1;     // Axis by axis indexing.
        arr[{1, 0, 0}] = 100; // Specific 'coordinate' indexing.
        std::cout << arr << "\n";

-        CudaTools::Array<int> arrRange = CudaTools::Array<int>::range(18);
-        auto arrSlice = arr.slice({{1, 2}, {1, 4}, {1, 4}}). // Takes a slice of the center.
+        CudaTools::Array<int> arrRange = CudaTools::Array<int>::range(0, 18);
+        auto arrSlice = arr.slice({{1, 3}, {1, 4}, {1, 4}}); // Takes a slice of the center.
        std::cout << "Before Copy:\n" << arrSlice << "\n";
        arrSlice = arrRange; // Copies arrRange into arrSlice. (Does NOT replace!)
        std::cout << "After Copy:\n" << arrSlice << "\n";

-        std::cout << "Modified: \n" << arr << "\n"; // The original array is modified, since a slice does not copy.
+        std::cout << "Modified: \n"
+                  << arr << "\n"; // The original array is modified, since a slice does not copy.

-        CudaTools::Array<int> newArr = arr.copy(); // Copies the original Array.
+        CudaTools::Array<int> newArr = arr.copy();                 // Copies the original Array.
        for (auto it = newArr.begin(); it != newArr.end(); ++it) { // Iterate through the array.
            *it = 1;
        }
--- a/samples/3_ArrayKernel/main.cu.cpp
+++ b/samples/3_ArrayKernel/main.cu.cpp
@ -1,34 +1,45 @@
 #define CUDATOOLS_IMPLEMENTATION
-#include <Core.h>
 #include <Array.h>
+#include <Core.h>

-DEFINE_KERNEL(times2, const CudaTools::Array<int>& arr) {
-    BASIC_LOOP(arr.shape().items()) {
-        arr[iThread] *= 2;
-    }
+DEFINE_KERNEL(times2, const CudaTools::Array<int> arr) {
+    CudaTools::Array<int> flat = arr.flattened();
+    BASIC_LOOP(arr.shape().items()) { flat[iThread] *= 2; }
+}
+
+DEFINE_KERNEL(times2double, const CudaTools::Array<double> arr) {
+    CudaTools::Array<double> flat = arr.flattened();
+    BASIC_LOOP(arr.shape().items()) { flat[iThread] *= 2; }
 }

 int main() {
    CudaTools::Array<int> arrRange = CudaTools::Array<int>::range(0, 10);
-    CudaTools::Array<int> arrConst = CudaTools::Array<int>::constant(1);
-    CudaTools::Array<double> arrLinspace = CudaTools::Array<int>::linspace(0, 5, 10);
+    CudaTools::Array<int> arrConst = CudaTools::Array<int>::constant({10}, 1);
+    CudaTools::Array<double> arrLinspace = CudaTools::Array<double>::linspace(0, 5, 10);
    CudaTools::Array<int> arrComma({2, 2}); // 2x2 array.
-    arrComma << 1, 2, 3, 4; // Comma initializer if needed.
-    std::cout << arrRange << "\n" << arrConst << "\n" << arrLinspace << "\n" << arrComma "\n";
+    arrComma << 1, 2, 3, 4;                 // Comma initializer if needed.
+
+    arrRange.updateDevice();
+    arrConst.updateDevice();
+    arrLinspace.updateDevice();
+    arrComma.updateDevice().wait();
+
+    std::cout << "Before Kernel:\n";
+    std::cout << arrRange << "\n" << arrConst << "\n" << arrLinspace << "\n" << arrComma << "\n";

    // Call the kernel multiple times asynchronously. Note: since they share same
    // stream, they are not run in parallel, just queued on the device.
-    KERNEL(times2, CudaTools::Kernel::basic(arrRange.shape().items()), arrRange);
-    KERNEL(times2, CudaTools::Kernel::basic(arrConst.shape().items()), arrRange);
-    KERNEL(times2, CudaTools::Kernel::basic(arrLinspace.shape().items()), arrRange).wait();
-    KERNEL(times2, CudaTools::Kernel::basic(arrComma.shape().items()), arrRange).wait();
+    // NOTE: Notice that a view is passed into the kernel, not the Array itself.
+    KERNEL(times2, CudaTools::Kernel::basic(arrRange.shape().items()), arrRange.view());
+    KERNEL(times2, CudaTools::Kernel::basic(arrConst.shape().items()), arrConst.view());
+    KERNEL(times2double, CudaTools::Kernel::basic(arrLinspace.shape().items()), arrLinspace.view());
+    KERNEL(times2, CudaTools::Kernel::basic(arrComma.shape().items()), arrComma.view()).wait();
    arrRange.updateHost();
    arrConst.updateHost();
    arrLinspace.updateHost();
-    arrComma.updateHost().wait(); // Only need to wait for the last one, since they have the same stream.
+    arrComma.updateHost().wait(); // Same stream, so you should wait for the last call.

-    std::cout << arrRange << "\n" << arrConst << "\n" << arrLinspace << "\n" << arrComma "\n";
+    std::cout << "After Kernel:\n";
+    std::cout << arrRange << "\n" << arrConst << "\n" << arrLinspace << "\n" << arrComma << "\n";
    return 0;
 }
-
-
--- a/samples/4_ArrayFunctions/Makefile
+++ b/samples/4_ArrayFunctions/Makefile
@ -14,30 +14,7 @@ SRC_DIR = .
 BUILD_DIR = build

 # Should not need to modify below.
-int main() {
-    CudaTools::Array<int> arr = CudaTools::Array<int>::constant(0);
-    arr.reshape({4, 5, 5}); // Creates a three dimensional array.

-    arr[0][0][0] = 1; // Axis by axis indexing.
-    arr[{1, 0, 0}] = 100; // Specific 'coordinate' indexing.
-    std::cout << arr << "\n";
-
-    CudaTools::Array<int> arrRange = CudaTools::Array<int>::range(18);
-    auto arrSlice = arr.slice({{1, 2}, {1, 4}, {1, 4}}). // Takes a slice of the center.
-    std::cout << "Before Copy:\n" << arrSlice << "\n";
-    arrSlice = arrRange; // Copies arrRange into arrSlice. (Does NOT replace!)
-    std::cout << "After Copy:\n" << arrSlice << "\n";
-
-    std::cout << "Modified: \n" << arr << "\n"; // The original array is modified, since a slice does not copy.
-
-    CudaTools::Array<int> newArr = arr.copy(); // Copies the original Array.
-    for (auto it = newArr.begin(); it != newArr.end(); ++it) { // Iterate through the array.
-        *it = 1;
-    }
-    std::cout << "Modified New Array:\n" << newArr << "\n";
-    std::cout << "Old Array:\n" << arr << "\n"; // The original array was not modified after a copy.
-    return 0;
-}
 CPU_BUILD_DIR = $(BUILD_DIR)/cpu
 GPU_BUILD_DIR = $(BUILD_DIR)/gpu

--- a/samples/4_ArrayFunctions/main.cu.cpp
+++ b/samples/4_ArrayFunctions/main.cu.cpp
@ -1,24 +1,25 @@
 #define CUDATOOLS_IMPLEMENTATION
-#include <Core.h>
 #include <Array.h>
+#include <Core.h>

 int main() {
-    CudaTools::Array<int> arr = CudaTools::Array<int>::constant(0);
+    CudaTools::Array<int> arr = CudaTools::Array<int>::constant({100}, 0);
    arr.reshape({4, 5, 5}); // Creates a three dimensional array.

-    arr[0][0][0] = 1; // Axis by axis indexing.
+    arr[0][0][0] = 1;     // Axis by axis indexing.
    arr[{1, 0, 0}] = 100; // Specific 'coordinate' indexing.
    std::cout << arr << "\n";

-    CudaTools::Array<int> arrRange = CudaTools::Array<int>::range(18);
-    auto arrSlice = arr.slice({{1, 2}, {1, 4}, {1, 4}}). // Takes a slice of the center.
+    CudaTools::Array<int> arrRange = CudaTools::Array<int>::range(0, 18);
+    auto arrSlice = arr.slice({{1, 3}, {1, 4}, {1, 4}}); // Takes a slice of the center.
    std::cout << "Before Copy:\n" << arrSlice << "\n";
    arrSlice = arrRange; // Copies arrRange into arrSlice. (Does NOT replace!)
    std::cout << "After Copy:\n" << arrSlice << "\n";

-    std::cout << "Modified: \n" << arr << "\n"; // The original array is modified, since a slice does not copy.
+    std::cout << "Modified: \n"
+              << arr << "\n"; // The original array is modified, since a slice does not copy.

-    CudaTools::Array<int> newArr = arr.copy(); // Copies the original Array.
+    CudaTools::Array<int> newArr = arr.copy();                 // Copies the original Array.
    for (auto it = newArr.begin(); it != newArr.end(); ++it) { // Iterate through the array.
        *it = 1;
    }
@ -26,5 +27,3 @@ int main() {
    std::cout << "Old Array:\n" << arr << "\n"; // The original array was not modified after a copy.
    return 0;
 }
-
-