|
NForge
Tensor library
|
#include <tensor_impl_CUDA.h>
Public Member Functions | |
| CUDAImpl (const Tensor::Shape &shape) | |
| void | fillAll (float value) override |
Fills all elements with value. | |
| void | fillRand () override |
| Fills all elements with random values in [-1, 1]. | |
| void | print () const override |
| Prints the entire tensor to stdout. | |
| void | print (const std::vector< size_t > &position) const override |
Prints the block starting at position to stdout. | |
| size_t | getNumElements () const override |
| Returns the total number of elements. | |
| Tensor::Shape | getShape () const override |
| Returns the tensor shape. | |
| float * | dataPtr () const |
| Returns a raw pointer to the device data buffer. | |
| std::vector< float > | toVector () const override |
| Copies all elements into a flat vector (row-major order). | |
| std::string | toString () const override |
| Returns a string representation of the data. | |
| std::unique_ptr< Tensor::Impl > | clone () const override |
| Deep copies this implementation. | |
| void | copyFromHost (const float *data, size_t count) override |
Copies data from a host float array into this backend's storage. | |
| void | set (const TensorLayout &lhsLayout, const Tensor::Impl *rhsImpl, const TensorLayout &rhsLayout) override |
Copies data from rhsImpl with rhsLayout into this with lhsLayout. | |
| bool | compare (const TensorLayout &lhsLayout, const Tensor::Impl *rhsImpl, const TensorLayout &rhsLayout) const override |
Returns true if the data with lhsLayout matches rhsImpl with rhsLayout. | |
| std::unique_ptr< Tensor::Impl > | add (const TensorLayout &lhsLayout, const Tensor::Impl *rhsImpl, const TensorLayout &rhsLayout, const TensorLayout &outLayout) const override |
Elementwise addition. Returns a new Impl with the result with outLayout. | |
| std::unique_ptr< Tensor::Impl > | sub (const TensorLayout &lhsLayout, const Tensor::Impl *rhsImpl, const TensorLayout &rhsLayout, const TensorLayout &outLayout) const override |
Elementwise subtraction. Returns a new Impl with the result with outLayout. | |
| std::unique_ptr< Tensor::Impl > | mul (const TensorLayout &lhsLayout, const Tensor::Impl *rhsImpl, const TensorLayout &rhsLayout, const TensorLayout &outLayout) const override |
Elementwise multiplication. Returns a new Impl with the result with outLayout. | |
| std::unique_ptr< Tensor::Impl > | div (const TensorLayout &lhsLayout, const Tensor::Impl *rhsImpl, const TensorLayout &rhsLayout, const TensorLayout &outLayout) const override |
Elementwise division. Returns a new Impl with the result with outLayout. | |
| void | iadd (const TensorLayout &lhsLayout, const Tensor::Impl *rhsImpl, const TensorLayout &rhsLayout) override |
In-place elementwise addition. Modifies the data described by lhsLayout in place. | |
| void | isub (const TensorLayout &lhsLayout, const Tensor::Impl *rhsImpl, const TensorLayout &rhsLayout) override |
In-place elementwise subtraction. Modifies the data described by lhsLayout in place. | |
| void | imul (const TensorLayout &lhsLayout, const Tensor::Impl *rhsImpl, const TensorLayout &rhsLayout) override |
In-place elementwise multiplication. Modifies the data described by lhsLayout in place. | |
| void | idiv (const TensorLayout &lhsLayout, const Tensor::Impl *rhsImpl, const TensorLayout &rhsLayout) override |
In-place elementwise division. Modifies the data described by lhsLayout in place. | |
| std::unique_ptr< Tensor::Impl > | sum (const TensorLayout &layout, const TensorLayout &blockLayout, const TensorLayout &outLayout) const override |
Reduces dimensions [dim, rank) by summation. Output with outLayout. | |
| std::unique_ptr< Tensor::Impl > | min (const TensorLayout &layout, const TensorLayout &blockLayout, const TensorLayout &outLayout) const override |
Reduces dimensions [dim, rank) by taking the minimum. Output with outLayout. | |
| std::unique_ptr< Tensor::Impl > | max (const TensorLayout &layout, const TensorLayout &blockLayout, const TensorLayout &outLayout) const override |
Reduces dimensions [dim, rank) by taking the maximum. Output with outLayout. | |
| std::unique_ptr< Tensor::Impl > | prod (const TensorLayout &layout, const TensorLayout &blockLayout, const TensorLayout &outLayout) const override |
Reduces dimensions [dim, rank) by taking the product. Output with outLayout. | |
| std::unique_ptr< Tensor::Impl > | norm (const TensorLayout &layout) const override |
L2 norm of the tensor described by layout. | |
| std::unique_ptr< Tensor::Impl > | matmul (const TensorLayout &lhsLayout, const Tensor::Impl *rhsImpl, const TensorLayout &rhsLayout, const TensorLayout &outLayout, size_t batch, size_t m, size_t k, size_t p) const override |
Matrix multiplication. The last two dims of each layout are the matrix dims. | |
| std::unique_ptr< Tensor::Impl > | less (const TensorLayout &lhsLayout, const Tensor::Impl *rhsImpl, const TensorLayout &rhsLayout, const TensorLayout &outLayout) const override |
Elementwise less than. Returns a tensor of 0.0 / 1.0 with outLayout. | |
| std::unique_ptr< Tensor::Impl > | lessEqual (const TensorLayout &lhsLayout, const Tensor::Impl *rhsImpl, const TensorLayout &rhsLayout, const TensorLayout &outLayout) const override |
Elementwise less or equal. Returns a tensor of 0.0 / 1.0 with outLayout. | |
| std::unique_ptr< Tensor::Impl > | greater (const TensorLayout &lhsLayout, const Tensor::Impl *rhsImpl, const TensorLayout &rhsLayout, const TensorLayout &outLayout) const override |
Elementwise greater than. Returns a tensor of 0.0 / 1.0 with outLayout. | |
| std::unique_ptr< Tensor::Impl > | greaterEqual (const TensorLayout &lhsLayout, const Tensor::Impl *rhsImpl, const TensorLayout &rhsLayout, const TensorLayout &outLayout) const override |
Elementwise greater or equal. Returns a tensor of 0.0 / 1.0 with outLayout. | |
| std::unique_ptr< Tensor::Impl > | isClose (const TensorLayout &lhsLayout, const Tensor::Impl *rhsImpl, const TensorLayout &rhsLayout, const TensorLayout &outLayout, float tolerance) const override |
Elementwise closeness within tolerance. Returns a tensor of 0.0 / 1.0 with outLayout. | |
CUDA implementation of Tensor::Impl, backed by device memory.
All operations launch CUDA kernels using TensorLayout descriptors. The caller is responsible for layout validity; see Tensor::Impl.
Overridden methods follow the same semantics documented in Tensor::Impl.
|
override virtual |
Elementwise addition. Returns a new Impl with the result with outLayout.
Implements Tensor::Impl.
|
override virtual |
Deep copies this implementation.
Implements Tensor::Impl.
|
override virtual |
Returns true if the data with lhsLayout matches rhsImpl with rhsLayout.
Implements Tensor::Impl.
|
override virtual |
Copies data from a host float array into this backend's storage.
| data | Source array (must have at least count elements). |
| count | Number of elements to copy. |
Implements Tensor::Impl.
|
override virtual |
Elementwise division. Returns a new Impl with the result with outLayout.
Implements Tensor::Impl.
|
override virtual |
Fills all elements with value.
Implements Tensor::Impl.
|
override virtual |
Fills all elements with random values in [-1, 1].
Implements Tensor::Impl.
|
override virtual |
Returns the total number of elements.
Implements Tensor::Impl.
|
override virtual |
Returns the tensor shape.
Implements Tensor::Impl.
|
override virtual |
Elementwise greater than. Returns a tensor of 0.0 / 1.0 with outLayout.
Implements Tensor::Impl.
|
override virtual |
Elementwise greater or equal. Returns a tensor of 0.0 / 1.0 with outLayout.
Implements Tensor::Impl.
|
override virtual |
In-place elementwise addition. Modifies the data described by lhsLayout in place.
Implements Tensor::Impl.
|
override virtual |
In-place elementwise division. Modifies the data described by lhsLayout in place.
Implements Tensor::Impl.
|
override virtual |
In-place elementwise multiplication. Modifies the data described by lhsLayout in place.
Implements Tensor::Impl.
|
override virtual |
Elementwise closeness within tolerance. Returns a tensor of 0.0 / 1.0 with outLayout.
Implements Tensor::Impl.
|
override virtual |
In-place elementwise subtraction. Modifies the data described by lhsLayout in place.
Implements Tensor::Impl.
|
override virtual |
Elementwise less than. Returns a tensor of 0.0 / 1.0 with outLayout.
Implements Tensor::Impl.
|
override virtual |
Elementwise less or equal. Returns a tensor of 0.0 / 1.0 with outLayout.
Implements Tensor::Impl.
|
override virtual |
Matrix multiplication. The last two dims of each layout are the matrix dims. batch, m, k, p describe the decomposition of the matmul problem.
2D: (m, k) @ (k, p) => (m, p).
3D: (batch, m, k) @ (batch, k, p) => (batch, m, p).
Implements Tensor::Impl.
|
override virtual |
Reduces dimensions [dim, rank) by taking the maximum. Output with outLayout.
Implements Tensor::Impl.
|
override virtual |
Reduces dimensions [dim, rank) by taking the minimum. Output with outLayout.
Implements Tensor::Impl.
|
override virtual |
Elementwise multiplication. Returns a new Impl with the result with outLayout.
Implements Tensor::Impl.
|
override virtual |
L2 norm of the tensor described by layout.
Implements Tensor::Impl.
|
override virtual |
Prints the entire tensor to stdout.
Implements Tensor::Impl.
|
override virtual |
Prints the block starting at position to stdout.
Implements Tensor::Impl.
|
override virtual |
Reduces dimensions [dim, rank) by taking the product. Output with outLayout.
Implements Tensor::Impl.
|
override virtual |
Copies data from rhsImpl with rhsLayout into this with lhsLayout.
Implements Tensor::Impl.
|
override virtual |
Elementwise subtraction. Returns a new Impl with the result with outLayout.
Implements Tensor::Impl.
|
override virtual |
Reduces dimensions [dim, rank) by summation. Output with outLayout.
Implements Tensor::Impl.
|
override virtual |
Returns a string representation of the data.
Implements Tensor::Impl.
|
override virtual |
Copies all elements into a flat vector (row-major order).
Implements Tensor::Impl.