1#ifndef TENSOR_IMPL_CUDA_H
2#define TENSOR_IMPL_CUDA_H
4#include "../tensor_impl.h"
5#include "nforge/core/tensor_shape.h"
22 void print(
const std::vector<size_t>& position)
const override;
32 std::unique_ptr<Tensor::Impl>
clone()
const override;
88 size_t k,
size_t p)
const override;
114 float tolerance)
const override;
123 template <
typename Kernel>
124 std::unique_ptr<Tensor::Impl> applyKernel(
const TensorLayout& lhsLayout,
129 template <
typename Kernel>
134 template <
typename Kernel>
135 std::unique_ptr<Tensor::Impl> applyReductionKernel(
const TensorLayout& layout,
138 float initValue, Kernel kernel)
const;
Definition tensor_impl_CUDA.h:13
float * dataPtr() const
Returns a raw pointer to the device data buffer.
std::unique_ptr< Tensor::Impl > lessEqual(const TensorLayout &lhsLayout, const Tensor::Impl *rhsImpl, const TensorLayout &rhsLayout, const TensorLayout &outLayout) const override
Elementwise less-than-or-equal comparison. Returns a tensor of 0.0 / 1.0 with outLayout.
void copyFromHost(const float *data, size_t count) override
std::unique_ptr< Tensor::Impl > min(const TensorLayout &layout, const TensorLayout &blockLayout, const TensorLayout &outLayout) const override
Reduces dimensions [dim, rank) by taking the minimum. Output with outLayout.
void iadd(const TensorLayout &lhsLayout, const Tensor::Impl *rhsImpl, const TensorLayout &rhsLayout) override
In-place elementwise addition. Modifies the elements addressed by lhsLayout; the layout itself is not changed.
void isub(const TensorLayout &lhsLayout, const Tensor::Impl *rhsImpl, const TensorLayout &rhsLayout) override
In-place elementwise subtraction. Modifies the elements addressed by lhsLayout; the layout itself is not changed.
void print() const override
Prints the entire tensor to stdout.
void fillAll(float value) override
Fills all elements with value.
std::unique_ptr< Tensor::Impl > mul(const TensorLayout &lhsLayout, const Tensor::Impl *rhsImpl, const TensorLayout &rhsLayout, const TensorLayout &outLayout) const override
Elementwise multiplication. Returns a new Impl holding the result, laid out according to outLayout.
std::string toString() const override
Returns a string representation of the data.
std::unique_ptr< Tensor::Impl > norm(const TensorLayout &layout) const override
L2 norm of the tensor described by layout.
void set(const TensorLayout &lhsLayout, const Tensor::Impl *rhsImpl, const TensorLayout &rhsLayout) override
Copies data from rhsImpl with rhsLayout into this with lhsLayout.
std::vector< float > toVector() const override
Copies all elements into a flat vector (row-major order).
std::unique_ptr< Tensor::Impl > div(const TensorLayout &lhsLayout, const Tensor::Impl *rhsImpl, const TensorLayout &rhsLayout, const TensorLayout &outLayout) const override
Elementwise division. Returns a new Impl holding the result, laid out according to outLayout.
std::unique_ptr< Tensor::Impl > max(const TensorLayout &layout, const TensorLayout &blockLayout, const TensorLayout &outLayout) const override
Reduces dimensions [dim, rank) by taking the maximum. Output with outLayout.
std::unique_ptr< Tensor::Impl > isClose(const TensorLayout &lhsLayout, const Tensor::Impl *rhsImpl, const TensorLayout &rhsLayout, const TensorLayout &outLayout, float tolerance) const override
Elementwise closeness within tolerance. Returns a tensor of 0.0 / 1.0 with outLayout.
Tensor::Shape getShape() const override
Returns the tensor shape.
std::unique_ptr< Tensor::Impl > sum(const TensorLayout &layout, const TensorLayout &blockLayout, const TensorLayout &outLayout) const override
Reduces dimensions [dim, rank) by summation. Output with outLayout.
std::unique_ptr< Tensor::Impl > less(const TensorLayout &lhsLayout, const Tensor::Impl *rhsImpl, const TensorLayout &rhsLayout, const TensorLayout &outLayout) const override
Elementwise less than. Returns a tensor of 0.0 / 1.0 with outLayout.
bool compare(const TensorLayout &lhsLayout, const Tensor::Impl *rhsImpl, const TensorLayout &rhsLayout) const override
Returns true if the data with lhsLayout matches rhsImpl with rhsLayout.
void fillRand() override
Fills all elements with random values in [-1, 1].
std::unique_ptr< Tensor::Impl > greaterEqual(const TensorLayout &lhsLayout, const Tensor::Impl *rhsImpl, const TensorLayout &rhsLayout, const TensorLayout &outLayout) const override
Elementwise greater-than-or-equal comparison. Returns a tensor of 0.0 / 1.0 with outLayout.
std::unique_ptr< Tensor::Impl > clone() const override
Deep copies this implementation.
void idiv(const TensorLayout &lhsLayout, const Tensor::Impl *rhsImpl, const TensorLayout &rhsLayout) override
In-place elementwise division. Modifies the elements addressed by lhsLayout; the layout itself is not changed.
std::unique_ptr< Tensor::Impl > greater(const TensorLayout &lhsLayout, const Tensor::Impl *rhsImpl, const TensorLayout &rhsLayout, const TensorLayout &outLayout) const override
Elementwise greater than. Returns a tensor of 0.0 / 1.0 with outLayout.
void imul(const TensorLayout &lhsLayout, const Tensor::Impl *rhsImpl, const TensorLayout &rhsLayout) override
In-place elementwise multiplication. Modifies the elements addressed by lhsLayout; the layout itself is not changed.
void print(const std::vector< size_t > &position) const override
Prints the block starting at position to stdout.
size_t getNumElements() const override
Returns the total number of elements.
std::unique_ptr< Tensor::Impl > add(const TensorLayout &lhsLayout, const Tensor::Impl *rhsImpl, const TensorLayout &rhsLayout, const TensorLayout &outLayout) const override
Elementwise addition. Returns a new Impl holding the result, laid out according to outLayout.
std::unique_ptr< Tensor::Impl > prod(const TensorLayout &layout, const TensorLayout &blockLayout, const TensorLayout &outLayout) const override
Reduces dimensions [dim, rank) by taking the product. Output with outLayout.
std::unique_ptr< Tensor::Impl > sub(const TensorLayout &lhsLayout, const Tensor::Impl *rhsImpl, const TensorLayout &rhsLayout, const TensorLayout &outLayout) const override
Elementwise subtraction. Returns a new Impl holding the result, laid out according to outLayout.
std::unique_ptr< Tensor::Impl > matmul(const TensorLayout &lhsLayout, const Tensor::Impl *rhsImpl, const TensorLayout &rhsLayout, const TensorLayout &outLayout, size_t batch, size_t m, size_t k, size_t p) const override
Definition tensor_impl.h:15
Definition tensor_shape.h:15
Definition tensor_layout.h:15