NForge
Tensor library
Loading...
Searching...
No Matches
tensor_impl_CUDA.h
1#ifndef TENSOR_IMPL_CUDA_H
2#define TENSOR_IMPL_CUDA_H
3
4#include "../tensor_impl.h"
5#include "nforge/core/tensor_shape.h"
6
14public:
// Constructs the CUDA implementation for a tensor of the given shape.
// NOTE(review): presumably allocates the d_data buffer sized for `shape` —
// the definition is not visible here; confirm in the implementation file.
15 CUDAImpl(const Tensor::Shape& shape);
// Destructor.
// NOTE(review): presumably releases the d_data buffer — confirm in the
// implementation file.
16 ~CUDAImpl();
17
// Fills all elements with `value`.
18 void fillAll(float value) override;
// Fills all elements with random values in [-1, 1].
19 void fillRand() override;
20
// Prints the entire tensor to stdout.
21 void print() const override;
// Prints the block starting at `position` to stdout.
22 void print(const std::vector<size_t>& position) const override;
23
// Returns the total number of elements.
24 size_t getNumElements() const override;
// Returns the tensor shape (by value).
25 Tensor::Shape getShape() const override;
26
// Returns a raw pointer to the device data buffer.
// Declared without `override`, unlike the surrounding members.
28 float* dataPtr() const;
// Copies all elements into a flat vector (row-major order).
29 std::vector<float> toVector() const override;
// Returns a string representation of the data.
30 std::string toString() const override;
31
// Deep copies this implementation and returns the copy as an owning pointer.
32 std::unique_ptr<Tensor::Impl> clone() const override;
33
// Takes a pointer to `count` floats to copy into this tensor.
// NOTE(review): presumably `data` is host memory copied into the device
// buffer, as the name suggests; behavior when `count` differs from
// getNumElements() is not visible here — confirm in the implementation.
34 void copyFromHost(const float* data, size_t count) override;
35
// Copies data from rhsImpl (read with rhsLayout) into this (written with
// lhsLayout).
36 void set(const TensorLayout& lhsLayout, const Tensor::Impl* rhsImpl,
37 const TensorLayout& rhsLayout) override;
38
// Returns true if the data with lhsLayout matches rhsImpl with rhsLayout.
39 bool compare(const TensorLayout& lhsLayout, const Tensor::Impl* rhsImpl,
40 const TensorLayout& rhsLayout) const override;
41
// Elementwise addition of this (lhsLayout) and rhsImpl (rhsLayout).
// Returns a new Impl holding the result with outLayout.
42 std::unique_ptr<Tensor::Impl> add(const TensorLayout& lhsLayout, const Tensor::Impl* rhsImpl,
43 const TensorLayout& rhsLayout,
44 const TensorLayout& outLayout) const override;
45
// Elementwise subtraction of rhsImpl (rhsLayout) from this (lhsLayout).
// Returns a new Impl holding the result with outLayout.
46 std::unique_ptr<Tensor::Impl> sub(const TensorLayout& lhsLayout, const Tensor::Impl* rhsImpl,
47 const TensorLayout& rhsLayout,
48 const TensorLayout& outLayout) const override;
49
// Elementwise multiplication of this (lhsLayout) and rhsImpl (rhsLayout).
// Returns a new Impl holding the result with outLayout.
50 std::unique_ptr<Tensor::Impl> mul(const TensorLayout& lhsLayout, const Tensor::Impl* rhsImpl,
51 const TensorLayout& rhsLayout,
52 const TensorLayout& outLayout) const override;
53
// Elementwise division of this (lhsLayout) by rhsImpl (rhsLayout).
// Returns a new Impl holding the result with outLayout.
// NOTE(review): handling of zero divisors is not visible here.
54 std::unique_ptr<Tensor::Impl> div(const TensorLayout& lhsLayout, const Tensor::Impl* rhsImpl,
55 const TensorLayout& rhsLayout,
56 const TensorLayout& outLayout) const override;
57
// In-place elementwise addition. The generated docs describe the data
// addressed by lhsLayout as modified in place; the layout argument itself is
// passed by const reference.
58 void iadd(const TensorLayout& lhsLayout, const Tensor::Impl* rhsImpl,
59 const TensorLayout& rhsLayout) override;
60
// In-place elementwise subtraction. The generated docs describe the data
// addressed by lhsLayout as modified in place; the layout argument itself is
// passed by const reference.
61 void isub(const TensorLayout& lhsLayout, const Tensor::Impl* rhsImpl,
62 const TensorLayout& rhsLayout) override;
63
// In-place elementwise multiplication. The generated docs describe the data
// addressed by lhsLayout as modified in place; the layout argument itself is
// passed by const reference.
64 void imul(const TensorLayout& lhsLayout, const Tensor::Impl* rhsImpl,
65 const TensorLayout& rhsLayout) override;
66
// In-place elementwise division. The generated docs describe the data
// addressed by lhsLayout as modified in place; the layout argument itself is
// passed by const reference.
67 void idiv(const TensorLayout& lhsLayout, const Tensor::Impl* rhsImpl,
68 const TensorLayout& rhsLayout) override;
69
// Reduces dimensions [dim, rank) by summation. Output with outLayout.
// NOTE(review): `dim` is not a parameter of this declaration — presumably the
// reduced range is encoded by blockLayout; confirm against the caller.
70 std::unique_ptr<Tensor::Impl> sum(const TensorLayout& layout, const TensorLayout& blockLayout,
71 const TensorLayout& outLayout) const override;
72
// Reduces dimensions [dim, rank) by taking the minimum. Output with outLayout.
// NOTE(review): `dim` is not a parameter of this declaration — presumably the
// reduced range is encoded by blockLayout; confirm against the caller.
73 std::unique_ptr<Tensor::Impl> min(const TensorLayout& layout, const TensorLayout& blockLayout,
74 const TensorLayout& outLayout) const override;
75
// Reduces dimensions [dim, rank) by taking the maximum. Output with outLayout.
// NOTE(review): `dim` is not a parameter of this declaration — presumably the
// reduced range is encoded by blockLayout; confirm against the caller.
76 std::unique_ptr<Tensor::Impl> max(const TensorLayout& layout, const TensorLayout& blockLayout,
77 const TensorLayout& outLayout) const override;
78
// Reduces dimensions [dim, rank) by taking the product. Output with outLayout.
// NOTE(review): `dim` is not a parameter of this declaration — presumably the
// reduced range is encoded by blockLayout; confirm against the caller.
79 std::unique_ptr<Tensor::Impl> prod(const TensorLayout& layout, const TensorLayout& blockLayout,
80 const TensorLayout& outLayout) const override;
81
// L2 norm of the tensor described by layout, returned as a new Impl.
82 std::unique_ptr<Tensor::Impl> norm(const TensorLayout& layout) const override;
83
84
// Matrix multiplication of this (lhsLayout) with rhsImpl (rhsLayout); returns
// a new Impl.
// NOTE(review): no description is present in the generated docs. Presumably
// `batch` is the number of matrix pairs and each product is (m x k) * (k x p)
// giving (m x p) with outLayout — confirm against the implementation.
85 std::unique_ptr<Tensor::Impl> matmul(const TensorLayout& lhsLayout, const Tensor::Impl* rhsImpl,
86 const TensorLayout& rhsLayout,
87 const TensorLayout& outLayout, size_t batch, size_t m,
88 size_t k, size_t p) const override;
89
90
// Elementwise less than. Returns a tensor of 0.0 / 1.0 with outLayout.
91 std::unique_ptr<Tensor::Impl> less(const TensorLayout& lhsLayout, const Tensor::Impl* rhsImpl,
92 const TensorLayout& rhsLayout,
93 const TensorLayout& outLayout) const override;
94
// Elementwise less or equal. Returns a tensor of 0.0 / 1.0 with outLayout.
95 std::unique_ptr<Tensor::Impl> lessEqual(const TensorLayout& lhsLayout,
96 const Tensor::Impl* rhsImpl,
97 const TensorLayout& rhsLayout,
98 const TensorLayout& outLayout) const override;
99
// Elementwise greater than. Returns a tensor of 0.0 / 1.0 with outLayout.
100 std::unique_ptr<Tensor::Impl> greater(const TensorLayout& lhsLayout,
101 const Tensor::Impl* rhsImpl,
102 const TensorLayout& rhsLayout,
103 const TensorLayout& outLayout) const override;
104
// Elementwise greater or equal. Returns a tensor of 0.0 / 1.0 with outLayout.
105 std::unique_ptr<Tensor::Impl> greaterEqual(const TensorLayout& lhsLayout,
106 const Tensor::Impl* rhsImpl,
107 const TensorLayout& rhsLayout,
108 const TensorLayout& outLayout) const override;
109
// Elementwise closeness within `tolerance`. Returns a tensor of 0.0 / 1.0
// with outLayout.
// NOTE(review): whether the tolerance is absolute or relative is not visible
// here — confirm in the implementation.
110 std::unique_ptr<Tensor::Impl> isClose(const TensorLayout& lhsLayout,
111 const Tensor::Impl* rhsImpl,
112 const TensorLayout& rhsLayout,
113 const TensorLayout& outLayout,
114 float tolerance) const override;
115
116private:
// Shape of this tensor (presumably the value returned by getShape()).
117 Tensor::Shape m_shape;
// Raw pointer to the element buffer; presumably the device buffer returned by
// dataPtr().
// NOTE(review): the class declares a destructor and holds a raw pointer, but
// no copy/move operations are declared in this listing — confirm that copying
// is prevented or handled (e.g. by the base class) to avoid a double release.
118 float* d_data;
119
// Converts a base Tensor::Impl pointer to a const CUDAImpl pointer.
// NOTE(review): whether the conversion is checked (and what happens when `p`
// is not a CUDAImpl) is not visible here — confirm in the implementation.
121 const Tensor::CUDAImpl* cast(const Tensor::Impl* p) const;
122
// Private helper templated on the kernel type. Takes the same layout/rhs
// arguments as the public binary operations plus a `kernel` passed by value,
// and returns a new Impl.
// NOTE(review): presumably the shared implementation behind add/sub/mul/div
// and the comparison operations — confirm in the implementation file.
123 template <typename Kernel>
124 std::unique_ptr<Tensor::Impl> applyKernel(const TensorLayout& lhsLayout,
125 const Tensor::Impl* rhsImpl,
126 const TensorLayout& rhsLayout,
127 const TensorLayout& outLayout, Kernel kernel) const;
128
// Private helper templated on the kernel type. Takes the same arguments as
// the public in-place operations plus a `kernel` passed by value; non-const
// and returns nothing.
// NOTE(review): presumably the shared implementation behind
// iadd/isub/imul/idiv — confirm in the implementation file.
129 template <typename Kernel>
130 void applyInplaceKernel(const TensorLayout& lhsLayout, const Tensor::Impl* rhsImpl,
131 const TensorLayout& rhsLayout, Kernel kernel);
132
// Private helper templated on the kernel type. Takes the same layout
// arguments as the public reductions plus an initial value and a `kernel`
// passed by value, and returns a new Impl.
// NOTE(review): presumably the shared implementation behind sum/min/max/prod,
// with `initValue` as the starting accumulator — confirm in the
// implementation file.
133 // kernel must be associative
134 template <typename Kernel>
135 std::unique_ptr<Tensor::Impl> applyReductionKernel(const TensorLayout& layout,
136 const TensorLayout& blockLayout,
137 const TensorLayout& outLayout,
138 float initValue, Kernel kernel) const;
139};
140
141#endif // TENSOR_IMPL_CUDA_H
Definition tensor_impl_CUDA.h:13
float * dataPtr() const
Returns a raw pointer to the device data buffer.
std::unique_ptr< Tensor::Impl > lessEqual(const TensorLayout &lhsLayout, const Tensor::Impl *rhsImpl, const TensorLayout &rhsLayout, const TensorLayout &outLayout) const override
Elementwise less or equal. Returns a tensor of 0.0 / 1.0 with outLayout.
void copyFromHost(const float *data, size_t count) override
std::unique_ptr< Tensor::Impl > min(const TensorLayout &layout, const TensorLayout &blockLayout, const TensorLayout &outLayout) const override
Reduces dimensions [dim, rank) by taking the minimum. Output with outLayout.
void iadd(const TensorLayout &lhsLayout, const Tensor::Impl *rhsImpl, const TensorLayout &rhsLayout) override
In-place elementwise addition. Modifies the elements addressed by lhsLayout in place.
void isub(const TensorLayout &lhsLayout, const Tensor::Impl *rhsImpl, const TensorLayout &rhsLayout) override
In-place elementwise subtraction. Modifies the elements addressed by lhsLayout in place.
void print() const override
Prints the entire tensor to stdout.
void fillAll(float value) override
Fills all elements with value.
std::unique_ptr< Tensor::Impl > mul(const TensorLayout &lhsLayout, const Tensor::Impl *rhsImpl, const TensorLayout &rhsLayout, const TensorLayout &outLayout) const override
Elementwise multiplication. Returns a new Impl with the result with outLayout.
std::string toString() const override
Returns a string representation of the data.
std::unique_ptr< Tensor::Impl > norm(const TensorLayout &layout) const override
L2 norm of the tensor described by layout.
void set(const TensorLayout &lhsLayout, const Tensor::Impl *rhsImpl, const TensorLayout &rhsLayout) override
Copies data from rhsImpl with rhsLayout into this with lhsLayout.
std::vector< float > toVector() const override
Copies all elements into a flat vector (row-major order).
std::unique_ptr< Tensor::Impl > div(const TensorLayout &lhsLayout, const Tensor::Impl *rhsImpl, const TensorLayout &rhsLayout, const TensorLayout &outLayout) const override
Elementwise division. Returns a new Impl with the result with outLayout.
std::unique_ptr< Tensor::Impl > max(const TensorLayout &layout, const TensorLayout &blockLayout, const TensorLayout &outLayout) const override
Reduces dimensions [dim, rank) by taking the maximum. Output with outLayout.
std::unique_ptr< Tensor::Impl > isClose(const TensorLayout &lhsLayout, const Tensor::Impl *rhsImpl, const TensorLayout &rhsLayout, const TensorLayout &outLayout, float tolerance) const override
Elementwise closeness within tolerance. Returns a tensor of 0.0 / 1.0 with outLayout.
Tensor::Shape getShape() const override
Returns the tensor shape.
std::unique_ptr< Tensor::Impl > sum(const TensorLayout &layout, const TensorLayout &blockLayout, const TensorLayout &outLayout) const override
Reduces dimensions [dim, rank) by summation. Output with outLayout.
std::unique_ptr< Tensor::Impl > less(const TensorLayout &lhsLayout, const Tensor::Impl *rhsImpl, const TensorLayout &rhsLayout, const TensorLayout &outLayout) const override
Elementwise less than. Returns a tensor of 0.0 / 1.0 with outLayout.
bool compare(const TensorLayout &lhsLayout, const Tensor::Impl *rhsImpl, const TensorLayout &rhsLayout) const override
Returns true if the data with lhsLayout matches rhsImpl with rhsLayout.
void fillRand() override
Fills all elements with random values in [-1, 1].
std::unique_ptr< Tensor::Impl > greaterEqual(const TensorLayout &lhsLayout, const Tensor::Impl *rhsImpl, const TensorLayout &rhsLayout, const TensorLayout &outLayout) const override
Elementwise greater or equal. Returns a tensor of 0.0 / 1.0 with outLayout.
std::unique_ptr< Tensor::Impl > clone() const override
Deep copies this implementation.
void idiv(const TensorLayout &lhsLayout, const Tensor::Impl *rhsImpl, const TensorLayout &rhsLayout) override
In-place elementwise division. Modifies the elements addressed by lhsLayout in place.
std::unique_ptr< Tensor::Impl > greater(const TensorLayout &lhsLayout, const Tensor::Impl *rhsImpl, const TensorLayout &rhsLayout, const TensorLayout &outLayout) const override
Elementwise greater than. Returns a tensor of 0.0 / 1.0 with outLayout.
void imul(const TensorLayout &lhsLayout, const Tensor::Impl *rhsImpl, const TensorLayout &rhsLayout) override
In-place elementwise multiplication. Modifies the elements addressed by lhsLayout in place.
void print(const std::vector< size_t > &position) const override
Prints the block starting at position to stdout.
size_t getNumElements() const override
Returns the total number of elements.
std::unique_ptr< Tensor::Impl > add(const TensorLayout &lhsLayout, const Tensor::Impl *rhsImpl, const TensorLayout &rhsLayout, const TensorLayout &outLayout) const override
Elementwise addition. Returns a new Impl with the result with outLayout.
std::unique_ptr< Tensor::Impl > prod(const TensorLayout &layout, const TensorLayout &blockLayout, const TensorLayout &outLayout) const override
Reduces dimensions [dim, rank) by taking the product. Output with outLayout.
std::unique_ptr< Tensor::Impl > sub(const TensorLayout &lhsLayout, const Tensor::Impl *rhsImpl, const TensorLayout &rhsLayout, const TensorLayout &outLayout) const override
Elementwise subtraction. Returns a new Impl with the result with outLayout.
std::unique_ptr< Tensor::Impl > matmul(const TensorLayout &lhsLayout, const Tensor::Impl *rhsImpl, const TensorLayout &rhsLayout, const TensorLayout &outLayout, size_t batch, size_t m, size_t k, size_t p) const override
Definition tensor_impl.h:15
Definition tensor_shape.h:15
Definition tensor_layout.h:15