43 class IndexerIterator;
46 static constexpr int64_t MAX_DIMS = 10;
50 static constexpr int64_t MAX_INPUTS = 10;
54 static constexpr int64_t MAX_OUTPUTS = 2;
57 template <
typename T,
int size>
69 template <
int NARGS,
typename index_t = u
int32_t>
73 const int64_t*
const* strides)
75 if (dims_ > MAX_DIMS) {
79 for (
int i = 0; i < MAX_DIMS; ++i) {
85 for (
int arg = 0; arg < NARGS; arg++) {
86 strides_[i][arg] = i < dims_ ? strides[arg][i] : 0;
92 index_t linear_idx)
const {
94 #if defined(__CUDA_ARCH__) 97 for (
int arg = 0; arg < NARGS; arg++) {
101 #if defined(__CUDA_ARCH__) 104 for (
int dim = 0; dim < MAX_DIMS; ++dim) {
108 index_t mod = linear_idx % sizes_[dim];
109 linear_idx = linear_idx / sizes_[dim];
111 #if defined(__CUDA_ARCH__) 114 for (
int arg = 0; arg < NARGS; arg++) {
115 offsets[arg] += mod * strides_[dim][arg];
122 index_t sizes_[MAX_DIMS];
123 index_t strides_[MAX_DIMS][NARGS];
131 TensorRef() : data_ptr_(nullptr), ndims_(0), dtype_byte_size_(0) {}
138 data_ptr_ =
const_cast<void*
>(t.
GetDataPtr());
141 for (int64_t i = 0; i < ndims_; ++i) {
143 byte_strides_[i] = t.
GetStride(i) * dtype_byte_size_;
149 if (static_cast<int64_t>(dims.size()) != ndims_) {
151 dims.size(), ndims_);
153 std::vector<bool> seen_dims(ndims_,
false);
154 for (
const int64_t& dim : dims) {
155 seen_dims[dim] =
true;
157 if (!std::all_of(seen_dims.begin(), seen_dims.end(),
158 [](
bool seen) {
return seen; })) {
160 "Permute dims must be a permuntation from 0 to {}.",
167 for (int64_t i = 0; i < ndims_; ++i) {
169 new_shape[i] = shape_[old_dim];
170 new_byte_strides[i] = byte_strides_[old_dim];
172 for (int64_t i = 0; i < ndims_; ++i) {
173 shape_[i] = new_shape[i];
174 byte_strides_[i] = new_byte_strides[i];
180 rc = rc && (data_ptr_ == other.
data_ptr_);
181 rc = rc && (ndims_ == other.
ndims_);
183 for (int64_t i = 0; i < ndims_; ++i) {
184 rc = rc && (shape_[i] == other.
shape_[i]);
194 int64_t dtype_byte_size_ = 0;
195 int64_t shape_[MAX_DIMS];
196 int64_t byte_strides_[MAX_DIMS];
224 : input_(
TensorRef(tensor)), ndims_(tensor.NumDims()) {}
227 int64_t num_workloads = 1;
228 for (int64_t i = 0; i < ndims_; ++i) {
229 num_workloads *= input_.shape_[i];
231 return num_workloads;
235 if (workload_idx < 0 || workload_idx >= NumWorkloads()) {
239 workload_idx = workload_idx * input_.dtype_byte_size_;
240 for (int64_t i = 0; i < ndims_; ++i) {
241 offset += workload_idx / input_.byte_strides_[i] *
242 input_.byte_strides_[i];
243 workload_idx = workload_idx % input_.byte_strides_[i];
245 return static_cast<void*
>(
static_cast<char*
>(input_.data_ptr_) +
270 Indexer(
const std::vector<Tensor>& input_tensors,
271 const Tensor& output_tensor,
275 Indexer(
const std::vector<Tensor>& input_tensors,
276 const std::vector<Tensor>& output_tensors,
281 bool CanUse32BitIndexing()
const;
290 std::unique_ptr<Indexer> SplitLargestDim();
294 Indexer GetPerOutputIndexer(int64_t output_idx)
const;
305 void ShrinkDim(int64_t dim, int64_t start, int64_t
size);
308 int64_t NumReductionDims()
const;
332 int64_t NumWorkloads()
const;
335 int64_t NumOutputElements()
const;
342 if (i >= num_inputs_ || i < 0) {
349 if (i >= num_inputs_ || i < 0) {
358 if (i >= num_outputs_ || i < 0) {
365 if (i >= num_outputs_ || i < 0) {
375 if (num_outputs_ > 1) {
382 if (num_outputs_ > 1) {
394 return outputs_[0].byte_strides_[dim] == 0 && master_shape_[dim] > 1;
403 int64_t workload_idx)
const {
404 if (input_idx < 0 || input_idx >= num_inputs_) {
407 return GetWorkloadDataPtr(inputs_[input_idx], workload_idx);
415 return GetWorkloadDataPtr(outputs_[0], workload_idx);
418 int64_t workload_idx)
const {
419 return GetWorkloadDataPtr(outputs_[output_idx], workload_idx);
425 void CoalesceDimensions();
430 void ReorderDimensions(
const SizeVector& reduction_dims);
433 void UpdateMasterStrides();
461 static void BroadcastRestride(
TensorRef& src,
463 const int64_t* dst_shape);
467 static void ReductionRestride(
TensorRef& dst,
469 const int64_t* src_shape,
476 int64_t workload_idx)
const {
479 if (workload_idx < 0) {
483 for (int64_t i = 0; i < ndims_; ++i) {
484 offset += workload_idx / master_strides_[i] * tr.
byte_strides_[i];
485 workload_idx = workload_idx % master_strides_[i];
487 return static_cast<char*
>(tr.
data_ptr_) + offset;
491 int64_t num_inputs_ = 0;
492 int64_t num_outputs_ = 0;
511 int64_t master_shape_[MAX_DIMS];
515 int64_t master_strides_[MAX_DIMS];
523 bool final_output_ =
true;
527 bool accumulate_ =
false;
542 std::vector<std::unique_ptr<Indexer>>
vec_;
OPEN3D_HOST_DEVICE char * GetOutputPtr(int64_t workload_idx) const
Definition: Indexer.h:414
TensorRef()
Definition: Indexer.h:131
int64_t dtype_byte_size_
Definition: Indexer.h:194
OPEN3D_HOST_DEVICE void * GetPtr(int64_t workload_idx) const
Definition: Indexer.h:234
FN_SPECIFIERS bool operator!=(const MiniVec< T, N > &a, const MiniVec< T, N > &b)
Definition: MiniVec.h:101
int64_t NumDims() const
Definition: Tensor.h:951
OPEN3D_HOST_DEVICE int64_t NumWorkloads() const
Definition: Indexer.h:226
void * data_ptr_
Definition: Indexer.h:192
TensorIterator(const Tensor &tensor)
Definition: Indexer.h:223
bool IsReductionDim(int64_t dim) const
Returns true if the dim-th dimension is reduced.
Definition: Indexer.h:390
int dims_
Definition: Indexer.h:121
A minimalistic class that references a Tensor.
Definition: Indexer.h:127
int64_t ndims_
Definition: Indexer.h:193
void * GetDataPtr()
Definition: Tensor.h:939
Definition: Indexer.h:221
int offset
Definition: FilePCD.cpp:64
TensorRef(const Tensor &t)
Definition: Indexer.h:133
OPEN3D_HOST_DEVICE char * GetWorkloadDataPtr(const TensorRef &tr, int64_t workload_idx) const
Definition: Indexer.h:475
int64_t shape_[MAX_DIMS]
Definition: Indexer.h:195
int64_t * GetMasterShape()
Definition: Indexer.h:316
void LogError(const char *format, const Args &... args)
Definition: Console.h:176
bool operator==(const PointXYZ A, const PointXYZ B)
Definition: Cloud.h:151
T data_[size]
Definition: Indexer.h:59
TensorRef input_
Definition: Indexer.h:250
OPEN3D_HOST_DEVICE char * GetOutputPtr(int64_t output_idx, int64_t workload_idx) const
Definition: Indexer.h:417
Definition: SizeVector.h:41
TensorRef & GetInput(int64_t i)
Returns input TensorRef.
Definition: Indexer.h:341
const int64_t * GetMasterStrides() const
Definition: Indexer.h:320
void Permute(const SizeVector &dims)
Definition: Indexer.h:147
Dtype GetDtype() const
Definition: Tensor.h:943
int size
Definition: FilePCD.cpp:59
int64_t GetStride(int64_t dim) const
Definition: Tensor.h:935
DtypePolicy
Definition: Indexer.h:199
Definition: Indexer.h:532
#define OPEN3D_HOST_DEVICE
Definition: CUDAUtils.h:54
bool IsFinalOutput() const
Definition: Indexer.h:298
OPEN3D_HOST_DEVICE char * GetInputPtr(int64_t input_idx, int64_t workload_idx) const
Definition: Indexer.h:402
OffsetCalculator(int dims, const int64_t *sizes, const int64_t *const *strides)
Definition: Indexer.h:71
TensorRef & GetOutput()
Definition: Indexer.h:374
std::vector< std::unique_ptr< Indexer > > vec_
Definition: Indexer.h:542
OPEN3D_HOST_DEVICE T operator[](int i) const
Definition: Indexer.h:61
int64_t WrapDim(int64_t dim, int64_t max_dim, bool inclusive)
Wrap around negative dim.
Definition: ShapeUtil.cpp:150
Tensor operator*(T scalar_lhs, const Tensor &rhs)
Definition: Tensor.h:1115
const TensorRef & GetOutput() const
Definition: Indexer.h:381
SizeVector GetShape() const
Definition: Tensor.h:923
Iterator()
Definition: Indexer.h:533
const TensorRef & GetOutput(int64_t i) const
Definition: Indexer.h:364
int64_t ndims_
Definition: Indexer.h:251
int64_t NumDims() const
Returns number of dimensions of the Indexer.
Definition: Indexer.h:311
Definition: PinholeCameraIntrinsic.cpp:35
const TensorRef & GetInput(int64_t i) const
Definition: Indexer.h:348
int64_t ByteSize() const
Definition: Dtype.h:71
bool ShouldAccumulate() const
Definition: Indexer.h:296
const int64_t * GetMasterShape() const
Definition: Indexer.h:315
Definition: Indexer.h:530
SmallArray & operator=(const SmallArray &)=default
OPEN3D_HOST_DEVICE T & operator[](int i)
Definition: Indexer.h:62
bool operator!=(const TensorRef &other) const
Definition: Indexer.h:190
Definition: Indexer.h:261
int64_t byte_strides_[MAX_DIMS]
Definition: Indexer.h:196
Indexer()
Definition: Indexer.h:263
bool operator==(const TensorRef &other) const
Definition: Indexer.h:178
int64_t NumInputs() const
Number of input Tensors.
Definition: Indexer.h:338
TensorRef & GetOutput(int64_t i)
Returns output TensorRef.
Definition: Indexer.h:357