Open3D (C++ API)  0.11.0
Indexer.h
// ----------------------------------------------------------------------------
// -                        Open3D: www.open3d.org                            -
// ----------------------------------------------------------------------------
// The MIT License (MIT)
//
// Copyright (c) 2018 www.open3d.org
//
// Permission is hereby granted, free of charge, to any person obtaining a copy
// of this software and associated documentation files (the "Software"), to deal
// in the Software without restriction, including without limitation the rights
// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
// copies of the Software, and to permit persons to whom the Software is
// furnished to do so, subject to the following conditions:
//
// The above copyright notice and this permission notice shall be included in
// all copies or substantial portions of the Software.
//
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
// FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
// IN THE SOFTWARE.
// ----------------------------------------------------------------------------

#pragma once

#include <sstream>

#include "open3d/core/CUDAUtils.h"
#include "open3d/core/Dtype.h"
#include "open3d/core/ShapeUtil.h"
#include "open3d/core/SizeVector.h"
#include "open3d/core/Tensor.h"
#include "open3d/utility/Console.h"

namespace open3d {
namespace core {

class Indexer;

class IndexerIterator;

// Maximum number of dimensions of TensorRef.
static constexpr int64_t MAX_DIMS = 10;

// Maximum number of inputs of an op.
// MAX_INPUTS shall be >= MAX_DIMS to support advanced indexing.
static constexpr int64_t MAX_INPUTS = 10;

// Maximum number of outputs of an op. This number can be increased when
// necessary.
static constexpr int64_t MAX_OUTPUTS = 2;

// Fixed-size array type usable from host and device.
template <typename T, int size>
struct alignas(16) SmallArray {
    T data_[size];

    OPEN3D_HOST_DEVICE T operator[](int i) const { return data_[i]; }
    OPEN3D_HOST_DEVICE T& operator[](int i) { return data_[i]; }

    SmallArray() = default;
    SmallArray(const SmallArray&) = default;
    SmallArray& operator=(const SmallArray&) = default;
};
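
// Editorial note: SmallArray is trivially copyable (defaulted constructors,
// plain member array), which is what lets OffsetCalculator::get() below
// construct one inside a device function and return it by value.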

// Computes per-argument offsets from a linear index over up to MAX_DIMS
// dimensions.
template <int NARGS, typename index_t = uint32_t>
struct OffsetCalculator {
    OffsetCalculator(int dims,
                     const int64_t* sizes,
                     const int64_t* const* strides)
        : dims_(dims) {
        if (dims_ > MAX_DIMS) {
            utility::LogError("tensor has too many (>{}) dims_", MAX_DIMS);
        }

        for (int i = 0; i < MAX_DIMS; ++i) {
            if (i < dims_) {
                sizes_[i] = sizes[i];
            } else {
                sizes_[i] = 1;
            }
            for (int arg = 0; arg < NARGS; arg++) {
                strides_[i][arg] = i < dims_ ? strides[arg][i] : 0;
            }
        }
    }

    OPEN3D_HOST_DEVICE SmallArray<index_t, NARGS> get(
            index_t linear_idx) const {
        SmallArray<index_t, NARGS> offsets;
#if defined(__CUDA_ARCH__)
#pragma unroll
#endif
        for (int arg = 0; arg < NARGS; arg++) {
            offsets[arg] = 0;
        }

#if defined(__CUDA_ARCH__)
#pragma unroll
#endif
        for (int dim = 0; dim < MAX_DIMS; ++dim) {
            if (dim == dims_) {
                break;
            }
            index_t mod = linear_idx % sizes_[dim];
            linear_idx = linear_idx / sizes_[dim];

#if defined(__CUDA_ARCH__)
#pragma unroll
#endif
            for (int arg = 0; arg < NARGS; arg++) {
                offsets[arg] += mod * strides_[dim][arg];
            }
        }
        return offsets;
    }

    int dims_;
    index_t sizes_[MAX_DIMS];
    index_t strides_[MAX_DIMS][NARGS];
};
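
// A worked example (editorial, with assumed inputs): take NARGS = 1 and a 2x3
// tensor whose sizes and element strides are listed fastest-varying dimension
// first, i.e. sizes = {3, 2} and strides[0] = {1, 3}. Then get(4) computes
//   dim 0: mod = 4 % 3 = 1, linear_idx -> 4 / 3 = 1, offsets[0] += 1 * 1
//   dim 1: mod = 1 % 2 = 1, linear_idx -> 0,         offsets[0] += 1 * 3
// so offsets[0] = 4, the element offset of entry (row 1, col 1) in row-major
// storage.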

/// A minimalistic class that references a Tensor.
struct TensorRef {
    // The default copy constructor works on __device__ as well so we don't
    // define it explicitly. shape_[MAX_DIMS] and byte_strides_[MAX_DIMS] will
    // be copied fully.
    TensorRef() : data_ptr_(nullptr), ndims_(0), dtype_byte_size_(0) {}

    TensorRef(const Tensor& t) {
        if (t.NumDims() > MAX_DIMS) {
            utility::LogError("Tensor has too many dimensions {} > {}.",
                              t.NumDims(), MAX_DIMS);
        }
        data_ptr_ = const_cast<void*>(t.GetDataPtr());
        ndims_ = t.NumDims();
        dtype_byte_size_ = t.GetDtype().ByteSize();
        for (int64_t i = 0; i < ndims_; ++i) {
            shape_[i] = t.GetShape(i);
            byte_strides_[i] = t.GetStride(i) * dtype_byte_size_;
        }
    }

    void Permute(const SizeVector& dims) {
        // Check that dims is a permutation of [0, 1, 2, ..., n-1].
        if (static_cast<int64_t>(dims.size()) != ndims_) {
            utility::LogError("Number of dimensions mismatch {} != {}.",
                              dims.size(), ndims_);
        }
        std::vector<bool> seen_dims(ndims_, false);
        for (const int64_t& dim : dims) {
            seen_dims[dim] = true;
        }
        if (!std::all_of(seen_dims.begin(), seen_dims.end(),
                         [](bool seen) { return seen; })) {
            utility::LogError(
                    "Permute dims must be a permutation from 0 to {}.",
                    dims.size() - 1);
        }

        // Map to new shape and strides.
        SizeVector new_shape(ndims_);
        SizeVector new_byte_strides(ndims_);
        for (int64_t i = 0; i < ndims_; ++i) {
            int64_t old_dim = shape_util::WrapDim(dims[i], ndims_);
            new_shape[i] = shape_[old_dim];
            new_byte_strides[i] = byte_strides_[old_dim];
        }
        for (int64_t i = 0; i < ndims_; ++i) {
            shape_[i] = new_shape[i];
            byte_strides_[i] = new_byte_strides[i];
        }
    }
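
    // Editorial example: permuting a TensorRef of shape (2, 3, 4) with
    // dims = {2, 0, 1} yields shape (4, 2, 3); byte_strides_ are reordered
    // the same way, so the underlying data is never moved.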

    bool operator==(const TensorRef& other) const {
        bool rc = true;
        rc = rc && (data_ptr_ == other.data_ptr_);
        rc = rc && (ndims_ == other.ndims_);
        rc = rc && (dtype_byte_size_ == other.dtype_byte_size_);
        for (int64_t i = 0; i < ndims_; ++i) {
            rc = rc && (shape_[i] == other.shape_[i]);
            rc = rc && (byte_strides_[i] == other.byte_strides_[i]);
        }
        return rc;
    }

    bool operator!=(const TensorRef& other) const { return !(*this == other); }

    void* data_ptr_;
    int64_t ndims_ = 0;
    int64_t dtype_byte_size_ = 0;
    int64_t shape_[MAX_DIMS];
    int64_t byte_strides_[MAX_DIMS];
};

enum class DtypePolicy {
    NONE,        // Do not check. Expects the kernel to handle the conversion,
                 // e.g. in a Copy kernel with type casting.
    ALL_SAME,    // All inputs and outputs have the same dtype.
    INPUT_SAME,  // All inputs have the same dtype.
    INPUT_SAME_OUTPUT_BOOL  // All inputs have the same dtype; outputs
                            // have bool dtype.
};
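
// For instance (editorial illustration): a casting Copy kernel would pick
// NONE, a binary arithmetic kernel ALL_SAME, and a comparison kernel (same
// input dtypes, Bool output) INPUT_SAME_OUTPUT_BOOL.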

class TensorIterator {
public:
    TensorIterator(const Tensor& tensor)
        : input_(TensorRef(tensor)), ndims_(tensor.NumDims()) {}

    OPEN3D_HOST_DEVICE int64_t NumWorkloads() const {
        int64_t num_workloads = 1;
        for (int64_t i = 0; i < ndims_; ++i) {
            num_workloads *= input_.shape_[i];
        }
        return num_workloads;
    }

    OPEN3D_HOST_DEVICE void* GetPtr(int64_t workload_idx) const {
        if (workload_idx < 0 || workload_idx >= NumWorkloads()) {
            return nullptr;
        }
        int64_t offset = 0;
        workload_idx = workload_idx * input_.dtype_byte_size_;
        for (int64_t i = 0; i < ndims_; ++i) {
            offset += workload_idx / input_.byte_strides_[i] *
                      input_.byte_strides_[i];
            workload_idx = workload_idx % input_.byte_strides_[i];
        }
        return static_cast<void*>(static_cast<char*>(input_.data_ptr_) +
                                  offset);
    }

protected:
    TensorRef input_;
    int64_t ndims_;
};
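
// A worked example of GetPtr (editorial, assuming a contiguous 2x3 Float32
// tensor, so byte_strides_ = {12, 4}): GetPtr(4) first scales the index to
// 4 * 4 = 16 bytes, then
//   dim 0: 16 / 12 * 12 = 12, remainder 16 % 12 = 4
//   dim 1:  4 /  4 *  4 =  4, remainder 0
// for a total byte offset of 16, i.e. element (1, 1).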

/// Indexing engine for elementwise ops with broadcasting support.
///
/// Fancy indexing is supported by restriding input tensor and treating the
/// operation as elementwise op.
///
/// After constructing Indexer on the host, the indexing methods can be
/// used from both host and device.
class Indexer {
public:
    Indexer() {}
    Indexer(const Indexer&) = default;
    Indexer& operator=(const Indexer&) = default;

    Indexer(const std::vector<Tensor>& input_tensors,
            const Tensor& output_tensor,
            DtypePolicy dtype_policy = DtypePolicy::ALL_SAME,
            const SizeVector& reduction_dims = {});

    Indexer(const std::vector<Tensor>& input_tensors,
            const std::vector<Tensor>& output_tensors,
            DtypePolicy dtype_policy = DtypePolicy::ALL_SAME,
            const SizeVector& reduction_dims = {});

    /// Returns true if the workloads can be addressed with 32-bit indices.
    bool CanUse32BitIndexing() const;

    /// Returns an IndexerIterator that yields sub-indexers, each of which
    /// can use 32-bit indexing.
    IndexerIterator SplitTo32BitIndexing() const;

    std::unique_ptr<Indexer> SplitLargestDim();

    Indexer GetPerOutputIndexer(int64_t output_idx) const;

    bool ShouldAccumulate() const { return accumulate_; }

    bool IsFinalOutput() const { return final_output_; }

    /// Shrinks the dim-th dimension to the range [start, start + size).
    void ShrinkDim(int64_t dim, int64_t start, int64_t size);

    /// Returns the number of reduction dimensions.
    int64_t NumReductionDims() const;

    /// Returns number of dimensions of the Indexer.
    int64_t NumDims() const { return ndims_; }

    /// Returns the master shape, against which all operands are restrided.
    const int64_t* GetMasterShape() const { return master_shape_; }
    int64_t* GetMasterShape() { return master_shape_; }

    const int64_t* GetMasterStrides() const { return master_strides_; }

    /// Returns the total number of workloads.
    int64_t NumWorkloads() const;

    /// Returns the number of output elements.
    int64_t NumOutputElements() const;

    /// Number of input Tensors.
    int64_t NumInputs() const { return num_inputs_; }

    /// Returns input TensorRef.
    TensorRef& GetInput(int64_t i) {
        if (i >= num_inputs_ || i < 0) {
            utility::LogError("0 <= i < {} required, however, i = {}.",
                              num_inputs_, i);
        }
        return inputs_[i];
    }
    const TensorRef& GetInput(int64_t i) const {
        if (i >= num_inputs_ || i < 0) {
            utility::LogError("0 <= i < {} required, however, i = {}.",
                              num_inputs_, i);
        }
        return inputs_[i];
    }

    /// Returns output TensorRef.
    TensorRef& GetOutput(int64_t i) {
        if (i >= num_outputs_ || i < 0) {
            utility::LogError("0 <= i < {} required, however, i = {}.",
                              num_outputs_, i);
        }
        return outputs_[i];
    }
    const TensorRef& GetOutput(int64_t i) const {
        if (i >= num_outputs_ || i < 0) {
            utility::LogError("0 <= i < {} required, however, i = {}.",
                              num_outputs_, i);
        }
        return outputs_[i];
    }

    TensorRef& GetOutput() {
        if (num_outputs_ > 1) {
            utility::LogError("num_outputs_ == {} > 1, use GetOutput(i)",
                              num_outputs_);
        }
        return GetOutput(0);
    }
    const TensorRef& GetOutput() const {
        if (num_outputs_ > 1) {
            utility::LogError("num_outputs_ == {} > 1, use GetOutput(i)",
                              num_outputs_);
        }
        return GetOutput(0);
    }

    /// Returns true if the dim-th dimension is reduced.
    bool IsReductionDim(int64_t dim) const {
        // All outputs have the same shape and reduction dims. Even if they
        // don't have the same initial strides, the reduced strides are always
        // set to 0. Thus it is okay to use outputs_[0].
        return outputs_[0].byte_strides_[dim] == 0 && master_shape_[dim] > 1;
    }
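
    // Editorial illustration: if a (2, 3) input is reduced over dim 1, the
    // output is restrided so that outputs_[0].byte_strides_[1] == 0 while
    // master_shape_[1] == 3 > 1, so IsReductionDim(1) returns true.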

    OPEN3D_HOST_DEVICE char* GetInputPtr(int64_t input_idx,
                                         int64_t workload_idx) const {
        if (input_idx < 0 || input_idx >= num_inputs_) {
            return nullptr;
        }
        return GetWorkloadDataPtr(inputs_[input_idx], workload_idx);
    }

    OPEN3D_HOST_DEVICE char* GetOutputPtr(int64_t workload_idx) const {
        return GetWorkloadDataPtr(outputs_[0], workload_idx);
    }
    OPEN3D_HOST_DEVICE char* GetOutputPtr(int64_t output_idx,
                                          int64_t workload_idx) const {
        return GetWorkloadDataPtr(outputs_[output_idx], workload_idx);
    }

protected:
    // Merge adjacent dimensions that can be traversed as one, to reduce
    // indexing overhead.
    void CoalesceDimensions();

    // Permute reduction dimensions to front.
    // TODO: Sort the dimensions based on strides in ascending order to
    // improve thread coalescing.
    void ReorderDimensions(const SizeVector& reduction_dims);

    // Recompute master_strides_ from master_shape_.
    void UpdateMasterStrides();

    // Broadcast src to dst_shape: dimensions of size 1 are expanded and
    // given stride 0.
    static void BroadcastRestride(TensorRef& src,
                                  int64_t dst_ndims,
                                  const int64_t* dst_shape);

    // Symmetrical to BroadcastRestride: the reduced dimensions of dst are
    // given stride 0.
    static void ReductionRestride(TensorRef& dst,
                                  int64_t src_ndims,
                                  const int64_t* src_shape,
                                  const SizeVector& reduction_dims);

    // Returns the data pointer in tr that corresponds to workload_idx.
    OPEN3D_HOST_DEVICE char* GetWorkloadDataPtr(const TensorRef& tr,
                                                int64_t workload_idx) const {
        // For a 0-sized input reduction op, the output Tensor can have
        // workload_idx == 1 while NumWorkloads() == 0, so only the lower
        // bound is checked.
        if (workload_idx < 0) {
            return nullptr;
        }
        int64_t offset = 0;
        for (int64_t i = 0; i < ndims_; ++i) {
            offset += workload_idx / master_strides_[i] * tr.byte_strides_[i];
            workload_idx = workload_idx % master_strides_[i];
        }
        return static_cast<char*>(tr.data_ptr_) + offset;
    }
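
    // Worked example (editorial): with master_shape_ = {2, 3}, the master
    // strides are {3, 1}. For a contiguous Float32 operand,
    // tr.byte_strides_ = {12, 4}, so GetWorkloadDataPtr(tr, 4) computes
    //   dim 0: 4 / 3 = 1 -> offset += 1 * 12, remainder 4 % 3 = 1
    //   dim 1: 1 / 1 = 1 -> offset += 1 * 4,  remainder 0
    // i.e. a byte offset of 16, element (1, 1).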

    // Number of input and output Tensors.
    int64_t num_inputs_ = 0;
    int64_t num_outputs_ = 0;

    // Input and output TensorRefs.
    TensorRef inputs_[MAX_INPUTS];
    TensorRef outputs_[MAX_OUTPUTS];

    // Master shape: the shape that all operands are restrided against.
    int64_t master_shape_[MAX_DIMS];

    // Contiguous strides of master_shape_, used to decompose a linear
    // workload index into per-dimension indices.
    int64_t master_strides_[MAX_DIMS];

    // Number of dimensions of the master shape.
    int64_t ndims_ = 0;

    // Whether this indexer produces the final output (used when an op is
    // split into multiple sub-indexers).
    bool final_output_ = true;

    // Whether the kernel should accumulate into the output rather than
    // overwrite it (used for split reductions).
    bool accumulate_ = false;
};
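
// A minimal usage sketch (editorial; the operand tensor names are
// assumptions). A host-side elementwise kernel could drive an Indexer like
// this for two broadcastable Float32 inputs:
//
//   Indexer indexer({lhs, rhs}, dst, DtypePolicy::ALL_SAME);
//   for (int64_t w = 0; w < indexer.NumWorkloads(); ++w) {
//       const float* a =
//               reinterpret_cast<const float*>(indexer.GetInputPtr(0, w));
//       const float* b =
//               reinterpret_cast<const float*>(indexer.GetInputPtr(1, w));
//       float* out = reinterpret_cast<float*>(indexer.GetOutputPtr(w));
//       *out = *a + *b;
//   }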

class IndexerIterator {
public:
    struct Iterator {
        Iterator() {}
        Iterator(const Indexer& indexer);
        Iterator(Iterator&& other) = default;

        Indexer& operator*() const;
        Iterator& operator++();
        bool operator==(const Iterator& other) const;
        bool operator!=(const Iterator& other) const;

        std::vector<std::unique_ptr<Indexer>> vec_;
    };

    IndexerIterator(const Indexer& indexer);

    Iterator begin() const;
    Iterator end() const;

private:
    const Indexer& indexer_;
};
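
// Editorial sketch: because IndexerIterator exposes begin()/end(), the
// 32-bit-indexable pieces produced by SplitTo32BitIndexing() can be consumed
// with a range-based for loop:
//
//   for (Indexer& sub_indexer : indexer.SplitTo32BitIndexing()) {
//       // Launch a kernel that uses uint32_t offsets on sub_indexer.
//   }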

}  // namespace core
}  // namespace open3d