Open3D (C++ API)
Indexer.h
// ----------------------------------------------------------------------------
// -                        Open3D: www.open3d.org                            -
// ----------------------------------------------------------------------------
// The MIT License (MIT)
//
// Copyright (c) 2018 www.open3d.org
//
// Permission is hereby granted, free of charge, to any person obtaining a copy
// of this software and associated documentation files (the "Software"), to deal
// in the Software without restriction, including without limitation the rights
// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
// copies of the Software, and to permit persons to whom the Software is
// furnished to do so, subject to the following conditions:
//
// The above copyright notice and this permission notice shall be included in
// all copies or substantial portions of the Software.
//
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
// FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
// IN THE SOFTWARE.
// ----------------------------------------------------------------------------

#pragma once

#include "Open3D/Core/CUDAUtils.h"
#include "Open3D/Core/Dtype.h"
#include "Open3D/Core/ShapeUtil.h"
#include "Open3D/Core/SizeVector.h"
#include "Open3D/Core/Tensor.h"
#include "Open3D/Utility/Console.h"

#include <sstream>

namespace open3d {

class Indexer;

class IndexerIterator;

// Maximum number of dimensions of TensorRef.
static constexpr int64_t MAX_DIMS = 10;

// Maximum number of inputs of an op.
// MAX_INPUTS shall be >= MAX_DIMS to support advanced indexing.
static constexpr int64_t MAX_INPUTS = 10;

// Maximum number of outputs of an op. This number can be increased when
// necessary.
static constexpr int64_t MAX_OUTPUTS = 2;

// Fixed-size array type usable from host and device.
template <typename T, int size>
struct alignas(16) SmallArray {
    T data_[size];

    OPEN3D_HOST_DEVICE T operator[](int i) const { return data_[i]; }
    OPEN3D_HOST_DEVICE T& operator[](int i) { return data_[i]; }

    SmallArray() = default;
    SmallArray(const SmallArray&) = default;
    SmallArray& operator=(const SmallArray&) = default;
};
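
// Example (a minimal illustrative sketch): SmallArray behaves like a tiny
// fixed-size array whose element access works on both host and device, e.g.
// for returning per-argument offsets by value from OffsetCalculator::get()
// below.
//
//   SmallArray<uint32_t, 2> offsets;
//   offsets[0] = 0;   // offset for the first argument
//   offsets[1] = 64;  // offset for the second argument
//   uint32_t off = offsets[1];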

template <int NARGS, typename index_t = uint32_t>
struct OffsetCalculator {
    OffsetCalculator(int dims,
                     const int64_t* sizes,
                     const int64_t* const* strides)
        : dims_(dims) {
        if (dims_ > MAX_DIMS) {
            utility::LogError("tensor has too many (>{}) dims_", MAX_DIMS);
        }

        for (int i = 0; i < MAX_DIMS; ++i) {
            if (i < dims_) {
                sizes_[i] = sizes[i];
            } else {
                sizes_[i] = 1;
            }
            for (int arg = 0; arg < NARGS; arg++) {
                strides_[i][arg] = i < dims_ ? strides[arg][i] : 0;
            }
        }
    }

    OPEN3D_HOST_DEVICE SmallArray<index_t, NARGS> get(
            index_t linear_idx) const {
        SmallArray<index_t, NARGS> offsets;
#if defined(__CUDA_ARCH__)
#pragma unroll
#endif
        for (int arg = 0; arg < NARGS; arg++) {
            offsets[arg] = 0;
        }

#if defined(__CUDA_ARCH__)
#pragma unroll
#endif
        for (int dim = 0; dim < MAX_DIMS; ++dim) {
            if (dim == dims_) {
                break;
            }
            index_t mod = linear_idx % sizes_[dim];
            linear_idx = linear_idx / sizes_[dim];

#if defined(__CUDA_ARCH__)
#pragma unroll
#endif
            for (int arg = 0; arg < NARGS; arg++) {
                offsets[arg] += mod * strides_[dim][arg];
            }
        }
        return offsets;
    }

    int dims_;
    index_t sizes_[MAX_DIMS];
    index_t strides_[MAX_DIMS][NARGS];
};
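
// Worked example (a minimal illustrative sketch, with assumed values): with
// dims_ = 2 and sizes_ = {3, 2} (the fastest-varying dimension first, as
// consumed by the modulo loop above), linear_idx = 4 decomposes as
//   dim 0: 4 % 3 == 1, carry 4 / 3 == 1
//   dim 1: 1 % 2 == 1, carry 0
// so the element coordinate is (1, 1), and each argument receives
//   offsets[arg] = 1 * strides_[0][arg] + 1 * strides_[1][arg].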

/// A minimalistic class that references a Tensor.
struct TensorRef {
    // The default copy constructor works on __device__ as well so we don't
    // define it explicitly. shape_[MAX_DIMS] and byte_strides_[MAX_DIMS] will
    // be copied fully.
    TensorRef() : data_ptr_(nullptr), ndims_(0), dtype_byte_size_(0) {}

    TensorRef(const Tensor& t) {
        if (t.NumDims() > MAX_DIMS) {
            utility::LogError("Tensor has too many dimensions {} > {}.",
                              t.NumDims(), MAX_DIMS);
        }
        data_ptr_ = const_cast<void*>(t.GetDataPtr());
        ndims_ = t.NumDims();
        dtype_byte_size_ = DtypeUtil::ByteSize(t.GetDtype());
        for (int64_t i = 0; i < ndims_; ++i) {
            shape_[i] = t.GetShape(i);
            byte_strides_[i] = t.GetStride(i) * dtype_byte_size_;
        }
    }

    void Permute(const SizeVector& dims) {
        // Check that dims is a permutation of [0, 1, 2, ..., n-1].
        if (static_cast<int64_t>(dims.size()) != ndims_) {
            utility::LogError("Number of dimensions mismatch {} != {}.",
                              dims.size(), ndims_);
        }
        std::vector<bool> seen_dims(ndims_, false);
        for (const int64_t& dim : dims) {
            seen_dims[dim] = true;
        }
        if (!std::all_of(seen_dims.begin(), seen_dims.end(),
                         [](bool seen) { return seen; })) {
            utility::LogError(
                    "Permute dims must be a permutation from 0 to {}.",
                    dims.size() - 1);
        }

        // Map to new shape and strides.
        SizeVector new_shape(ndims_);
        SizeVector new_byte_strides(ndims_);
        for (int64_t i = 0; i < ndims_; ++i) {
            int64_t old_dim = shape_util::WrapDim(dims[i], ndims_);
            new_shape[i] = shape_[old_dim];
            new_byte_strides[i] = byte_strides_[old_dim];
        }
        for (int64_t i = 0; i < ndims_; ++i) {
            shape_[i] = new_shape[i];
            byte_strides_[i] = new_byte_strides[i];
        }
    }

    bool operator==(const TensorRef& other) const {
        bool rc = true;
        rc = rc && (data_ptr_ == other.data_ptr_);
        rc = rc && (ndims_ == other.ndims_);
        rc = rc && (dtype_byte_size_ == other.dtype_byte_size_);
        for (int64_t i = 0; i < ndims_; ++i) {
            rc = rc && (shape_[i] == other.shape_[i]);
            rc = rc && (byte_strides_[i] == other.byte_strides_[i]);
        }
        return rc;
    }

    bool operator!=(const TensorRef& other) const { return !(*this == other); }

    void* data_ptr_;
    int64_t ndims_ = 0;
    int64_t dtype_byte_size_ = 0;
    int64_t shape_[MAX_DIMS];
    int64_t byte_strides_[MAX_DIMS];
};
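
// Example (a minimal illustrative sketch; `t` is an assumed contiguous
// 2 x 3 Float32 Tensor):
//
//   TensorRef ref(t);     // shape_ = {2, 3}, byte_strides_ = {12, 4}
//   ref.Permute({1, 0});  // shape_ = {3, 2}, byte_strides_ = {4, 12}
//
// Permute only re-maps the view; data_ptr_ is untouched.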

enum class DtypePolicy {
    NONE,        // Do not check. Expects the kernel to handle the conversion,
                 // e.g. in a Copy kernel with type casting.
    ALL_SAME,    // All inputs and outputs have the same dtype.
    INPUT_SAME,  // All inputs have the same dtype.
    INPUT_SAME_OUTPUT_BOOL  // All inputs have the same dtype. Outputs
                            // have bool dtype.
};
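
// Illustrative mapping (assumed examples, not an exhaustive list from the
// library):
//   - Copy with type casting      -> DtypePolicy::NONE
//   - element-wise Add / Mul      -> DtypePolicy::ALL_SAME
//   - element-wise comparison ops -> DtypePolicy::INPUT_SAME_OUTPUT_BOOL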

class TensorIterator {
public:
    TensorIterator(const Tensor& tensor)
        : input_(TensorRef(tensor)), ndims_(tensor.NumDims()) {}

    OPEN3D_HOST_DEVICE int64_t NumWorkloads() const {
        int64_t num_workloads = 1;
        for (int64_t i = 0; i < ndims_; ++i) {
            num_workloads *= input_.shape_[i];
        }
        return num_workloads;
    }

    OPEN3D_HOST_DEVICE void* GetPtr(int64_t workload_idx) const {
        if (workload_idx < 0 || workload_idx >= NumWorkloads()) {
            return nullptr;
        }
        int64_t offset = 0;
        workload_idx = workload_idx * input_.dtype_byte_size_;
        for (int64_t i = 0; i < ndims_; ++i) {
            offset += workload_idx / input_.byte_strides_[i] *
                      input_.byte_strides_[i];
            workload_idx = workload_idx % input_.byte_strides_[i];
        }
        return static_cast<void*>(static_cast<char*>(input_.data_ptr_) +
                                  offset);
    }

protected:
    TensorRef input_;
    int64_t ndims_;
};
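
// Worked example (a minimal illustrative sketch, with assumed values): for a
// contiguous 2 x 3 Float32 tensor, byte_strides_ = {12, 4} and
// dtype_byte_size_ = 4, so workload_idx = 4 scales to byte index 16, which
// GetPtr decomposes as
//   dim 0: 16 / 12 == 1 -> offset += 12, remainder 16 % 12 == 4
//   dim 1:  4 /  4 == 1 -> offset +=  4, remainder 0
// returning data_ptr_ + 16, i.e. element (1, 1).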

class Indexer {
public:
    Indexer() {}
    Indexer(const Indexer&) = default;
    Indexer& operator=(const Indexer&) = default;

    Indexer(const std::vector<Tensor>& input_tensors,
            const Tensor& output_tensor,
            DtypePolicy dtype_policy = DtypePolicy::ALL_SAME,
            const SizeVector& reduction_dims = {});

    Indexer(const std::vector<Tensor>& input_tensors,
            const std::vector<Tensor>& output_tensors,
            DtypePolicy dtype_policy = DtypePolicy::ALL_SAME,
            const SizeVector& reduction_dims = {});

    bool CanUse32BitIndexing() const;

    IndexerIterator SplitTo32BitIndexing() const;

    std::unique_ptr<Indexer> SplitLargestDim();

    Indexer GetPerOutputIndexer(int64_t output_idx) const;

    bool ShouldAccumulate() const { return accumulate_; }

    bool IsFinalOutput() const { return final_output_; }

    void ShrinkDim(int64_t dim, int64_t start, int64_t size);

    int64_t NumReductionDims() const;

    /// Returns number of dimensions of the Indexer.
    int64_t NumDims() const { return ndims_; }

    const int64_t* GetMasterShape() const { return master_shape_; }
    int64_t* GetMasterShape() { return master_shape_; }

    const int64_t* GetMasterStrides() const { return master_strides_; }

    int64_t NumWorkloads() const;

    int64_t NumOutputElements() const;

    /// Number of input Tensors.
    int64_t NumInputs() const { return num_inputs_; }

    /// Returns input TensorRef.
    TensorRef& GetInput(int64_t i) {
        if (i >= num_inputs_ || i < 0) {
            utility::LogError("0 <= i < {} required, however, i = {}.",
                              num_inputs_, i);
        }
        return inputs_[i];
    }
    const TensorRef& GetInput(int64_t i) const {
        if (i >= num_inputs_ || i < 0) {
            utility::LogError("0 <= i < {} required, however, i = {}.",
                              num_inputs_, i);
        }
        return inputs_[i];
    }

    /// Returns output TensorRef.
    TensorRef& GetOutput(int64_t i) {
        if (i >= num_outputs_ || i < 0) {
            utility::LogError("0 <= i < {} required, however, i = {}.",
                              num_outputs_, i);
        }
        return outputs_[i];
    }
    const TensorRef& GetOutput(int64_t i) const {
        if (i >= num_outputs_ || i < 0) {
            utility::LogError("0 <= i < {} required, however, i = {}.",
                              num_outputs_, i);
        }
        return outputs_[i];
    }

    TensorRef& GetOutput() {
        if (num_outputs_ > 1) {
            utility::LogError("num_outputs_ == {} > 1, use GetOutput(i)",
                              num_outputs_);
        }
        return GetOutput(0);
    }
    const TensorRef& GetOutput() const {
        if (num_outputs_ > 1) {
            utility::LogError("num_outputs_ == {} > 1, use GetOutput(i)",
                              num_outputs_);
        }
        return GetOutput(0);
    }

    /// Returns true if the dim-th dimension is reduced.
    bool IsReductionDim(int64_t dim) const {
        // All outputs have the same shape and reduction dims. Even if they
        // don't have the same initial strides, the reduced strides are always
        // set to 0. Thus it is okay to use outputs_[0].
        return outputs_[0].byte_strides_[dim] == 0 && master_shape_[dim] > 1;
    }

    OPEN3D_HOST_DEVICE char* GetInputPtr(int64_t input_idx,
                                         int64_t workload_idx) const {
        if (input_idx < 0 || input_idx >= num_inputs_) {
            return nullptr;
        }
        return GetWorkloadDataPtr(inputs_[input_idx], workload_idx);
    }

    OPEN3D_HOST_DEVICE char* GetOutputPtr(int64_t workload_idx) const {
        return GetWorkloadDataPtr(outputs_[0], workload_idx);
    }
    OPEN3D_HOST_DEVICE char* GetOutputPtr(int64_t output_idx,
                                          int64_t workload_idx) const {
        return GetWorkloadDataPtr(outputs_[output_idx], workload_idx);
    }

protected:
    void CoalesceDimensions();

    // Permute reduction dimensions to front.
    // TODO: Sort the dimensions based on strides in ascending order to improve
    // thread coalescing.
    void ReorderDimensions(const SizeVector& reduction_dims);

    void UpdateMasterStrides();

    static void BroadcastRestride(TensorRef& src,
                                  int64_t dst_ndims,
                                  const int64_t* dst_shape);

    static void ReductionRestride(TensorRef& dst,
                                  int64_t src_ndims,
                                  const int64_t* src_shape,
                                  const SizeVector& reduction_dims);

    OPEN3D_HOST_DEVICE char* GetWorkloadDataPtr(const TensorRef& tr,
                                                int64_t workload_idx) const {
        // For 0-sized input reduction op, the output Tensor
        // workload_idx == 1 > NumWorkloads() == 0.
        if (workload_idx < 0) {
            return nullptr;
        }
        int64_t offset = 0;
        for (int64_t i = 0; i < ndims_; ++i) {
            offset += workload_idx / master_strides_[i] * tr.byte_strides_[i];
            workload_idx = workload_idx % master_strides_[i];
        }
        return static_cast<char*>(tr.data_ptr_) + offset;
    }

    int64_t num_inputs_ = 0;
    int64_t num_outputs_ = 0;

    TensorRef inputs_[MAX_INPUTS];

    TensorRef outputs_[MAX_OUTPUTS];

    int64_t master_shape_[MAX_DIMS];

    int64_t master_strides_[MAX_DIMS];

    int64_t ndims_ = 0;

    bool final_output_ = true;

    bool accumulate_ = false;
};
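
// Usage sketch (an assumption about typical kernel-side usage, not a verbatim
// excerpt from Open3D's kernels; `lhs`, `rhs`, and `dst` are hypothetical
// Float32 Tensors with broadcast-compatible shapes):
//
//   Indexer indexer({lhs, rhs}, dst, DtypePolicy::ALL_SAME);
//   for (int64_t w = 0; w < indexer.NumWorkloads(); ++w) {
//       const float* a =
//               reinterpret_cast<const float*>(indexer.GetInputPtr(0, w));
//       const float* b =
//               reinterpret_cast<const float*>(indexer.GetInputPtr(1, w));
//       float* c = reinterpret_cast<float*>(indexer.GetOutputPtr(w));
//       *c = *a + *b;
//   }
//
// Internally, GetWorkloadDataPtr decomposes w by master_strides_ and applies
// each TensorRef's own byte_strides_, so broadcast inputs (stride 0) and
// non-contiguous views are handled uniformly.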

class IndexerIterator {
public:
    struct Iterator {
        Iterator() {}
        Iterator(const Indexer& indexer);
        Iterator(Iterator&& other) = default;

        Indexer& operator*() const;
        Iterator& operator++();
        bool operator==(const Iterator& other) const;
        bool operator!=(const Iterator& other) const;

        std::vector<std::unique_ptr<Indexer>> vec_;
    };

    IndexerIterator(const Indexer& indexer);

    Iterator begin() const;
    Iterator end() const;

private:
    const Indexer& indexer_;
};
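
// Usage sketch (assuming begin()/end() above are meant for range-based
// iteration; LaunchKernel is a hypothetical launcher): kernels restricted to
// 32-bit indexing can split an oversized Indexer and launch once per piece.
//
//   if (indexer.CanUse32BitIndexing()) {
//       LaunchKernel(indexer);
//   } else {
//       for (Indexer& sub_indexer : indexer.SplitTo32BitIndexing()) {
//           LaunchKernel(sub_indexer);
//       }
//   }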

}  // namespace open3d