Open3D (C++ API)  0.17.0
Indexer.h
Go to the documentation of this file.
1 // ----------------------------------------------------------------------------
2 // - Open3D: www.open3d.org -
3 // ----------------------------------------------------------------------------
4 // Copyright (c) 2018-2023 www.open3d.org
5 // SPDX-License-Identifier: MIT
6 // ----------------------------------------------------------------------------
7 
8 #pragma once
9 
10 #include <sstream>
11 
12 #include "open3d/core/CUDAUtils.h"
13 #include "open3d/core/Dtype.h"
14 #include "open3d/core/ShapeUtil.h"
15 #include "open3d/core/SizeVector.h"
16 #include "open3d/core/Tensor.h"
17 #include "open3d/utility/Logging.h"
18 #include "open3d/utility/MiniVec.h"
19 
20 // The generated "Indexer_ispc.h" header will not be available outside the
21 // library. Therefore, forward declare all exported ISPC classes.
22 #ifdef BUILD_ISPC_MODULE
23 namespace ispc {
24 struct TensorRef;
25 struct Indexer;
26 } // namespace ispc
27 #endif
28 
29 namespace open3d {
30 namespace core {
31 
32 class Indexer;
33 
34 class IndexerIterator;
35 
// Maximum number of dimensions of TensorRef.
static constexpr int64_t MAX_DIMS = 10;

// Maximum number of inputs of an op.
// MAX_INPUTS shall be >= MAX_DIMS to support advanced indexing.
static constexpr int64_t MAX_INPUTS = 10;

// Maximum number of outputs of an op. This number can be increased when
// necessary.
static constexpr int64_t MAX_OUTPUTS = 2;
46 
47 template <int NARGS, typename index_t = uint32_t>
49  OffsetCalculator(int dims,
50  const int64_t* sizes,
51  const int64_t* const* strides)
52  : dims_(dims) {
53  if (dims_ > MAX_DIMS) {
54  utility::LogError("tensor has too many (>{}) dims_", MAX_DIMS);
55  }
56 
57  for (int i = 0; i < MAX_DIMS; ++i) {
58  if (i < dims_) {
59  sizes_[i] = sizes[i];
60  } else {
61  sizes_[i] = 1;
62  }
63  for (int arg = 0; arg < NARGS; arg++) {
64  strides_[i][arg] = i < dims_ ? strides[arg][i] : 0;
65  }
66  }
67  }
68 
70  index_t linear_idx) const {
72 #if defined(__CUDA_ARCH__)
73 #pragma unroll
74 #endif
75  for (int arg = 0; arg < NARGS; arg++) {
76  offsets[arg] = 0;
77  }
78 
79 #if defined(__CUDA_ARCH__)
80 #pragma unroll
81 #endif
82  for (int dim = 0; dim < MAX_DIMS; ++dim) {
83  if (dim == dims_) {
84  break;
85  }
86  index_t mod = linear_idx % sizes_[dim];
87  linear_idx = linear_idx / sizes_[dim];
88 
89 #if defined(__CUDA_ARCH__)
90 #pragma unroll
91 #endif
92  for (int arg = 0; arg < NARGS; arg++) {
93  offsets[arg] += mod * strides_[dim][arg];
94  }
95  }
96  return offsets;
97  }
98 
99  int dims_;
100  index_t sizes_[MAX_DIMS];
101  index_t strides_[MAX_DIMS][NARGS];
102 };
103 
105 struct TensorRef {
106  // The default copy constructor works on __device__ as well so we don't
107  // define it explicitly. shape_[MAX_DIMS] and strides[MAX_DIMS] will be
108  // copied fully.
109  TensorRef() : data_ptr_(nullptr), ndims_(0), dtype_byte_size_(0) {}
110 
111  TensorRef(const Tensor& t) {
112  if (t.NumDims() > MAX_DIMS) {
113  utility::LogError("Tenor has too many dimensions {} > {}.",
114  t.NumDims(), MAX_DIMS);
115  }
116  data_ptr_ = const_cast<void*>(t.GetDataPtr());
117  ndims_ = t.NumDims();
119  for (int64_t i = 0; i < ndims_; ++i) {
120  shape_[i] = t.GetShape(i);
122  }
123  }
124 
131  void Permute(const SizeVector& dims) {
132  // Check dims are permuntation of [0, 1, 2, ..., n-1]
133  if (static_cast<int64_t>(dims.size()) != ndims_) {
134  utility::LogError("Number of dimensions mismatch {} != {}.",
135  dims.size(), ndims_);
136  }
137  std::vector<bool> seen_dims(ndims_, false);
138  for (const int64_t& dim : dims) {
139  seen_dims[dim] = true;
140  }
141  if (!std::all_of(seen_dims.begin(), seen_dims.end(),
142  [](bool seen) { return seen; })) {
144  "Permute dims must be a permuntation from 0 to {}.",
145  dims.size() - 1);
146  }
147 
148  // Map to new shape and strides
149  SizeVector new_shape(ndims_);
150  SizeVector new_byte_strides(ndims_);
151  for (int64_t i = 0; i < ndims_; ++i) {
152  int64_t old_dim = shape_util::WrapDim(dims[i], ndims_);
153  new_shape[i] = shape_[old_dim];
154  new_byte_strides[i] = byte_strides_[old_dim];
155  }
156  for (int64_t i = 0; i < ndims_; ++i) {
157  shape_[i] = new_shape[i];
158  byte_strides_[i] = new_byte_strides[i];
159  }
160  }
161 
163  inline bool IsContiguous() const {
164  SizeVector shape(ndims_);
165  SizeVector strides(ndims_);
166  for (int64_t i = 0; i < ndims_; ++i) {
167  shape[i] = shape_[i];
168  strides[i] = byte_strides_[i] / dtype_byte_size_;
169  }
170  return shape_util::DefaultStrides(shape) == strides;
171  }
172 
173  bool operator==(const TensorRef& other) const {
174  bool rc = true;
175  rc = rc && (data_ptr_ == other.data_ptr_);
176  rc = rc && (ndims_ == other.ndims_);
177  rc = rc && (dtype_byte_size_ == other.dtype_byte_size_);
178  for (int64_t i = 0; i < ndims_; ++i) {
179  rc = rc && (shape_[i] == other.shape_[i]);
180  rc = rc && (byte_strides_[i] == other.byte_strides_[i]);
181  }
182  return rc;
183  }
184 
185  bool operator!=(const TensorRef& other) const { return !(*this == other); }
186 
187 #ifdef BUILD_ISPC_MODULE
189  ispc::TensorRef ToISPC() const;
190 #endif
191 
192  void* data_ptr_;
193  int64_t ndims_ = 0;
194  int64_t dtype_byte_size_ = 0;
195  int64_t shape_[MAX_DIMS];
196  int64_t byte_strides_[MAX_DIMS];
197 };
198 
/// Policy controlling how the Indexer validates input/output dtypes.
enum class DtypePolicy {
    NONE,        ///< Do not check. Expects the kernel to handle the
                 ///< conversion. E.g. in Copy kernel with type casting.
    ALL_SAME,    ///< All inputs and outputs have the same dtype.
    INPUT_SAME,  ///< All inputs have the same dtype.
    INPUT_SAME_OUTPUT_BOOL  ///< All inputs have the same dtype. Outputs
                            ///< have bool dtype.
};
207 
222 public:
223  TensorIterator(const Tensor& tensor)
224  : input_(TensorRef(tensor)), ndims_(tensor.NumDims()) {}
225 
227  int64_t num_workloads = 1;
228  for (int64_t i = 0; i < ndims_; ++i) {
229  num_workloads *= input_.shape_[i];
230  }
231  return num_workloads;
232  }
233 
234  OPEN3D_HOST_DEVICE void* GetPtr(int64_t workload_idx) const {
235  if (workload_idx < 0 || workload_idx >= NumWorkloads()) {
236  return nullptr;
237  }
238  int64_t offset = 0;
239  workload_idx = workload_idx * input_.dtype_byte_size_;
240  for (int64_t i = 0; i < ndims_; ++i) {
241  offset += workload_idx / input_.byte_strides_[i] *
243  workload_idx = workload_idx % input_.byte_strides_[i];
244  }
245  return static_cast<void*>(static_cast<char*>(input_.data_ptr_) +
246  offset);
247  }
248 
249 protected:
251  int64_t ndims_;
252 };
253 
261 class Indexer {
262 public:
263  Indexer() {}
264  Indexer(const Indexer&) = default;
265  Indexer& operator=(const Indexer&) = default;
266 
270  Indexer(const std::vector<Tensor>& input_tensors,
271  const Tensor& output_tensor,
272  DtypePolicy dtype_policy = DtypePolicy::ALL_SAME,
273  const SizeVector& reduction_dims = {});
274 
275  Indexer(const std::vector<Tensor>& input_tensors,
276  const std::vector<Tensor>& output_tensors,
277  DtypePolicy dtype_policy = DtypePolicy::ALL_SAME,
278  const SizeVector& reduction_dims = {});
279 
281  bool CanUse32BitIndexing() const;
282 
285  IndexerIterator SplitTo32BitIndexing() const;
286 
290  std::unique_ptr<Indexer> SplitLargestDim();
291 
294  Indexer GetPerOutputIndexer(int64_t output_idx) const;
295 
296  bool ShouldAccumulate() const { return accumulate_; }
297 
298  bool IsFinalOutput() const { return final_output_; }
299 
305  void ShrinkDim(int64_t dim, int64_t start, int64_t size);
306 
308  int64_t NumReductionDims() const;
309 
311  int64_t NumDims() const { return ndims_; }
312 
315  const int64_t* GetMasterShape() const { return master_shape_; }
316  int64_t* GetMasterShape() { return master_shape_; }
317 
320  const int64_t* GetMasterStrides() const { return master_strides_; }
321 
332  int64_t NumWorkloads() const;
333 
335  int64_t NumOutputElements() const;
336 
338  int64_t NumInputs() const { return num_inputs_; }
339 
341  int64_t NumOutputs() const { return num_outputs_; }
342 
344  TensorRef& GetInput(int64_t i) {
345  if (i >= num_inputs_ || i < 0) {
346  utility::LogError("0 <= i < {} required, however, i = {}.",
347  num_inputs_, i);
348  }
349  return inputs_[i];
350  }
351  const TensorRef& GetInput(int64_t i) const {
352  if (i >= num_inputs_ || i < 0) {
353  utility::LogError("0 <= i < {} required, however, i = {}.",
354  num_inputs_, i);
355  }
356  return inputs_[i];
357  }
358 
360  TensorRef& GetOutput(int64_t i) {
361  if (i >= num_outputs_ || i < 0) {
362  utility::LogError("0 <= i < {} required, however, i = {}.",
363  num_outputs_, i);
364  }
365  return outputs_[i];
366  }
367  const TensorRef& GetOutput(int64_t i) const {
368  if (i >= num_outputs_ || i < 0) {
369  utility::LogError("0 <= i < {} required, however, i = {}.",
370  num_outputs_, i);
371  }
372  return outputs_[i];
373  }
374 
378  if (num_outputs_ > 1) {
379  utility::LogError("num_outputs_ == {} > 0, use GetOutput(i)",
380  num_outputs_);
381  }
382  return GetOutput(0);
383  }
384  const TensorRef& GetOutput() const {
385  if (num_outputs_ > 1) {
386  utility::LogError("num_outputs_ == {} > 0, use GetOutput(i)",
387  num_outputs_);
388  }
389  return GetOutput(0);
390  }
391 
393  bool IsReductionDim(int64_t dim) const {
394  // All outputs have the same shape and reduction dims. Even if they
395  // don't have the same initial strides, the reduced strides are always
396  // set to 0. Thus it is okay to use outputs_[0].
397  return outputs_[0].byte_strides_[dim] == 0 && master_shape_[dim] > 1;
398  }
399 
405  OPEN3D_HOST_DEVICE char* GetInputPtr(int64_t input_idx,
406  int64_t workload_idx) const {
407  if (input_idx < 0 || input_idx >= num_inputs_) {
408  return nullptr;
409  }
410  return GetWorkloadDataPtr(inputs_[input_idx],
411  inputs_contiguous_[input_idx], workload_idx);
412  }
413 
422  template <typename T>
423  OPEN3D_HOST_DEVICE T* GetInputPtr(int64_t input_idx,
424  int64_t workload_idx) const {
425  if (input_idx < 0 || input_idx >= num_inputs_) {
426  return nullptr;
427  }
428  return GetWorkloadDataPtr<T>(inputs_[input_idx],
429  inputs_contiguous_[input_idx],
430  workload_idx);
431  }
432 
437  OPEN3D_HOST_DEVICE char* GetOutputPtr(int64_t workload_idx) const {
439  workload_idx);
440  }
441 
449  template <typename T>
450  OPEN3D_HOST_DEVICE T* GetOutputPtr(int64_t workload_idx) const {
451  return GetWorkloadDataPtr<T>(outputs_[0], outputs_contiguous_[0],
452  workload_idx);
453  }
454 
460  OPEN3D_HOST_DEVICE char* GetOutputPtr(int64_t output_idx,
461  int64_t workload_idx) const {
462  return GetWorkloadDataPtr(outputs_[output_idx],
463  outputs_contiguous_[output_idx],
464  workload_idx);
465  }
466 
472  template <typename T>
473  OPEN3D_HOST_DEVICE T* GetOutputPtr(int64_t output_idx,
474  int64_t workload_idx) const {
475  return GetWorkloadDataPtr<T>(outputs_[output_idx],
476  outputs_contiguous_[output_idx],
477  workload_idx);
478  }
479 
480 #ifdef BUILD_ISPC_MODULE
482  ispc::Indexer ToISPC() const;
483 #endif
484 
485 protected:
488  void CoalesceDimensions();
489 
490  // Permute reduction dimensions to front.
491  // TODO: Sort the dimensions based on strides in ascending orderto improve
492  // thread coalescing.
493  void ReorderDimensions(const SizeVector& reduction_dims);
494 
496  void UpdateMasterStrides();
497 
499  void UpdateContiguousFlags();
500 
527  static void BroadcastRestride(TensorRef& src,
528  int64_t dst_ndims,
529  const int64_t* dst_shape);
530 
533  static void ReductionRestride(TensorRef& dst,
534  int64_t src_ndims,
535  const int64_t* src_shape,
536  const SizeVector& reduction_dims);
537 
542  bool tr_contiguous,
543  int64_t workload_idx) const {
544  // For 0-sized input reduction op, the output Tensor
545  // workload_idx == 1 > NumWorkloads() == 0.
546  if (workload_idx < 0) {
547  return nullptr;
548  }
549  if (tr_contiguous) {
550  return static_cast<char*>(tr.data_ptr_) +
551  workload_idx * tr.dtype_byte_size_;
552  } else {
553  int64_t offset = 0;
554  for (int64_t i = 0; i < ndims_; ++i) {
555  offset +=
556  workload_idx / master_strides_[i] * tr.byte_strides_[i];
557  workload_idx = workload_idx % master_strides_[i];
558  }
559  return static_cast<char*>(tr.data_ptr_) + offset;
560  }
561  }
562 
569  template <typename T>
571  bool tr_contiguous,
572  int64_t workload_idx) const {
573  // For 0-sized input reduction op, the output Tensor
574  // workload_idx == 1 > NumWorkloads() == 0.
575  if (workload_idx < 0) {
576  return nullptr;
577  }
578  if (tr_contiguous) {
579  return static_cast<T*>(tr.data_ptr_) + workload_idx;
580  } else {
581  int64_t offset = 0;
582  for (int64_t i = 0; i < ndims_; ++i) {
583  offset +=
584  workload_idx / master_strides_[i] * tr.byte_strides_[i];
585  workload_idx = workload_idx % master_strides_[i];
586  }
587  return static_cast<T*>(static_cast<void*>(
588  static_cast<char*>(tr.data_ptr_) + offset));
589  }
590  }
591 
593  int64_t num_inputs_ = 0;
594  int64_t num_outputs_ = 0;
595 
597  TensorRef inputs_[MAX_INPUTS];
598 
600  TensorRef outputs_[MAX_OUTPUTS];
601 
603  bool inputs_contiguous_[MAX_INPUTS];
604 
606  bool outputs_contiguous_[MAX_OUTPUTS];
607 
619  int64_t master_shape_[MAX_DIMS];
620 
623  int64_t master_strides_[MAX_DIMS];
624 
626  int64_t ndims_ = 0;
627 
631  bool final_output_ = true;
632 
635  bool accumulate_ = false;
636 };
637 
639 public:
640  struct Iterator {
641  Iterator(){};
642  Iterator(const Indexer& indexer);
643  Iterator(Iterator&& other) = default;
644 
645  Indexer& operator*() const;
646  Iterator& operator++();
647  bool operator==(const Iterator& other) const;
648  bool operator!=(const Iterator& other) const;
649 
650  std::vector<std::unique_ptr<Indexer>> vec_;
651  };
652 
653  IndexerIterator(const Indexer& indexer);
654 
655  Iterator begin() const;
656  Iterator end() const;
657 
658 private:
659  const Indexer& indexer_;
660 };
661 
662 } // namespace core
663 } // namespace open3d
Common CUDA utilities.
#define OPEN3D_HOST_DEVICE
Definition: CUDAUtils.h:44
#define LogError(...)
Definition: Logging.h:48
int64_t ByteSize() const
Definition: Dtype.h:58
Definition: Indexer.h:261
void UpdateContiguousFlags()
Update input_contiguous_ and output_contiguous_.
Definition: Indexer.cpp:565
bool inputs_contiguous_[MAX_INPUTS]
Array of contiguous flags for all input TensorRefs.
Definition: Indexer.h:603
bool outputs_contiguous_[MAX_OUTPUTS]
Array of contiguous flags for all output TensorRefs.
Definition: Indexer.h:606
OPEN3D_HOST_DEVICE T * GetOutputPtr(int64_t output_idx, int64_t workload_idx) const
Definition: Indexer.h:473
Indexer()
Definition: Indexer.h:263
int64_t num_outputs_
Definition: Indexer.h:594
OPEN3D_HOST_DEVICE char * GetWorkloadDataPtr(const TensorRef &tr, bool tr_contiguous, int64_t workload_idx) const
Definition: Indexer.h:541
static void ReductionRestride(TensorRef &dst, int64_t src_ndims, const int64_t *src_shape, const SizeVector &reduction_dims)
Definition: Indexer.cpp:602
TensorRef outputs_[MAX_OUTPUTS]
Array of output TensorRefs.
Definition: Indexer.h:600
bool IsReductionDim(int64_t dim) const
Returns true if the dim -th dimension is reduced.
Definition: Indexer.h:393
const TensorRef & GetOutput() const
Definition: Indexer.h:384
OPEN3D_HOST_DEVICE T * GetInputPtr(int64_t input_idx, int64_t workload_idx) const
Definition: Indexer.h:423
int64_t NumReductionDims() const
Returns the number of reduction dimensions.
Definition: Indexer.cpp:395
int64_t NumInputs() const
Number of input Tensors.
Definition: Indexer.h:338
OPEN3D_HOST_DEVICE char * GetOutputPtr(int64_t workload_idx) const
Definition: Indexer.h:437
Indexer(const Indexer &)=default
bool IsFinalOutput() const
Definition: Indexer.h:298
const TensorRef & GetOutput(int64_t i) const
Definition: Indexer.h:367
void ReorderDimensions(const SizeVector &reduction_dims)
Definition: Indexer.cpp:491
void CoalesceDimensions()
Definition: Indexer.cpp:425
int64_t master_shape_[MAX_DIMS]
Definition: Indexer.h:619
int64_t NumOutputElements() const
Returns the number of output elements.
Definition: Indexer.cpp:414
int64_t num_inputs_
Number of input and output Tensors.
Definition: Indexer.h:593
bool accumulate_
Definition: Indexer.h:635
bool CanUse32BitIndexing() const
Returns true iff the maximum_offsets in bytes are smaller than 2^31 - 1.
Definition: Indexer.cpp:198
TensorRef inputs_[MAX_INPUTS]
Array of input TensorRefs.
Definition: Indexer.h:597
TensorRef & GetOutput(int64_t i)
Returns output TensorRef.
Definition: Indexer.h:360
bool ShouldAccumulate() const
Definition: Indexer.h:296
const TensorRef & GetInput(int64_t i) const
Definition: Indexer.h:351
int64_t master_strides_[MAX_DIMS]
Definition: Indexer.h:623
Indexer GetPerOutputIndexer(int64_t output_idx) const
Definition: Indexer.cpp:303
int64_t * GetMasterShape()
Definition: Indexer.h:316
Indexer & operator=(const Indexer &)=default
const int64_t * GetMasterShape() const
Definition: Indexer.h:315
std::unique_ptr< Indexer > SplitLargestDim()
Definition: Indexer.cpp:238
int64_t NumWorkloads() const
Definition: Indexer.cpp:406
IndexerIterator SplitTo32BitIndexing() const
Definition: Indexer.cpp:234
OPEN3D_HOST_DEVICE T * GetWorkloadDataPtr(const TensorRef &tr, bool tr_contiguous, int64_t workload_idx) const
Definition: Indexer.h:570
void UpdateMasterStrides()
Update master_strides_ based on master_shape_.
Definition: Indexer.cpp:556
static void BroadcastRestride(TensorRef &src, int64_t dst_ndims, const int64_t *dst_shape)
Definition: Indexer.cpp:575
bool final_output_
Definition: Indexer.h:631
OPEN3D_HOST_DEVICE char * GetOutputPtr(int64_t output_idx, int64_t workload_idx) const
Definition: Indexer.h:460
OPEN3D_HOST_DEVICE char * GetInputPtr(int64_t input_idx, int64_t workload_idx) const
Definition: Indexer.h:405
TensorRef & GetInput(int64_t i)
Returns input TensorRef.
Definition: Indexer.h:344
TensorRef & GetOutput()
Definition: Indexer.h:377
OPEN3D_HOST_DEVICE T * GetOutputPtr(int64_t workload_idx) const
Definition: Indexer.h:450
int64_t ndims_
Indexer's global number of dimensions.
Definition: Indexer.h:626
void ShrinkDim(int64_t dim, int64_t start, int64_t size)
Definition: Indexer.cpp:364
int64_t NumDims() const
Returns number of dimensions of the Indexer.
Definition: Indexer.h:311
int64_t NumOutputs() const
Number of output Tensors.
Definition: Indexer.h:341
const int64_t * GetMasterStrides() const
Definition: Indexer.h:320
Definition: Indexer.h:638
Iterator end() const
Definition: Indexer.cpp:671
Iterator begin() const
Definition: Indexer.cpp:667
IndexerIterator(const Indexer &indexer)
Definition: Indexer.cpp:641
Definition: SizeVector.h:69
size_t size() const
Definition: SmallVector.h:119
Definition: Tensor.h:32
T * GetDataPtr()
Definition: Tensor.h:1133
SizeVector GetShape() const
Definition: Tensor.h:1116
int64_t NumDims() const
Definition: Tensor.h:1161
int64_t GetStride(int64_t dim) const
Definition: Tensor.h:1128
Dtype GetDtype() const
Definition: Tensor.h:1153
Definition: Indexer.h:221
OPEN3D_HOST_DEVICE void * GetPtr(int64_t workload_idx) const
Definition: Indexer.h:234
OPEN3D_HOST_DEVICE int64_t NumWorkloads() const
Definition: Indexer.h:226
TensorRef input_
Definition: Indexer.h:250
TensorIterator(const Tensor &tensor)
Definition: Indexer.h:223
int64_t ndims_
Definition: Indexer.h:251
int size
Definition: FilePCD.cpp:40
int offset
Definition: FilePCD.cpp:45
int64_t WrapDim(int64_t dim, int64_t max_dim, bool inclusive)
Wrap around negative dim.
Definition: ShapeUtil.cpp:131
SizeVector DefaultStrides(const SizeVector &shape)
Compute default strides for a shape when a tensor is contiguous.
Definition: ShapeUtil.cpp:214
DtypePolicy
Definition: Indexer.h:199
int index_t
Definition: VoxelBlockGrid.h:22
Definition: PinholeCameraIntrinsic.cpp:16
bool operator!=(const Iterator &other) const
Definition: Indexer.cpp:663
Iterator()
Definition: Indexer.h:641
std::vector< std::unique_ptr< Indexer > > vec_
Definition: Indexer.h:650
Iterator(Iterator &&other)=default
Indexer & operator*() const
Definition: Indexer.cpp:649
bool operator==(const Iterator &other) const
Definition: Indexer.cpp:660
Iterator & operator++()
Definition: Indexer.cpp:651
Definition: Indexer.h:48
index_t sizes_[MAX_DIMS]
Definition: Indexer.h:100
int dims_
Definition: Indexer.h:99
OPEN3D_HOST_DEVICE utility::MiniVec< index_t, NARGS > get(index_t linear_idx) const
Definition: Indexer.h:69
OffsetCalculator(int dims, const int64_t *sizes, const int64_t *const *strides)
Definition: Indexer.h:49
index_t strides_[MAX_DIMS][NARGS]
Definition: Indexer.h:101
A minimalistic class that reference a Tensor.
Definition: Indexer.h:105
int64_t dtype_byte_size_
Definition: Indexer.h:194
void Permute(const SizeVector &dims)
Permute (dimension shuffle) the reference to a Tensor.
Definition: Indexer.h:131
int64_t ndims_
Definition: Indexer.h:193
bool operator!=(const TensorRef &other) const
Definition: Indexer.h:185
TensorRef(const Tensor &t)
Definition: Indexer.h:111
TensorRef()
Definition: Indexer.h:109
int64_t shape_[MAX_DIMS]
Definition: Indexer.h:195
bool IsContiguous() const
Returns True if the underlying memory buffer is contiguous.
Definition: Indexer.h:163
void * data_ptr_
Definition: Indexer.h:192
bool operator==(const TensorRef &other) const
Definition: Indexer.h:173
int64_t byte_strides_[MAX_DIMS]
Definition: Indexer.h:196
Definition: MiniVec.h:24