22 #ifdef BUILD_ISPC_MODULE
34 class IndexerIterator;
37 static constexpr int64_t MAX_DIMS = 10;
41 static constexpr int64_t MAX_INPUTS = 10;
45 static constexpr int64_t MAX_OUTPUTS = 2;
47 template <
int NARGS,
typename index_t = u
int32_t>
51 const int64_t*
const* strides)
53 if (
dims_ > MAX_DIMS) {
57 for (
int i = 0; i < MAX_DIMS; ++i) {
63 for (
int arg = 0; arg < NARGS; arg++) {
72 #if defined(__CUDA_ARCH__)
75 for (
int arg = 0; arg < NARGS; arg++) {
79 #if defined(__CUDA_ARCH__)
82 for (
int dim = 0; dim < MAX_DIMS; ++dim) {
87 linear_idx = linear_idx /
sizes_[dim];
89 #if defined(__CUDA_ARCH__)
92 for (
int arg = 0; arg < NARGS; arg++) {
93 offsets[arg] += mod *
strides_[dim][arg];
119 for (int64_t i = 0; i <
ndims_; ++i) {
133 if (
static_cast<int64_t
>(dims.
size()) !=
ndims_) {
137 std::vector<bool> seen_dims(
ndims_,
false);
138 for (
const int64_t& dim : dims) {
139 seen_dims[dim] =
true;
141 if (!std::all_of(seen_dims.begin(), seen_dims.end(),
142 [](
bool seen) { return seen; })) {
144 "Permute dims must be a permuntation from 0 to {}.",
151 for (int64_t i = 0; i <
ndims_; ++i) {
153 new_shape[i] =
shape_[old_dim];
156 for (int64_t i = 0; i <
ndims_; ++i) {
166 for (int64_t i = 0; i <
ndims_; ++i) {
178 for (int64_t i = 0; i <
ndims_; ++i) {
187 #ifdef BUILD_ISPC_MODULE
189 ispc::TensorRef ToISPC()
const;
227 int64_t num_workloads = 1;
228 for (int64_t i = 0; i <
ndims_; ++i) {
231 return num_workloads;
235 if (workload_idx < 0 || workload_idx >=
NumWorkloads()) {
240 for (int64_t i = 0; i <
ndims_; ++i) {
270 Indexer(
const std::vector<Tensor>& input_tensors,
271 const Tensor& output_tensor,
275 Indexer(
const std::vector<Tensor>& input_tensors,
276 const std::vector<Tensor>& output_tensors,
406 int64_t workload_idx)
const {
422 template <
typename T>
424 int64_t workload_idx)
const {
428 return GetWorkloadDataPtr<T>(
inputs_[input_idx],
449 template <
typename T>
461 int64_t workload_idx)
const {
472 template <
typename T>
474 int64_t workload_idx)
const {
475 return GetWorkloadDataPtr<T>(
outputs_[output_idx],
480 #ifdef BUILD_ISPC_MODULE
482 ispc::Indexer ToISPC()
const;
529 const int64_t* dst_shape);
535 const int64_t* src_shape,
543 int64_t workload_idx)
const {
546 if (workload_idx < 0) {
550 return static_cast<char*
>(tr.
data_ptr_) +
554 for (int64_t i = 0; i <
ndims_; ++i) {
569 template <
typename T>
572 int64_t workload_idx)
const {
575 if (workload_idx < 0) {
579 return static_cast<T*
>(tr.
data_ptr_) + workload_idx;
582 for (int64_t i = 0; i <
ndims_; ++i) {
587 return static_cast<T*
>(
static_cast<void*
>(
650 std::vector<std::unique_ptr<Indexer>>
vec_;
#define OPEN3D_HOST_DEVICE
Definition: CUDAUtils.h:44
#define LogError(...)
Definition: Logging.h:48
int64_t ByteSize() const
Definition: Dtype.h:58
Definition: Indexer.h:261
void UpdateContiguousFlags()
Update inputs_contiguous_ and outputs_contiguous_.
Definition: Indexer.cpp:565
bool inputs_contiguous_[MAX_INPUTS]
Array of contiguous flags for all input TensorRefs.
Definition: Indexer.h:603
bool outputs_contiguous_[MAX_OUTPUTS]
Array of contiguous flags for all output TensorRefs.
Definition: Indexer.h:606
OPEN3D_HOST_DEVICE T * GetOutputPtr(int64_t output_idx, int64_t workload_idx) const
Definition: Indexer.h:473
Indexer()
Definition: Indexer.h:263
int64_t num_outputs_
Definition: Indexer.h:594
OPEN3D_HOST_DEVICE char * GetWorkloadDataPtr(const TensorRef &tr, bool tr_contiguous, int64_t workload_idx) const
Definition: Indexer.h:541
static void ReductionRestride(TensorRef &dst, int64_t src_ndims, const int64_t *src_shape, const SizeVector &reduction_dims)
Definition: Indexer.cpp:602
TensorRef outputs_[MAX_OUTPUTS]
Array of output TensorRefs.
Definition: Indexer.h:600
bool IsReductionDim(int64_t dim) const
Returns true if the dim-th dimension is reduced.
Definition: Indexer.h:393
const TensorRef & GetOutput() const
Definition: Indexer.h:384
OPEN3D_HOST_DEVICE T * GetInputPtr(int64_t input_idx, int64_t workload_idx) const
Definition: Indexer.h:423
int64_t NumReductionDims() const
Returns the number of reduction dimensions.
Definition: Indexer.cpp:395
int64_t NumInputs() const
Number of input Tensors.
Definition: Indexer.h:338
OPEN3D_HOST_DEVICE char * GetOutputPtr(int64_t workload_idx) const
Definition: Indexer.h:437
Indexer(const Indexer &)=default
bool IsFinalOutput() const
Definition: Indexer.h:298
const TensorRef & GetOutput(int64_t i) const
Definition: Indexer.h:367
void ReorderDimensions(const SizeVector &reduction_dims)
Definition: Indexer.cpp:491
void CoalesceDimensions()
Definition: Indexer.cpp:425
int64_t master_shape_[MAX_DIMS]
Definition: Indexer.h:619
int64_t NumOutputElements() const
Returns the number of output elements.
Definition: Indexer.cpp:414
int64_t num_inputs_
Number of input and output Tensors.
Definition: Indexer.h:593
bool accumulate_
Definition: Indexer.h:635
bool CanUse32BitIndexing() const
Returns true iff the maximum_offsets in bytes are smaller than 2^31 - 1.
Definition: Indexer.cpp:198
TensorRef inputs_[MAX_INPUTS]
Array of input TensorRefs.
Definition: Indexer.h:597
TensorRef & GetOutput(int64_t i)
Returns output TensorRef.
Definition: Indexer.h:360
bool ShouldAccumulate() const
Definition: Indexer.h:296
const TensorRef & GetInput(int64_t i) const
Definition: Indexer.h:351
int64_t master_strides_[MAX_DIMS]
Definition: Indexer.h:623
Indexer GetPerOutputIndexer(int64_t output_idx) const
Definition: Indexer.cpp:303
int64_t * GetMasterShape()
Definition: Indexer.h:316
Indexer & operator=(const Indexer &)=default
const int64_t * GetMasterShape() const
Definition: Indexer.h:315
std::unique_ptr< Indexer > SplitLargestDim()
Definition: Indexer.cpp:238
int64_t NumWorkloads() const
Definition: Indexer.cpp:406
IndexerIterator SplitTo32BitIndexing() const
Definition: Indexer.cpp:234
OPEN3D_HOST_DEVICE T * GetWorkloadDataPtr(const TensorRef &tr, bool tr_contiguous, int64_t workload_idx) const
Definition: Indexer.h:570
void UpdateMasterStrides()
Update master_strides_ based on master_shape_.
Definition: Indexer.cpp:556
static void BroadcastRestride(TensorRef &src, int64_t dst_ndims, const int64_t *dst_shape)
Definition: Indexer.cpp:575
bool final_output_
Definition: Indexer.h:631
OPEN3D_HOST_DEVICE char * GetOutputPtr(int64_t output_idx, int64_t workload_idx) const
Definition: Indexer.h:460
OPEN3D_HOST_DEVICE char * GetInputPtr(int64_t input_idx, int64_t workload_idx) const
Definition: Indexer.h:405
TensorRef & GetInput(int64_t i)
Returns input TensorRef.
Definition: Indexer.h:344
TensorRef & GetOutput()
Definition: Indexer.h:377
OPEN3D_HOST_DEVICE T * GetOutputPtr(int64_t workload_idx) const
Definition: Indexer.h:450
int64_t ndims_
Indexer's global number of dimensions.
Definition: Indexer.h:626
void ShrinkDim(int64_t dim, int64_t start, int64_t size)
Definition: Indexer.cpp:364
int64_t NumDims() const
Returns number of dimensions of the Indexer.
Definition: Indexer.h:311
int64_t NumOutputs() const
Number of output Tensors.
Definition: Indexer.h:341
const int64_t * GetMasterStrides() const
Definition: Indexer.h:320
Definition: Indexer.h:638
Iterator end() const
Definition: Indexer.cpp:671
Iterator begin() const
Definition: Indexer.cpp:667
IndexerIterator(const Indexer &indexer)
Definition: Indexer.cpp:641
Definition: SizeVector.h:69
size_t size() const
Definition: SmallVector.h:119
T * GetDataPtr()
Definition: Tensor.h:1133
SizeVector GetShape() const
Definition: Tensor.h:1116
int64_t NumDims() const
Definition: Tensor.h:1161
int64_t GetStride(int64_t dim) const
Definition: Tensor.h:1128
Dtype GetDtype() const
Definition: Tensor.h:1153
Definition: Indexer.h:221
OPEN3D_HOST_DEVICE void * GetPtr(int64_t workload_idx) const
Definition: Indexer.h:234
OPEN3D_HOST_DEVICE int64_t NumWorkloads() const
Definition: Indexer.h:226
TensorRef input_
Definition: Indexer.h:250
TensorIterator(const Tensor &tensor)
Definition: Indexer.h:223
int64_t ndims_
Definition: Indexer.h:251
int64_t WrapDim(int64_t dim, int64_t max_dim, bool inclusive)
Wrap around negative dim.
Definition: ShapeUtil.cpp:131
SizeVector DefaultStrides(const SizeVector &shape)
Compute default strides for a shape when a tensor is contiguous.
Definition: ShapeUtil.cpp:214
DtypePolicy
Definition: Indexer.h:199
int index_t
Definition: VoxelBlockGrid.h:22
Definition: PinholeCameraIntrinsic.cpp:16
Definition: Indexer.h:640
bool operator!=(const Iterator &other) const
Definition: Indexer.cpp:663
Iterator()
Definition: Indexer.h:641
std::vector< std::unique_ptr< Indexer > > vec_
Definition: Indexer.h:650
Iterator(Iterator &&other)=default
Indexer & operator*() const
Definition: Indexer.cpp:649
bool operator==(const Iterator &other) const
Definition: Indexer.cpp:660
Iterator & operator++()
Definition: Indexer.cpp:651
index_t sizes_[MAX_DIMS]
Definition: Indexer.h:100
int dims_
Definition: Indexer.h:99
OPEN3D_HOST_DEVICE utility::MiniVec< index_t, NARGS > get(index_t linear_idx) const
Definition: Indexer.h:69
OffsetCalculator(int dims, const int64_t *sizes, const int64_t *const *strides)
Definition: Indexer.h:49
index_t strides_[MAX_DIMS][NARGS]
Definition: Indexer.h:101
A minimalistic class that references a Tensor.
Definition: Indexer.h:105
int64_t dtype_byte_size_
Definition: Indexer.h:194
void Permute(const SizeVector &dims)
Permute (dimension shuffle) the reference to a Tensor.
Definition: Indexer.h:131
int64_t ndims_
Definition: Indexer.h:193
bool operator!=(const TensorRef &other) const
Definition: Indexer.h:185
TensorRef(const Tensor &t)
Definition: Indexer.h:111
TensorRef()
Definition: Indexer.h:109
int64_t shape_[MAX_DIMS]
Definition: Indexer.h:195
bool IsContiguous() const
Returns True if the underlying memory buffer is contiguous.
Definition: Indexer.h:163
void * data_ptr_
Definition: Indexer.h:192
bool operator==(const TensorRef &other) const
Definition: Indexer.h:173
int64_t byte_strides_[MAX_DIMS]
Definition: Indexer.h:196