0.10.0/cpp_api/_indexer_8h_source.html

 // ----------------------------------------------------------------------------
 // -                        Open3D: www.open3d.org                            -
 // ----------------------------------------------------------------------------
 // The MIT License (MIT)
 //
 // Copyright (c) 2018 www.open3d.org
 //
 // Permission is hereby granted, free of charge, to any person obtaining a copy
 // of this software and associated documentation files (the "Software"), to deal
 // in the Software without restriction, including without limitation the rights
 // to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
 // copies of the Software, and to permit persons to whom the Software is
 // furnished to do so, subject to the following conditions:
 //
 // The above copyright notice and this permission notice shall be included in
 // all copies or substantial portions of the Software.
 //
 // THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 // IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 // FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
 // AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 // LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 // FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
 // IN THE SOFTWARE.
 // ----------------------------------------------------------------------------

 #pragma once

 #include "Open3D/Core/CUDAUtils.h"
 #include "Open3D/Core/Dtype.h"
 #include "Open3D/Core/ShapeUtil.h"
 #include "Open3D/Core/SizeVector.h"
 #include "Open3D/Core/Tensor.h"
 #include "Open3D/Utility/Console.h"

 #include <sstream>

 namespace open3d {

 class Indexer;

 class IndexerIterator;

 // Maximum number of dimensions of TensorRef.
 static constexpr int64_t MAX_DIMS = 10;

 // Maximum number of inputs of an op.
 // MAX_INPUTS shall be >= MAX_DIMS to support advanced indexing.
 static constexpr int64_t MAX_INPUTS = 10;
 // Enforce the documented relationship at compile time so that changing one
 // constant without the other fails the build instead of misbehaving later.
 static_assert(MAX_INPUTS >= MAX_DIMS,
               "MAX_INPUTS must be >= MAX_DIMS to support advanced indexing.");

 // Maximum number of outputs of an op. This number can be increased when
 // necessary.
 static constexpr int64_t MAX_OUTPUTS = 2;

 /// Fixed-size array wrapper whose element accessors are callable from both
 /// host and device code.
 ///
 /// \tparam T Element type.
 /// \tparam size Number of elements stored inline.
 template <typename T, int size>
 struct alignas(16) SmallArray {
     // Special members are all compiler-generated.
     SmallArray() = default;
     SmallArray(const SmallArray&) = default;
     SmallArray& operator=(const SmallArray&) = default;

     /// Mutable access to the element at \p idx (no bounds checking).
     OPEN3D_HOST_DEVICE T& operator[](int idx) { return data_[idx]; }

     /// Returns a copy of the element at \p idx (no bounds checking).
     OPEN3D_HOST_DEVICE T operator[](int idx) const { return data_[idx]; }

     /// Inline element storage.
     T data_[size];
 };

 /// Converts a linear element index into one offset per operand.
 ///
 /// The constructor copies the sizes and strides of up to MAX_DIMS dimensions
 /// into index_t storage. get() then peels one coordinate per dimension off
 /// the linear index, starting at dimension 0, and accumulates
 /// coordinate * stride for each of the NARGS operands.
 ///
 /// \tparam NARGS Number of operands tracked simultaneously.
 /// \tparam index_t Integer type used to store sizes, strides and offsets.
 template <int NARGS, typename index_t = uint32_t>
 struct OffsetCalculator {
     /// \param dims Number of valid dimensions. More than MAX_DIMS is reported
     /// through utility::LogError.
     /// \param sizes Extents, read as sizes[dim] for dim < dims.
     /// \param strides Strides, read as strides[arg][dim] for dim < dims.
     OffsetCalculator(int dims,
                      const int64_t* sizes,
                      const int64_t* const* strides)
         : dims_(dims) {
         if (dims_ > MAX_DIMS) {
             utility::LogError("tensor has too many (>{}) dims_", MAX_DIMS);
         }

         // Slots at or beyond dims_ are filled with size 1 and stride 0.
         for (int dim = 0; dim < MAX_DIMS; ++dim) {
             const bool is_valid = dim < dims_;
             sizes_[dim] = is_valid ? sizes[dim] : 1;
             for (int arg = 0; arg < NARGS; ++arg) {
                 if (is_valid) {
                     strides_[dim][arg] = strides[arg][dim];
                 } else {
                     strides_[dim][arg] = 0;
                 }
             }
         }
     }

     /// Returns, for every operand, the sum over the valid dimensions of
     /// coordinate * stride, where the coordinates are obtained by repeatedly
     /// taking \p linear_idx modulo the size of each dimension.
     OPEN3D_HOST_DEVICE SmallArray<index_t, NARGS> get(
             index_t linear_idx) const {
         SmallArray<index_t, NARGS> result;
 #if defined(__CUDA_ARCH__)
 #pragma unroll
 #endif
         for (int arg = 0; arg < NARGS; ++arg) {
             result[arg] = 0;
         }

 #if defined(__CUDA_ARCH__)
 #pragma unroll
 #endif
         for (int dim = 0; dim < MAX_DIMS; ++dim) {
             // Stop once all valid dimensions have been consumed. The loop
             // bound itself stays the compile-time constant MAX_DIMS.
             if (dim == dims_) {
                 break;
             }
             const index_t coord = linear_idx % sizes_[dim];
             linear_idx /= sizes_[dim];

 #if defined(__CUDA_ARCH__)
 #pragma unroll
 #endif
             for (int arg = 0; arg < NARGS; ++arg) {
                 result[arg] += coord * strides_[dim][arg];
             }
         }
         return result;
     }

     /// Number of valid dimensions.
     int dims_;
     /// Per-dimension extents.
     index_t sizes_[MAX_DIMS];
     /// Per-dimension, per-operand strides.
     index_t strides_[MAX_DIMS][NARGS];
 };

 /// A minimalistic struct that references a Tensor: it stores the raw data
 /// pointer, the element byte size, and the shape and byte strides of up to
 /// MAX_DIMS dimensions. It does not own the referenced memory.
 struct TensorRef {
     // The default copy constructor works on __device__ as well so we don't
     // define it explicitly. shape_[MAX_DIMS] and byte_strides_[MAX_DIMS] will
     // be copied fully.

     /// Creates an empty reference: null data pointer, 0 dimensions and 0 byte
     /// size. shape_ and byte_strides_ are left uninitialized.
     TensorRef() : data_ptr_(nullptr), ndims_(0), dtype_byte_size_(0) {}

     /// Creates a reference to \p t.
     ///
     /// Reports an error through utility::LogError if \p t has more than
     /// MAX_DIMS dimensions. Each stride reported by the tensor is multiplied
     /// by the element byte size before being stored in byte_strides_.
     TensorRef(const Tensor& t) {
         if (t.NumDims() > MAX_DIMS) {
             utility::LogError("Tenor has too many dimensions {} > {}.",
                               t.NumDims(), MAX_DIMS);
         }
         // The const qualifier of the tensor's data pointer is dropped here;
         // TensorRef does not track constness.
         data_ptr_ = const_cast<void*>(t.GetDataPtr());
         ndims_ = t.NumDims();
         dtype_byte_size_ = DtypeUtil::ByteSize(t.GetDtype());
         for (int64_t i = 0; i < ndims_; ++i) {
             shape_[i] = t.GetShape(i);
             byte_strides_[i] = t.GetStride(i) * dtype_byte_size_;
         }
     }

     /// Reorders shape_ and byte_strides_ in place so that new dimension i
     /// takes the values of old dimension dims[i].
     ///
     /// \param dims Must contain exactly ndims_ entries forming a permutation
     /// of [0, ndims_ - 1]. Entries are passed through shape_util::WrapDim
     /// before use. Violations are reported through utility::LogError.
     void Permute(const SizeVector& dims) {
         // Check dims are a permutation of [0, 1, 2, ..., n-1].
         if (static_cast<int64_t>(dims.size()) != ndims_) {
             utility::LogError("Number of dimensions mismatch {} != {}.",
                               dims.size(), ndims_);
         }

         // Wrap and range-check every entry BEFORE it is used as an index.
         // Indexing seen_dims with the raw entry would be out of bounds for
         // negative or too-large values.
         std::vector<int64_t> src_dims;
         src_dims.reserve(dims.size());
         std::vector<bool> seen_dims(ndims_, false);
         for (const int64_t& dim : dims) {
             const int64_t src_dim = shape_util::WrapDim(dim, ndims_);
             if (src_dim < 0 || src_dim >= ndims_) {
                 utility::LogError(
                         "Permute dim {} is out of range for {} dimensions.",
                         dim, ndims_);
             }
             seen_dims[src_dim] = true;
             src_dims.push_back(src_dim);
         }
         // With exactly ndims_ in-range entries, any duplicate leaves at least
         // one dimension unseen.
         if (!std::all_of(seen_dims.begin(), seen_dims.end(),
                          [](bool seen) { return seen; })) {
             utility::LogError(
                     "Permute dims must be a permuntation from 0 to {}.",
                     dims.size() - 1);
         }

         // Map to new shape and strides. Temporaries are required because the
         // permutation cannot be applied element by element in place.
         SizeVector new_shape(ndims_);
         SizeVector new_byte_strides(ndims_);
         for (int64_t i = 0; i < ndims_; ++i) {
             new_shape[i] = shape_[src_dims[i]];
             new_byte_strides[i] = byte_strides_[src_dims[i]];
         }
         for (int64_t i = 0; i < ndims_; ++i) {
             shape_[i] = new_shape[i];
             byte_strides_[i] = new_byte_strides[i];
         }
     }

     /// Returns true if both references have the same data pointer, number of
     /// dimensions and element byte size, and the same shape and byte strides
     /// in each of the first ndims_ dimensions.
     bool operator==(const TensorRef& other) const {
         if (data_ptr_ != other.data_ptr_ || ndims_ != other.ndims_ ||
             dtype_byte_size_ != other.dtype_byte_size_) {
             return false;
         }
         // Only the first ndims_ slots are meaningful; the rest may be
         // uninitialized and must not be compared.
         for (int64_t i = 0; i < ndims_; ++i) {
             if (shape_[i] != other.shape_[i] ||
                 byte_strides_[i] != other.byte_strides_[i]) {
                 return false;
             }
         }
         return true;
     }

     bool operator!=(const TensorRef& other) const { return !(*this == other); }

     /// Start of the referenced data (not owned).
     void* data_ptr_ = nullptr;
     /// Number of valid entries in shape_ and byte_strides_.
     int64_t ndims_ = 0;
     /// Size of one element in bytes.
     int64_t dtype_byte_size_ = 0;
     /// Extent of each dimension.
     int64_t shape_[MAX_DIMS];
     /// Stride of each dimension, in bytes.
     int64_t byte_strides_[MAX_DIMS];
 };

 // Dtype requirement selected through the dtype_policy argument of the Indexer
 // constructors.
 enum class DtypePolicy {
     NONE,        // Do not check. Expects the kernel to handle the conversion.
                  // E.g. in Copy kernel with type casting.
     ALL_SAME,    // All inputs and outputs have the same dtype.
     INPUT_SAME,  // All inputs have the same dtype.
     INPUT_SAME_OUTPUT_BOOL  // All inputs have the same dtype. Outputs
                             // have bool dtype.
 };

 /// Maps a linear workload index to an element pointer of a single tensor.
 class TensorIterator {
 public:
     /// Captures a TensorRef of \p tensor together with its number of
     /// dimensions.
     TensorIterator(const Tensor& tensor)
         : input_(tensor), ndims_(tensor.NumDims()) {}

     /// Returns the product of the extents of all ndims_ dimensions (1 when
     /// there are no dimensions).
     OPEN3D_HOST_DEVICE int64_t NumWorkloads() const {
         int64_t total = 1;
         for (int64_t dim = 0; dim < ndims_; ++dim) {
             total *= input_.shape_[dim];
         }
         return total;
     }

     /// Returns the address of the element for \p workload_idx, or nullptr if
     /// \p workload_idx is outside [0, NumWorkloads()).
     OPEN3D_HOST_DEVICE void* GetPtr(int64_t workload_idx) const {
         const bool out_of_range =
                 workload_idx < 0 || workload_idx >= NumWorkloads();
         if (out_of_range) {
             return nullptr;
         }

         // Express the index in bytes, then split it dimension by dimension
         // into whole multiples of each byte stride.
         // NOTE(review): dividing by the byte strides looks like it assumes
         // non-zero strides in decreasing order (contiguous layout) - confirm.
         int64_t remaining = workload_idx * input_.dtype_byte_size_;
         int64_t byte_offset = 0;
         for (int64_t dim = 0; dim < ndims_; ++dim) {
             const int64_t stride = input_.byte_strides_[dim];
             byte_offset += (remaining / stride) * stride;
             remaining %= stride;
         }

         char* base = static_cast<char*>(input_.data_ptr_);
         return static_cast<void*>(base + byte_offset);
     }

 protected:
     /// Reference to the iterated tensor.
     TensorRef input_;
     /// Number of dimensions of the iterated tensor.
     int64_t ndims_;
 };

 /// Holds TensorRefs for up to MAX_INPUTS inputs and MAX_OUTPUTS outputs
 /// together with a shared ("master") shape and strides, and maps a linear
 /// workload index to the data pointer of each operand.
 class Indexer {
 public:
     Indexer() {}
     Indexer(const Indexer&) = default;
     Indexer& operator=(const Indexer&) = default;

     /// Builds an indexer for several inputs and a single output.
     ///
     /// \param input_tensors Input tensors.
     /// \param output_tensor Output tensor.
     /// \param dtype_policy Dtype requirement, see DtypePolicy.
     /// \param reduction_dims Dimensions to reduce; empty by default.
     Indexer(const std::vector<Tensor>& input_tensors,
             const Tensor& output_tensor,
             DtypePolicy dtype_policy = DtypePolicy::ALL_SAME,
             const SizeVector& reduction_dims = {});

     /// Same as above, for several outputs.
     Indexer(const std::vector<Tensor>& input_tensors,
             const std::vector<Tensor>& output_tensors,
             DtypePolicy dtype_policy = DtypePolicy::ALL_SAME,
             const SizeVector& reduction_dims = {});

     // NOTE(review): the following methods are defined out of line; their
     // exact semantics are not visible in this header.
     bool CanUse32BitIndexing() const;

     IndexerIterator SplitTo32BitIndexing() const;

     std::unique_ptr<Indexer> SplitLargestDim();

     Indexer GetPerOutputIndexer(int64_t output_idx) const;

     /// Returns the accumulate_ flag.
     bool ShouldAccumulate() const { return accumulate_; }

     /// Returns the final_output_ flag.
     bool IsFinalOutput() const { return final_output_; }

     void ShrinkDim(int64_t dim, int64_t start, int64_t size);

     int64_t NumReductionDims() const;

     /// Returns number of dimensions of the Indexer.
     int64_t NumDims() const { return ndims_; }

     /// Returns a pointer to the master shape array (MAX_DIMS slots).
     const int64_t* GetMasterShape() const { return master_shape_; }
     int64_t* GetMasterShape() { return master_shape_; }

     /// Returns a pointer to the master strides array (MAX_DIMS slots).
     const int64_t* GetMasterStrides() const { return master_strides_; }

     int64_t NumWorkloads() const;

     int64_t NumOutputElements() const;

     /// Number of input Tensors.
     int64_t NumInputs() const { return num_inputs_; }

     /// Returns input TensorRef. An index outside [0, NumInputs()) is reported
     /// through utility::LogError.
     TensorRef& GetInput(int64_t i) {
         if (i >= num_inputs_ || i < 0) {
             utility::LogError("0 <= i < {} required, however, i = {}.",
                               num_inputs_, i);
         }
         return inputs_[i];
     }
     const TensorRef& GetInput(int64_t i) const {
         if (i >= num_inputs_ || i < 0) {
             utility::LogError("0 <= i < {} required, however, i = {}.",
                               num_inputs_, i);
         }
         return inputs_[i];
     }

     /// Returns output TensorRef. An index outside [0, num_outputs_) is
     /// reported through utility::LogError.
     TensorRef& GetOutput(int64_t i) {
         if (i >= num_outputs_ || i < 0) {
             utility::LogError("0 <= i < {} required, however, i = {}.",
                               num_outputs_, i);
         }
         return outputs_[i];
     }
     const TensorRef& GetOutput(int64_t i) const {
         if (i >= num_outputs_ || i < 0) {
             utility::LogError("0 <= i < {} required, however, i = {}.",
                               num_outputs_, i);
         }
         return outputs_[i];
     }

     /// Returns the only output TensorRef. Calling this on an indexer with
     /// more than one output is reported through utility::LogError; use
     /// GetOutput(i) in that case.
     TensorRef& GetOutput() {
         if (num_outputs_ > 1) {
             utility::LogError("num_outputs_ == {} > 1, use GetOutput(i)",
                               num_outputs_);
         }
         return GetOutput(0);
     }
     const TensorRef& GetOutput() const {
         if (num_outputs_ > 1) {
             utility::LogError("num_outputs_ == {} > 1, use GetOutput(i)",
                               num_outputs_);
         }
         return GetOutput(0);
     }

     /// Returns true if the \p dim -th dimension is reduced.
     bool IsReductionDim(int64_t dim) const {
         // All outputs have the same shape and reduction dims. Even if they
         // don't have the same initial strides, the reduced strides are always
         // set to 0. Thus it is okay to use outputs_[0].
         return outputs_[0].byte_strides_[dim] == 0 && master_shape_[dim] > 1;
     }

     /// Returns the data pointer of input \p input_idx for \p workload_idx,
     /// or nullptr if \p input_idx is outside [0, num_inputs_) or
     /// \p workload_idx is negative.
     OPEN3D_HOST_DEVICE char* GetInputPtr(int64_t input_idx,
                                          int64_t workload_idx) const {
         if (input_idx < 0 || input_idx >= num_inputs_) {
             return nullptr;
         }
         return GetWorkloadDataPtr(inputs_[input_idx], workload_idx);
     }

     /// Returns the data pointer of the first output for \p workload_idx, or
     /// nullptr if \p workload_idx is negative.
     OPEN3D_HOST_DEVICE char* GetOutputPtr(int64_t workload_idx) const {
         return GetWorkloadDataPtr(outputs_[0], workload_idx);
     }

     /// Returns the data pointer of output \p output_idx for \p workload_idx,
     /// or nullptr if \p output_idx is outside [0, num_outputs_) or
     /// \p workload_idx is negative.
     OPEN3D_HOST_DEVICE char* GetOutputPtr(int64_t output_idx,
                                           int64_t workload_idx) const {
         // Mirror the guard in GetInputPtr so an invalid index cannot read
         // past the outputs_ array.
         if (output_idx < 0 || output_idx >= num_outputs_) {
             return nullptr;
         }
         return GetWorkloadDataPtr(outputs_[output_idx], workload_idx);
     }

 protected:
     void CoalesceDimensions();

     // Permute reduction dimensions to front.
     // TODO: Sort the dimensions based on strides in ascending order to improve
     // thread coalescing.
     void ReorderDimensions(const SizeVector& reduction_dims);

     void UpdateMasterStrides();

     static void BroadcastRestride(TensorRef& src,
                                   int64_t dst_ndims,
                                   const int64_t* dst_shape);

     static void ReductionRestride(TensorRef& dst,
                                   int64_t src_ndims,
                                   const int64_t* src_shape,
                                   const SizeVector& reduction_dims);

     /// Returns the data pointer of \p tr for \p workload_idx, or nullptr if
     /// \p workload_idx is negative.
     ///
     /// The linear index is split, dimension by dimension, into coordinates
     /// using master_strides_; each coordinate is then multiplied by the
     /// corresponding byte stride of \p tr.
     OPEN3D_HOST_DEVICE char* GetWorkloadDataPtr(const TensorRef& tr,
                                                 int64_t workload_idx) const {
         // For 0-sized input reduction op, the output Tensor
         // workload_idx == 1 > NumWorkloads() == 0.
         // Hence only the lower bound is checked here.
         if (workload_idx < 0) {
             return nullptr;
         }
         int64_t offset = 0;
         for (int64_t i = 0; i < ndims_; ++i) {
             offset += workload_idx / master_strides_[i] * tr.byte_strides_[i];
             workload_idx = workload_idx % master_strides_[i];
         }
         return static_cast<char*>(tr.data_ptr_) + offset;
     }

     /// Number of valid entries in inputs_.
     int64_t num_inputs_ = 0;
     /// Number of valid entries in outputs_.
     int64_t num_outputs_ = 0;

     /// Input references; only the first num_inputs_ are meaningful.
     TensorRef inputs_[MAX_INPUTS];

     /// Output references; only the first num_outputs_ are meaningful.
     TensorRef outputs_[MAX_OUTPUTS];

     /// Shape shared by all operands; only the first ndims_ entries are used.
     int64_t master_shape_[MAX_DIMS];

     /// Strides used to split a linear workload index into per-dimension
     /// coordinates; only the first ndims_ entries are used.
     int64_t master_strides_[MAX_DIMS];

     /// Number of dimensions of the Indexer.
     int64_t ndims_ = 0;

     /// Value returned by IsFinalOutput(); true by default.
     bool final_output_ = true;

     /// Value returned by ShouldAccumulate(); false by default.
     bool accumulate_ = false;
 };

 // Range-style wrapper around an Indexer that exposes begin()/end(). It is the
 // return type of Indexer::SplitTo32BitIndexing(). All member functions are
 // defined out of line.
 class IndexerIterator {
 public:
     // Iterator type returned by begin()/end(). Dereferencing yields an
     // Indexer&. Only a move constructor is declared, so the type is not
     // copyable.
     struct Iterator {
         Iterator(){};
         Iterator(const Indexer& indexer);
         Iterator(Iterator&& other) = default;

         Indexer& operator*() const;
         Iterator& operator++();
         bool operator==(const Iterator& other) const;
         bool operator!=(const Iterator& other) const;

         // Indexers owned by this iterator.
         // NOTE(review): presumably the pending sub-indexers still to be
         // visited - confirm against the out-of-line definitions.
         std::vector<std::unique_ptr<Indexer>> vec_;
     };

     IndexerIterator(const Indexer& indexer);

     Iterator begin() const;
     Iterator end() const;

 private:
     // Stored by reference: the referenced Indexer must outlive this object.
     const Indexer& indexer_;
 };

 }  // namespace open3d
open3d::TensorRef::ndims_
int64_t ndims_
Definition: Indexer.h:192

open3d::Tensor::GetDtype
Dtype GetDtype() const
Definition: Tensor.h:742

open3d::Indexer::GetOutput
const TensorRef & GetOutput(int64_t i) const
Definition: Indexer.h:363

open3d::Indexer::IsFinalOutput
bool IsFinalOutput() const
Definition: Indexer.h:297

open3d::SmallArray::data_
T data_[size]
Definition: Indexer.h:58

open3d::Tensor::GetDataPtr
void * GetDataPtr()
Definition: Tensor.h:738

open3d::OffsetCalculator
Definition: Indexer.h:69

open3d::TensorRef::TensorRef
TensorRef(const Tensor &t)
Definition: Indexer.h:132

open3d::TensorIterator::input_
TensorRef input_
Definition: Indexer.h:249

open3d::IndexerIterator::Iterator
Definition: Indexer.h:531

open3d::TensorRef::byte_strides_
int64_t byte_strides_[MAX_DIMS]
Definition: Indexer.h:195

open3d::Indexer::GetInput
TensorRef & GetInput(int64_t i)
Returns input TensorRef.
Definition: Indexer.h:340

open3d::Tensor::NumDims
int64_t NumDims() const
Definition: Tensor.h:750

open3d::Indexer::IsReductionDim
bool IsReductionDim(int64_t dim) const
Returns true if the dim -th dimension is reduced.
Definition: Indexer.h:389

offset
int offset
Definition: FilePCD.cpp:62

open3d::Indexer::Indexer
Indexer()
Definition: Indexer.h:262

open3d::TensorIterator::ndims_
int64_t ndims_
Definition: Indexer.h:250

ShapeUtil.h

open3d::utility::LogError
void LogError(const char *format, const Args &... args)
Definition: Console.h:174

open3d::Indexer::GetOutput
TensorRef & GetOutput()
Definition: Indexer.h:373

open3d::Indexer::GetOutputPtr
OPEN3D_HOST_DEVICE char * GetOutputPtr(int64_t output_idx, int64_t workload_idx) const
Definition: Indexer.h:416

open3d::TensorRef::operator!=
bool operator!=(const TensorRef &other) const
Definition: Indexer.h:189

open3d::TensorRef::operator==
bool operator==(const TensorRef &other) const
Definition: Indexer.h:177

SizeVector.h

open3d::Indexer::GetMasterShape
int64_t * GetMasterShape()
Definition: Indexer.h:315

open3d::Indexer::GetOutput
TensorRef & GetOutput(int64_t i)
Returns output TensorRef.
Definition: Indexer.h:356

open3d::Indexer::GetWorkloadDataPtr
OPEN3D_HOST_DEVICE char * GetWorkloadDataPtr(const TensorRef &tr, int64_t workload_idx) const
Definition: Indexer.h:474

open3d::OffsetCalculator::dims_
int dims_
Definition: Indexer.h:120

open3d::Indexer::GetInput
const TensorRef & GetInput(int64_t i) const
Definition: Indexer.h:347

open3d::Indexer::GetOutputPtr
OPEN3D_HOST_DEVICE char * GetOutputPtr(int64_t workload_idx) const
Definition: Indexer.h:413

size
int size
Definition: FilePCD.cpp:57

Console.h

OPEN3D_HOST_DEVICE
#define OPEN3D_HOST_DEVICE
Definition: CUDAUtils.h:54

open3d::SizeVector
Definition: SizeVector.h:40

open3d::TensorRef
A minimalistic class that references a Tensor.
Definition: Indexer.h:126

open3d::operator*
Tensor operator*(T scalar_lhs, const Tensor &rhs)
Definition: Tensor.h:886

open3d::DtypePolicy::NONE

open3d::IndexerIterator
Definition: Indexer.h:529

open3d::DtypeUtil::ByteSize
static int64_t ByteSize(const Dtype &dtype)
Definition: Dtype.h:61

open3d::SmallArray::SmallArray
SmallArray()=default

open3d::Indexer::GetMasterShape
const int64_t * GetMasterShape() const
Definition: Indexer.h:314

open3d::SmallArray::operator[]
OPEN3D_HOST_DEVICE T operator[](int i) const
Definition: Indexer.h:60

open3d::TensorIterator::NumWorkloads
OPEN3D_HOST_DEVICE int64_t NumWorkloads() const
Definition: Indexer.h:225

open3d::SmallArray::operator[]
OPEN3D_HOST_DEVICE T & operator[](int i)
Definition: Indexer.h:61

open3d::TensorRef::Permute
void Permute(const SizeVector &dims)
Definition: Indexer.h:146

open3d::TensorIterator
Definition: Indexer.h:220

open3d::Tensor::GetStride
int64_t GetStride(int64_t dim) const
Definition: Tensor.h:734

open3d::IndexerIterator::Iterator::Iterator
Iterator()
Definition: Indexer.h:532

open3d::TensorIterator::GetPtr
OPEN3D_HOST_DEVICE void * GetPtr(int64_t workload_idx) const
Definition: Indexer.h:233

open3d::Indexer::GetMasterStrides
const int64_t * GetMasterStrides() const
Definition: Indexer.h:319

open3d::TensorRef::shape_
int64_t shape_[MAX_DIMS]
Definition: Indexer.h:194

open3d::SmallArray::operator=
SmallArray & operator=(const SmallArray &)=default

open3d::Indexer::NumDims
int64_t NumDims() const
Returns number of dimensions of the Indexer.
Definition: Indexer.h:310

open3d::TensorRef::TensorRef
TensorRef()
Definition: Indexer.h:130

open3d
Definition: Open3DViewer.h:29

open3d::TensorRef::data_ptr_
void * data_ptr_
Definition: Indexer.h:191

open3d::DtypePolicy::ALL_SAME

open3d::Indexer
Definition: Indexer.h:260

open3d::Indexer::NumInputs
int64_t NumInputs() const
Number of input Tensors.
Definition: Indexer.h:337

open3d::OffsetCalculator::OffsetCalculator
OffsetCalculator(int dims, const int64_t *sizes, const int64_t *const *strides)
Definition: Indexer.h:70

Tensor.h

open3d::shape_util::WrapDim
int64_t WrapDim(int64_t dim, int64_t max_dim)
Wrap around negative dim.
Definition: ShapeUtil.cpp:147

open3d::IndexerIterator::Iterator::vec_
std::vector< std::unique_ptr< Indexer > > vec_
Definition: Indexer.h:541

open3d::Indexer::ShouldAccumulate
bool ShouldAccumulate() const
Definition: Indexer.h:295

open3d::DtypePolicy::INPUT_SAME_OUTPUT_BOOL

open3d::Indexer::GetInputPtr
OPEN3D_HOST_DEVICE char * GetInputPtr(int64_t input_idx, int64_t workload_idx) const
Definition: Indexer.h:401

open3d::Tensor
Definition: Tensor.h:46

open3d::DtypePolicy
DtypePolicy
Definition: Indexer.h:198

CUDAUtils.h
Common CUDA utilities.

open3d::SmallArray
Definition: Indexer.h:57

Dtype.h

open3d::DtypePolicy::INPUT_SAME

open3d::TensorIterator::TensorIterator
TensorIterator(const Tensor &tensor)
Definition: Indexer.h:222

open3d::Indexer::GetOutput
const TensorRef & GetOutput() const
Definition: Indexer.h:380

open3d::TensorRef::dtype_byte_size_
int64_t dtype_byte_size_
Definition: Indexer.h:193

open3d::Tensor::GetShape
SizeVector GetShape() const
Definition: Tensor.h:722