Open3D (C++ API)  0.17.0
PointCloudImpl.h
Go to the documentation of this file.
1 // ----------------------------------------------------------------------------
2 // - Open3D: www.open3d.org -
3 // ----------------------------------------------------------------------------
4 // Copyright (c) 2018-2023 www.open3d.org
5 // SPDX-License-Identifier: MIT
6 // ----------------------------------------------------------------------------
7 
8 #include <atomic>
9 #include <vector>
10 
11 #include "open3d/core/CUDAUtils.h"
12 #include "open3d/core/Dispatch.h"
13 #include "open3d/core/Dtype.h"
16 #include "open3d/core/SizeVector.h"
17 #include "open3d/core/Tensor.h"
25 #include "open3d/utility/Logging.h"
26 
27 namespace open3d {
28 namespace t {
29 namespace geometry {
30 namespace kernel {
31 namespace pointcloud {
32 
33 #ifndef __CUDACC__
34 using std::abs;
35 using std::max;
36 using std::min;
37 using std::sqrt;
38 #endif
39 
40 #if defined(__CUDACC__)
41 void UnprojectCUDA
42 #else
44 #endif
45  (const core::Tensor& depth,
47  image_colors,
50  const core::Tensor& intrinsics,
51  const core::Tensor& extrinsics,
52  float depth_scale,
53  float depth_max,
54  int64_t stride) {
55 
56  const bool have_colors = image_colors.has_value();
57  NDArrayIndexer depth_indexer(depth, 2);
58  NDArrayIndexer image_colors_indexer;
59 
61  TransformIndexer ti(intrinsics, pose, 1.0f);
62 
63  // Output
64  int64_t rows_strided = depth_indexer.GetShape(0) / stride;
65  int64_t cols_strided = depth_indexer.GetShape(1) / stride;
66 
67  points = core::Tensor({rows_strided * cols_strided, 3}, core::Float32,
68  depth.GetDevice());
69  NDArrayIndexer point_indexer(points, 1);
70  NDArrayIndexer colors_indexer;
71  if (have_colors) {
72  const auto& imcol = image_colors.value().get();
73  image_colors_indexer = NDArrayIndexer{imcol, 2};
74  colors.value().get() = core::Tensor({rows_strided * cols_strided, 3},
75  core::Float32, imcol.GetDevice());
76  colors_indexer = NDArrayIndexer(colors.value().get(), 1);
77  }
78 
79  // Counter
80 #if defined(__CUDACC__)
81  core::Tensor count(std::vector<int>{0}, {}, core::Int32, depth.GetDevice());
82  int* count_ptr = count.GetDataPtr<int>();
83 #else
84  std::atomic<int> count_atomic(0);
85  std::atomic<int>* count_ptr = &count_atomic;
86 #endif
87 
88  int64_t n = rows_strided * cols_strided;
89 
90  DISPATCH_DTYPE_TO_TEMPLATE(depth.GetDtype(), [&]() {
91  core::ParallelFor(
92  depth.GetDevice(), n, [=] OPEN3D_DEVICE(int64_t workload_idx) {
93  int64_t y = (workload_idx / cols_strided) * stride;
94  int64_t x = (workload_idx % cols_strided) * stride;
95 
96  float d = *depth_indexer.GetDataPtr<scalar_t>(x, y) /
97  depth_scale;
98  if (d > 0 && d < depth_max) {
99  int idx = OPEN3D_ATOMIC_ADD(count_ptr, 1);
100 
101  float x_c = 0, y_c = 0, z_c = 0;
102  ti.Unproject(static_cast<float>(x),
103  static_cast<float>(y), d, &x_c, &y_c,
104  &z_c);
105 
106  float* vertex = point_indexer.GetDataPtr<float>(idx);
107  ti.RigidTransform(x_c, y_c, z_c, vertex + 0, vertex + 1,
108  vertex + 2);
109  if (have_colors) {
110  float* pcd_pixel =
111  colors_indexer.GetDataPtr<float>(idx);
112  float* image_pixel =
113  image_colors_indexer.GetDataPtr<float>(x,
114  y);
115  *pcd_pixel = *image_pixel;
116  *(pcd_pixel + 1) = *(image_pixel + 1);
117  *(pcd_pixel + 2) = *(image_pixel + 2);
118  }
119  }
120  });
121  });
122 #if defined(__CUDACC__)
123  int total_pts_count = count.Item<int>();
124 #else
125  int total_pts_count = (*count_ptr).load();
126 #endif
127 
128 #ifdef __CUDACC__
130 #endif
131  points = points.Slice(0, 0, total_pts_count);
132  if (have_colors) {
133  colors.value().get() =
134  colors.value().get().Slice(0, 0, total_pts_count);
135  }
136 }
137 
138 #if defined(__CUDACC__)
139 void GetPointMaskWithinAABBCUDA
140 #else
142 #endif
143  (const core::Tensor& points,
144  const core::Tensor& min_bound,
145  const core::Tensor& max_bound,
146  core::Tensor& mask) {
147 
148  DISPATCH_FLOAT_DTYPE_TO_TEMPLATE(points.GetDtype(), [&]() {
149  const scalar_t* points_ptr = points.GetDataPtr<scalar_t>();
150  const int64_t n = points.GetLength();
151  const scalar_t* min_bound_ptr = min_bound.GetDataPtr<scalar_t>();
152  const scalar_t* max_bound_ptr = max_bound.GetDataPtr<scalar_t>();
153  bool* mask_ptr = mask.GetDataPtr<bool>();
154 
155  core::ParallelFor(
156  points.GetDevice(), n, [=] OPEN3D_DEVICE(int64_t workload_idx) {
157  const scalar_t x = points_ptr[3 * workload_idx + 0];
158  const scalar_t y = points_ptr[3 * workload_idx + 1];
159  const scalar_t z = points_ptr[3 * workload_idx + 2];
160 
161  if (x >= min_bound_ptr[0] && x <= max_bound_ptr[0] &&
162  y >= min_bound_ptr[1] && y <= max_bound_ptr[1] &&
163  z >= min_bound_ptr[2] && z <= max_bound_ptr[2]) {
164  mask_ptr[workload_idx] = true;
165  } else {
166  mask_ptr[workload_idx] = false;
167  }
168  });
169  });
170 }
171 
172 #if defined(__CUDACC__)
173 void GetPointMaskWithinOBBCUDA
174 #else
176 #endif
177  (const core::Tensor& points,
178  const core::Tensor& center,
179  const core::Tensor& rotation,
180  const core::Tensor& extent,
181  core::Tensor& mask) {
182  const core::Tensor half_extent = extent.Div(2);
183  // Since we will extract 3 rotation axis from matrix and use it inside
184  // kernel, the transpose is needed.
185  const core::Tensor rotation_t = rotation.Transpose(0, 1).Contiguous();
186  const core::Tensor pd = points - center;
187  const int64_t n = points.GetLength();
188 
189  DISPATCH_FLOAT_DTYPE_TO_TEMPLATE(points.GetDtype(), [&]() {
190  const scalar_t* pd_ptr = pd.GetDataPtr<scalar_t>();
191  // const scalar_t* center_ptr = center.GetDataPtr<scalar_t>();
192  const scalar_t* rotation_ptr = rotation_t.GetDataPtr<scalar_t>();
193  const scalar_t* half_extent_ptr = half_extent.GetDataPtr<scalar_t>();
194  bool* mask_ptr = mask.GetDataPtr<bool>();
195 
196  core::ParallelFor(points.GetDevice(), n,
197  [=] OPEN3D_DEVICE(int64_t workload_idx) {
198  int64_t idx = 3 * workload_idx;
199  if (abs(core::linalg::kernel::dot_3x1(
200  pd_ptr + idx, rotation_ptr)) <=
201  half_extent_ptr[0] &&
202  abs(core::linalg::kernel::dot_3x1(
203  pd_ptr + idx, rotation_ptr + 3)) <=
204  half_extent_ptr[1] &&
205  abs(core::linalg::kernel::dot_3x1(
206  pd_ptr + idx, rotation_ptr + 6)) <=
207  half_extent_ptr[2]) {
208  mask_ptr[workload_idx] = true;
209  } else {
210  mask_ptr[workload_idx] = false;
211  }
212  });
213  });
214 }
215 
216 #if defined(__CUDACC__)
217 void NormalizeNormalsCUDA
218 #else
220 #endif
221  (core::Tensor& normals) {
222  const core::Dtype dtype = normals.GetDtype();
223  const int64_t n = normals.GetLength();
224 
225  DISPATCH_FLOAT_DTYPE_TO_TEMPLATE(dtype, [&]() {
226  scalar_t* ptr = normals.GetDataPtr<scalar_t>();
227 
228  core::ParallelFor(normals.GetDevice(), n,
229  [=] OPEN3D_DEVICE(int64_t workload_idx) {
230  int64_t idx = 3 * workload_idx;
231  scalar_t x = ptr[idx];
232  scalar_t y = ptr[idx + 1];
233  scalar_t z = ptr[idx + 2];
234  scalar_t norm = sqrt(x * x + y * y + z * z);
235  if (norm > 0) {
236  x /= norm;
237  y /= norm;
238  z /= norm;
239  }
240  ptr[idx] = x;
241  ptr[idx + 1] = y;
242  ptr[idx + 2] = z;
243  });
244  });
245 }
246 
247 #if defined(__CUDACC__)
248 void OrientNormalsToAlignWithDirectionCUDA
249 #else
251 #endif
252  (core::Tensor& normals, const core::Tensor& direction) {
253  const core::Dtype dtype = normals.GetDtype();
254  const int64_t n = normals.GetLength();
255 
256  DISPATCH_FLOAT_DTYPE_TO_TEMPLATE(dtype, [&]() {
257  scalar_t* ptr = normals.GetDataPtr<scalar_t>();
258  const scalar_t* direction_ptr = direction.GetDataPtr<scalar_t>();
259 
260  core::ParallelFor(normals.GetDevice(), n,
261  [=] OPEN3D_DEVICE(int64_t workload_idx) {
262  int64_t idx = 3 * workload_idx;
263  scalar_t* normal = ptr + idx;
264  const scalar_t norm = sqrt(normal[0] * normal[0] +
265  normal[1] * normal[1] +
266  normal[2] * normal[2]);
267  if (norm == 0.0) {
268  normal[0] = direction_ptr[0];
269  normal[1] = direction_ptr[1];
270  normal[2] = direction_ptr[2];
272  normal, direction_ptr) < 0) {
273  normal[0] *= -1;
274  normal[1] *= -1;
275  normal[2] *= -1;
276  }
277  });
278  });
279 }
280 
281 #if defined(__CUDACC__)
282 void OrientNormalsTowardsCameraLocationCUDA
283 #else
285 #endif
286  (const core::Tensor& points,
287  core::Tensor& normals,
288  const core::Tensor& camera) {
289  const core::Dtype dtype = points.GetDtype();
290  const int64_t n = normals.GetLength();
291 
292  DISPATCH_FLOAT_DTYPE_TO_TEMPLATE(dtype, [&]() {
293  scalar_t* normals_ptr = normals.GetDataPtr<scalar_t>();
294  const scalar_t* camera_ptr = camera.GetDataPtr<scalar_t>();
295  const scalar_t* points_ptr = points.GetDataPtr<scalar_t>();
296 
298  normals.GetDevice(), n,
299  [=] OPEN3D_DEVICE(int64_t workload_idx) {
300  int64_t idx = 3 * workload_idx;
301  scalar_t* normal = normals_ptr + idx;
302  const scalar_t* point = points_ptr + idx;
303  const scalar_t reference[3] = {camera_ptr[0] - point[0],
304  camera_ptr[1] - point[1],
305  camera_ptr[2] - point[2]};
306  const scalar_t norm =
307  sqrt(normal[0] * normal[0] + normal[1] * normal[1] +
308  normal[2] * normal[2]);
309  if (norm == 0.0) {
310  normal[0] = reference[0];
311  normal[1] = reference[1];
312  normal[2] = reference[2];
313  const scalar_t norm_new = sqrt(normal[0] * normal[0] +
314  normal[1] * normal[1] +
315  normal[2] * normal[2]);
316  if (norm_new == 0.0) {
317  normal[0] = 0.0;
318  normal[1] = 0.0;
319  normal[2] = 1.0;
320  } else {
321  normal[0] /= norm_new;
322  normal[1] /= norm_new;
323  normal[2] /= norm_new;
324  }
325  } else if (core::linalg::kernel::dot_3x1(normal,
326  reference) < 0) {
327  normal[0] *= -1;
328  normal[1] *= -1;
329  normal[2] *= -1;
330  }
331  });
332  });
333 }
334 
335 template <typename scalar_t>
337  scalar_t* u,
338  scalar_t* v) {
339  // Unless the x and y coords are both close to zero, we can simply take
340  // ( -y, x, 0 ) and normalize it. If both x and y are close to zero,
341  // then the vector is close to the z-axis, so it's far from colinear to
342  // the x-axis for instance. So we take the crossed product with (1,0,0)
343  // and normalize it.
344  if (!(abs(query[0] - query[2]) < 1e-6) ||
345  !(abs(query[1] - query[2]) < 1e-6)) {
346  const scalar_t norm2_inv =
347  1.0 / sqrt(query[0] * query[0] + query[1] * query[1]);
348  v[0] = -1 * query[1] * norm2_inv;
349  v[1] = query[0] * norm2_inv;
350  v[2] = 0;
351  } else {
352  const scalar_t norm2_inv =
353  1.0 / sqrt(query[1] * query[1] + query[2] * query[2]);
354  v[0] = 0;
355  v[1] = -1 * query[2] * norm2_inv;
356  v[2] = query[1] * norm2_inv;
357  }
358 
359  core::linalg::kernel::cross_3x1(query, v, u);
360 }
361 
362 template <typename scalar_t>
363 inline OPEN3D_HOST_DEVICE void Swap(scalar_t* x, scalar_t* y) {
364  scalar_t tmp = *x;
365  *x = *y;
366  *y = tmp;
367 }
368 
369 template <typename scalar_t>
370 inline OPEN3D_HOST_DEVICE void Heapify(scalar_t* arr, int n, int root) {
371  int largest = root;
372  int l = 2 * root + 1;
373  int r = 2 * root + 2;
374 
375  if (l < n && arr[l] > arr[largest]) {
376  largest = l;
377  }
378  if (r < n && arr[r] > arr[largest]) {
379  largest = r;
380  }
381  if (largest != root) {
382  Swap<scalar_t>(&arr[root], &arr[largest]);
383  Heapify<scalar_t>(arr, n, largest);
384  }
385 }
386 
387 template <typename scalar_t>
388 OPEN3D_HOST_DEVICE void HeapSort(scalar_t* arr, int n) {
389  for (int i = n / 2 - 1; i >= 0; i--) Heapify(arr, n, i);
390 
391  for (int i = n - 1; i > 0; i--) {
392  Swap<scalar_t>(&arr[0], &arr[i]);
393  Heapify<scalar_t>(arr, i, 0);
394  }
395 }
396 
397 template <typename scalar_t>
398 OPEN3D_HOST_DEVICE bool IsBoundaryPoints(const scalar_t* angles,
399  int counts,
400  double angle_threshold) {
401  scalar_t diff;
402  scalar_t max_diff = 0;
403  // Compute the maximal angle difference between two consecutive angles.
404  for (int i = 0; i < counts - 1; i++) {
405  diff = angles[i + 1] - angles[i];
406  max_diff = max(max_diff, diff);
407  }
408 
409  // Get the angle difference between the last and the first.
410  diff = 2 * M_PI - angles[counts - 1] + angles[0];
411  max_diff = max(max_diff, diff);
412 
413  return max_diff > angle_threshold * M_PI / 180.0 ? true : false;
414 }
415 
416 #if defined(__CUDACC__)
417 void ComputeBoundaryPointsCUDA
418 #else
420 #endif
421  (const core::Tensor& points,
422  const core::Tensor& normals,
423  const core::Tensor& indices,
424  const core::Tensor& counts,
425  core::Tensor& mask,
426  double angle_threshold) {
427  const int nn_size = indices.GetShape()[1];
428 
429  DISPATCH_FLOAT_DTYPE_TO_TEMPLATE(points.GetDtype(), [&]() {
430  const scalar_t* points_ptr = points.GetDataPtr<scalar_t>();
431  const scalar_t* normals_ptr = normals.GetDataPtr<scalar_t>();
432  const int64_t n = points.GetLength();
433  const int32_t* indices_ptr = indices.GetDataPtr<int32_t>();
434  const int32_t* counts_ptr = counts.GetDataPtr<int32_t>();
435  bool* mask_ptr = mask.GetDataPtr<bool>();
436 
437  core::Tensor angles = core::Tensor::Full(
438  indices.GetShape(), -10, points.GetDtype(), points.GetDevice());
439  scalar_t* angles_ptr = angles.GetDataPtr<scalar_t>();
440 
441  core::ParallelFor(
442  points.GetDevice(), n, [=] OPEN3D_DEVICE(int64_t workload_idx) {
443  scalar_t u[3], v[3];
444  GetCoordinateSystemOnPlane(normals_ptr + 3 * workload_idx,
445  u, v);
446 
447  // Ignore the point itself.
448  int indices_size = counts_ptr[workload_idx] - 1;
449  if (indices_size > 0) {
450  const scalar_t* query = points_ptr + 3 * workload_idx;
451  for (int i = 1; i < indices_size + 1; i++) {
452  const int idx = workload_idx * nn_size + i;
453 
454  const scalar_t* point_ref =
455  points_ptr + 3 * indices_ptr[idx];
456  const scalar_t delta[3] = {point_ref[0] - query[0],
457  point_ref[1] - query[1],
458  point_ref[2] - query[2]};
459  const scalar_t angle = atan2(
460  core::linalg::kernel::dot_3x1(v, delta),
461  core::linalg::kernel::dot_3x1(u, delta));
462 
463  angles_ptr[idx] = angle;
464  }
465 
466  // Sort the angles in ascending order.
467  HeapSort<scalar_t>(
468  angles_ptr + workload_idx * nn_size + 1,
469  indices_size);
470 
471  mask_ptr[workload_idx] = IsBoundaryPoints<scalar_t>(
472  angles_ptr + workload_idx * nn_size + 1,
473  indices_size, angle_threshold);
474  }
475  });
476  });
477 }
478 
479 // This is a `two-pass` estimate method for covariance which is numerically more
480 // robust than the `textbook` method generally used for covariance computation.
481 template <typename scalar_t>
483  const scalar_t* points_ptr,
484  const int32_t* indices_ptr,
485  const int32_t& indices_count,
486  scalar_t* covariance_ptr) {
487  if (indices_count < 3) {
488  covariance_ptr[0] = 1.0;
489  covariance_ptr[1] = 0.0;
490  covariance_ptr[2] = 0.0;
491  covariance_ptr[3] = 0.0;
492  covariance_ptr[4] = 1.0;
493  covariance_ptr[5] = 0.0;
494  covariance_ptr[6] = 0.0;
495  covariance_ptr[7] = 0.0;
496  covariance_ptr[8] = 1.0;
497  return;
498  }
499 
500  double centroid[3] = {0};
501  for (int32_t i = 0; i < indices_count; ++i) {
502  int32_t idx = 3 * indices_ptr[i];
503  centroid[0] += points_ptr[idx];
504  centroid[1] += points_ptr[idx + 1];
505  centroid[2] += points_ptr[idx + 2];
506  }
507 
508  centroid[0] /= indices_count;
509  centroid[1] /= indices_count;
510  centroid[2] /= indices_count;
511 
512  // cumulants must always be Float64 to ensure precision.
513  double cumulants[6] = {0};
514  for (int32_t i = 0; i < indices_count; ++i) {
515  int32_t idx = 3 * indices_ptr[i];
516  const double x = static_cast<double>(points_ptr[idx]) - centroid[0];
517  const double y = static_cast<double>(points_ptr[idx + 1]) - centroid[1];
518  const double z = static_cast<double>(points_ptr[idx + 2]) - centroid[2];
519 
520  cumulants[0] += x * x;
521  cumulants[1] += y * y;
522  cumulants[2] += z * z;
523 
524  cumulants[3] += x * y;
525  cumulants[4] += x * z;
526  cumulants[5] += y * z;
527  }
528 
529  // Using Bessel's correction (dividing by (n - 1) instead of n).
530  // Refer:
531  // https://en.wikipedia.org/wiki/Algorithms_for_calculating_variance
532  const double normalization_factor = static_cast<double>(indices_count - 1);
533  for (int i = 0; i < 6; ++i) {
534  cumulants[i] /= normalization_factor;
535  }
536 
537  // Covariances(0, 0)
538  covariance_ptr[0] = static_cast<scalar_t>(cumulants[0]);
539  // Covariances(1, 1)
540  covariance_ptr[4] = static_cast<scalar_t>(cumulants[1]);
541  // Covariances(2, 2)
542  covariance_ptr[8] = static_cast<scalar_t>(cumulants[2]);
543 
544  // Covariances(0, 1) = Covariances(1, 0)
545  covariance_ptr[1] = static_cast<scalar_t>(cumulants[3]);
546  covariance_ptr[3] = covariance_ptr[1];
547 
548  // Covariances(0, 2) = Covariances(2, 0)
549  covariance_ptr[2] = static_cast<scalar_t>(cumulants[4]);
550  covariance_ptr[6] = covariance_ptr[2];
551 
552  // Covariances(1, 2) = Covariances(2, 1)
553  covariance_ptr[5] = static_cast<scalar_t>(cumulants[5]);
554  covariance_ptr[7] = covariance_ptr[5];
555 }
556 
557 #if defined(__CUDACC__)
558 void EstimateCovariancesUsingHybridSearchCUDA
559 #else
561 #endif
562  (const core::Tensor& points,
563  core::Tensor& covariances,
564  const double& radius,
565  const int64_t& max_nn) {
566  core::Dtype dtype = points.GetDtype();
567  int64_t n = points.GetLength();
568 
570  bool check = tree.HybridIndex(radius);
571  if (!check) {
572  utility::LogError("Building FixedRadiusIndex failed.");
573  }
574 
575  core::Tensor indices, distance, counts;
576  std::tie(indices, distance, counts) =
577  tree.HybridSearch(points, radius, max_nn);
578 
579  DISPATCH_FLOAT_DTYPE_TO_TEMPLATE(dtype, [&]() {
580  const scalar_t* points_ptr = points.GetDataPtr<scalar_t>();
581  int32_t* neighbour_indices_ptr = indices.GetDataPtr<int32_t>();
582  int32_t* neighbour_counts_ptr = counts.GetDataPtr<int32_t>();
583  scalar_t* covariances_ptr = covariances.GetDataPtr<scalar_t>();
584 
586  points.GetDevice(), n, [=] OPEN3D_DEVICE(int64_t workload_idx) {
587  // NNS [Hybrid Search].
588  const int32_t neighbour_offset = max_nn * workload_idx;
589  // Count of valid correspondences per point.
590  const int32_t neighbour_count =
591  neighbour_counts_ptr[workload_idx];
592  // Covariance is of shape {3, 3}, so it has an
593  // offset factor of 9 x workload_idx.
594  const int32_t covariances_offset = 9 * workload_idx;
595 
597  points_ptr,
598  neighbour_indices_ptr + neighbour_offset,
599  neighbour_count,
600  covariances_ptr + covariances_offset);
601  });
602  });
603 
604  core::cuda::Synchronize(points.GetDevice());
605 }
606 
607 #if defined(__CUDACC__)
608 void EstimateCovariancesUsingRadiusSearchCUDA
609 #else
611 #endif
612  (const core::Tensor& points,
613  core::Tensor& covariances,
614  const double& radius) {
615  core::Dtype dtype = points.GetDtype();
616  int64_t n = points.GetLength();
617 
619  bool check = tree.FixedRadiusIndex(radius);
620  if (!check) {
621  utility::LogError("Building Radius-Index failed.");
622  }
623 
624  core::Tensor indices, distance, counts;
625  std::tie(indices, distance, counts) =
626  tree.FixedRadiusSearch(points, radius);
627 
628  DISPATCH_FLOAT_DTYPE_TO_TEMPLATE(dtype, [&]() {
629  const scalar_t* points_ptr = points.GetDataPtr<scalar_t>();
630  const int32_t* neighbour_indices_ptr = indices.GetDataPtr<int32_t>();
631  const int32_t* neighbour_counts_ptr = counts.GetDataPtr<int32_t>();
632  scalar_t* covariances_ptr = covariances.GetDataPtr<scalar_t>();
633 
635  points.GetDevice(), n, [=] OPEN3D_DEVICE(int64_t workload_idx) {
636  const int32_t neighbour_offset =
637  neighbour_counts_ptr[workload_idx];
638  const int32_t neighbour_count =
639  (neighbour_counts_ptr[workload_idx + 1] -
640  neighbour_counts_ptr[workload_idx]);
641  // Covariance is of shape {3, 3}, so it has an offset
642  // factor of 9 x workload_idx.
643  const int32_t covariances_offset = 9 * workload_idx;
644 
646  points_ptr,
647  neighbour_indices_ptr + neighbour_offset,
648  neighbour_count,
649  covariances_ptr + covariances_offset);
650  });
651  });
652 
653  core::cuda::Synchronize(points.GetDevice());
654 }
655 
656 #if defined(__CUDACC__)
657 void EstimateCovariancesUsingKNNSearchCUDA
658 #else
660 #endif
661  (const core::Tensor& points,
662  core::Tensor& covariances,
663  const int64_t& max_nn) {
664  core::Dtype dtype = points.GetDtype();
665  int64_t n = points.GetLength();
666 
668  bool check = tree.KnnIndex();
669  if (!check) {
670  utility::LogError("Building KNN-Index failed.");
671  }
672 
673  core::Tensor indices, distance;
674  std::tie(indices, distance) = tree.KnnSearch(points, max_nn);
675 
676  indices = indices.Contiguous();
677  int32_t nn_count = static_cast<int32_t>(indices.GetShape()[1]);
678 
679  if (nn_count < 3) {
681  "Not enough neighbors to compute Covariances / Normals. "
682  "Try "
683  "increasing the max_nn parameter.");
684  }
685 
686  DISPATCH_FLOAT_DTYPE_TO_TEMPLATE(dtype, [&]() {
687  auto points_ptr = points.GetDataPtr<scalar_t>();
688  auto neighbour_indices_ptr = indices.GetDataPtr<int32_t>();
689  auto covariances_ptr = covariances.GetDataPtr<scalar_t>();
690 
692  points.GetDevice(), n, [=] OPEN3D_DEVICE(int64_t workload_idx) {
693  // NNS [KNN Search].
694  const int32_t neighbour_offset = nn_count * workload_idx;
695  // Covariance is of shape {3, 3}, so it has an offset
696  // factor of 9 x workload_idx.
697  const int32_t covariances_offset = 9 * workload_idx;
698 
700  points_ptr,
701  neighbour_indices_ptr + neighbour_offset, nn_count,
702  covariances_ptr + covariances_offset);
703  });
704  });
705 
706  core::cuda::Synchronize(points.GetDevice());
707 }
708 
709 template <typename scalar_t>
711  const scalar_t eval0,
712  scalar_t* eigen_vector0) {
713  scalar_t row0[3] = {A[0] - eval0, A[1], A[2]};
714  scalar_t row1[3] = {A[1], A[4] - eval0, A[5]};
715  scalar_t row2[3] = {A[2], A[5], A[8] - eval0};
716 
717  scalar_t r0xr1[3], r0xr2[3], r1xr2[3];
718 
719  core::linalg::kernel::cross_3x1(row0, row1, r0xr1);
720  core::linalg::kernel::cross_3x1(row0, row2, r0xr2);
721  core::linalg::kernel::cross_3x1(row1, row2, r1xr2);
722 
723  scalar_t d0 = core::linalg::kernel::dot_3x1(r0xr1, r0xr1);
724  scalar_t d1 = core::linalg::kernel::dot_3x1(r0xr2, r0xr2);
725  scalar_t d2 = core::linalg::kernel::dot_3x1(r1xr2, r1xr2);
726 
727  scalar_t dmax = d0;
728  int imax = 0;
729  if (d1 > dmax) {
730  dmax = d1;
731  imax = 1;
732  }
733  if (d2 > dmax) {
734  imax = 2;
735  }
736 
737  if (imax == 0) {
738  scalar_t sqrt_d = sqrt(d0);
739  eigen_vector0[0] = r0xr1[0] / sqrt_d;
740  eigen_vector0[1] = r0xr1[1] / sqrt_d;
741  eigen_vector0[2] = r0xr1[2] / sqrt_d;
742  return;
743  } else if (imax == 1) {
744  scalar_t sqrt_d = sqrt(d1);
745  eigen_vector0[0] = r0xr2[0] / sqrt_d;
746  eigen_vector0[1] = r0xr2[1] / sqrt_d;
747  eigen_vector0[2] = r0xr2[2] / sqrt_d;
748  return;
749  } else {
750  scalar_t sqrt_d = sqrt(d2);
751  eigen_vector0[0] = r1xr2[0] / sqrt_d;
752  eigen_vector0[1] = r1xr2[1] / sqrt_d;
753  eigen_vector0[2] = r1xr2[2] / sqrt_d;
754  return;
755  }
756 }
757 
758 template <typename scalar_t>
760  const scalar_t* evec0,
761  const scalar_t eval1,
762  scalar_t* eigen_vector1) {
763  scalar_t U[3];
764  if (abs(evec0[0]) > abs(evec0[1])) {
765  scalar_t inv_length =
766  1.0 / sqrt(evec0[0] * evec0[0] + evec0[2] * evec0[2]);
767  U[0] = -evec0[2] * inv_length;
768  U[1] = 0.0;
769  U[2] = evec0[0] * inv_length;
770  } else {
771  scalar_t inv_length =
772  1.0 / sqrt(evec0[1] * evec0[1] + evec0[2] * evec0[2]);
773  U[0] = 0.0;
774  U[1] = evec0[2] * inv_length;
775  U[2] = -evec0[1] * inv_length;
776  }
777  scalar_t V[3], AU[3], AV[3];
778  core::linalg::kernel::cross_3x1(evec0, U, V);
779  core::linalg::kernel::matmul3x3_3x1(A, U, AU);
780  core::linalg::kernel::matmul3x3_3x1(A, V, AV);
781 
782  scalar_t m00 = core::linalg::kernel::dot_3x1(U, AU) - eval1;
783  scalar_t m01 = core::linalg::kernel::dot_3x1(U, AV);
784  scalar_t m11 = core::linalg::kernel::dot_3x1(V, AV) - eval1;
785 
786  scalar_t absM00 = abs(m00);
787  scalar_t absM01 = abs(m01);
788  scalar_t absM11 = abs(m11);
789  scalar_t max_abs_comp;
790 
791  if (absM00 >= absM11) {
792  max_abs_comp = max(absM00, absM01);
793  if (max_abs_comp > 0) {
794  if (absM00 >= absM01) {
795  m01 /= m00;
796  m00 = 1 / sqrt(1 + m01 * m01);
797  m01 *= m00;
798  } else {
799  m00 /= m01;
800  m01 = 1 / sqrt(1 + m00 * m00);
801  m00 *= m01;
802  }
803  eigen_vector1[0] = m01 * U[0] - m00 * V[0];
804  eigen_vector1[1] = m01 * U[1] - m00 * V[1];
805  eigen_vector1[2] = m01 * U[2] - m00 * V[2];
806  return;
807  } else {
808  eigen_vector1[0] = U[0];
809  eigen_vector1[1] = U[1];
810  eigen_vector1[2] = U[2];
811  return;
812  }
813  } else {
814  max_abs_comp = max(absM11, absM01);
815  if (max_abs_comp > 0) {
816  if (absM11 >= absM01) {
817  m01 /= m11;
818  m11 = 1 / sqrt(1 + m01 * m01);
819  m01 *= m11;
820  } else {
821  m11 /= m01;
822  m01 = 1 / sqrt(1 + m11 * m11);
823  m11 *= m01;
824  }
825  eigen_vector1[0] = m11 * U[0] - m01 * V[0];
826  eigen_vector1[1] = m11 * U[1] - m01 * V[1];
827  eigen_vector1[2] = m11 * U[2] - m01 * V[2];
828  return;
829  } else {
830  eigen_vector1[0] = U[0];
831  eigen_vector1[1] = U[1];
832  eigen_vector1[2] = U[2];
833  return;
834  }
835  }
836 }
837 
838 template <typename scalar_t>
840  const scalar_t* covariance_ptr, scalar_t* normals_ptr) {
841  // Based on:
842  // https://www.geometrictools.com/Documentation/RobustEigenSymmetric3x3.pdf
843  // which handles edge cases like points on a plane.
844  scalar_t max_coeff = covariance_ptr[0];
845 
846  for (int i = 1; i < 9; ++i) {
847  if (max_coeff < covariance_ptr[i]) {
848  max_coeff = covariance_ptr[i];
849  }
850  }
851 
852  if (max_coeff == 0) {
853  normals_ptr[0] = 0.0;
854  normals_ptr[1] = 0.0;
855  normals_ptr[2] = 0.0;
856  return;
857  }
858 
859  scalar_t A[9] = {0};
860 
861  for (int i = 0; i < 9; ++i) {
862  A[i] = covariance_ptr[i] / max_coeff;
863  }
864 
865  scalar_t norm = A[1] * A[1] + A[2] * A[2] + A[5] * A[5];
866 
867  if (norm > 0) {
868  scalar_t eval[3];
869  scalar_t evec0[3];
870  scalar_t evec1[3];
871  scalar_t evec2[3];
872 
873  scalar_t q = (A[0] + A[4] + A[8]) / 3.0;
874 
875  scalar_t b00 = A[0] - q;
876  scalar_t b11 = A[4] - q;
877  scalar_t b22 = A[8] - q;
878 
879  scalar_t p =
880  sqrt((b00 * b00 + b11 * b11 + b22 * b22 + norm * 2.0) / 6.0);
881 
882  scalar_t c00 = b11 * b22 - A[5] * A[5];
883  scalar_t c01 = A[1] * b22 - A[5] * A[2];
884  scalar_t c02 = A[1] * A[5] - b11 * A[2];
885  scalar_t det = (b00 * c00 - A[1] * c01 + A[2] * c02) / (p * p * p);
886 
887  scalar_t half_det = det * 0.5;
888  half_det = min(max(half_det, static_cast<scalar_t>(-1.0)),
889  static_cast<scalar_t>(1.0));
890 
891  scalar_t angle = acos(half_det) / 3.0;
892  const scalar_t two_thrids_pi = 2.09439510239319549;
893 
894  scalar_t beta2 = cos(angle) * 2.0;
895  scalar_t beta0 = cos(angle + two_thrids_pi) * 2.0;
896  scalar_t beta1 = -(beta0 + beta2);
897 
898  eval[0] = q + p * beta0;
899  eval[1] = q + p * beta1;
900  eval[2] = q + p * beta2;
901 
902  if (half_det >= 0) {
903  ComputeEigenvector0<scalar_t>(A, eval[2], evec2);
904 
905  if (eval[2] < eval[0] && eval[2] < eval[1]) {
906  normals_ptr[0] = evec2[0];
907  normals_ptr[1] = evec2[1];
908  normals_ptr[2] = evec2[2];
909 
910  return;
911  }
912 
913  ComputeEigenvector1<scalar_t>(A, evec2, eval[1], evec1);
914 
915  if (eval[1] < eval[0] && eval[1] < eval[2]) {
916  normals_ptr[0] = evec1[0];
917  normals_ptr[1] = evec1[1];
918  normals_ptr[2] = evec1[2];
919 
920  return;
921  }
922 
923  normals_ptr[0] = evec1[1] * evec2[2] - evec1[2] * evec2[1];
924  normals_ptr[1] = evec1[2] * evec2[0] - evec1[0] * evec2[2];
925  normals_ptr[2] = evec1[0] * evec2[1] - evec1[1] * evec2[0];
926 
927  return;
928  } else {
929  ComputeEigenvector0<scalar_t>(A, eval[0], evec0);
930 
931  if (eval[0] < eval[1] && eval[0] < eval[2]) {
932  normals_ptr[0] = evec0[0];
933  normals_ptr[1] = evec0[1];
934  normals_ptr[2] = evec0[2];
935  return;
936  }
937 
938  ComputeEigenvector1<scalar_t>(A, evec0, eval[1], evec1);
939 
940  if (eval[1] < eval[0] && eval[1] < eval[2]) {
941  normals_ptr[0] = evec1[0];
942  normals_ptr[1] = evec1[1];
943  normals_ptr[2] = evec1[2];
944  return;
945  }
946 
947  normals_ptr[0] = evec0[1] * evec1[2] - evec0[2] * evec1[1];
948  normals_ptr[1] = evec0[2] * evec1[0] - evec0[0] * evec1[2];
949  normals_ptr[2] = evec0[0] * evec1[1] - evec0[1] * evec1[0];
950  return;
951  }
952  } else {
953  if (covariance_ptr[0] < covariance_ptr[4] &&
954  covariance_ptr[0] < covariance_ptr[8]) {
955  normals_ptr[0] = 1.0;
956  normals_ptr[1] = 0.0;
957  normals_ptr[2] = 0.0;
958  return;
959  } else if (covariance_ptr[0] < covariance_ptr[4] &&
960  covariance_ptr[0] < covariance_ptr[8]) {
961  normals_ptr[0] = 0.0;
962  normals_ptr[1] = 1.0;
963  normals_ptr[2] = 0.0;
964  return;
965  } else {
966  normals_ptr[0] = 0.0;
967  normals_ptr[1] = 0.0;
968  normals_ptr[2] = 1.0;
969  return;
970  }
971  }
972 }
973 
974 #if defined(__CUDACC__)
975 void EstimateNormalsFromCovariancesCUDA
976 #else
978 #endif
979  (const core::Tensor& covariances,
980  core::Tensor& normals,
981  const bool has_normals) {
982  core::Dtype dtype = covariances.GetDtype();
983  int64_t n = covariances.GetLength();
984 
985  DISPATCH_FLOAT_DTYPE_TO_TEMPLATE(dtype, [&]() {
986  const scalar_t* covariances_ptr = covariances.GetDataPtr<scalar_t>();
987  scalar_t* normals_ptr = normals.GetDataPtr<scalar_t>();
988 
990  covariances.GetDevice(), n,
991  [=] OPEN3D_DEVICE(int64_t workload_idx) {
992  int32_t covariances_offset = 9 * workload_idx;
993  int32_t normals_offset = 3 * workload_idx;
994  scalar_t normals_output[3] = {0};
995  EstimatePointWiseNormalsWithFastEigen3x3<scalar_t>(
996  covariances_ptr + covariances_offset,
997  normals_output);
998 
999  if ((normals_output[0] * normals_output[0] +
1000  normals_output[1] * normals_output[1] +
1001  normals_output[2] * normals_output[2]) == 0.0 &&
1002  !has_normals) {
1003  normals_output[0] = 0.0;
1004  normals_output[1] = 0.0;
1005  normals_output[2] = 1.0;
1006  }
1007  if (has_normals) {
1008  if ((normals_ptr[normals_offset] * normals_output[0] +
1009  normals_ptr[normals_offset + 1] *
1010  normals_output[1] +
1011  normals_ptr[normals_offset + 2] *
1012  normals_output[2]) < 0.0) {
1013  normals_output[0] *= -1;
1014  normals_output[1] *= -1;
1015  normals_output[2] *= -1;
1016  }
1017  }
1018 
1019  normals_ptr[normals_offset] = normals_output[0];
1020  normals_ptr[normals_offset + 1] = normals_output[1];
1021  normals_ptr[normals_offset + 2] = normals_output[2];
1022  });
1023  });
1024 
1025  core::cuda::Synchronize(covariances.GetDevice());
1026 }
1027 
1028 template <typename scalar_t>
1030  const scalar_t* points_ptr,
1031  const scalar_t* normals_ptr,
1032  const scalar_t* colors_ptr,
1033  const int32_t& idx_offset,
1034  const int32_t* indices_ptr,
1035  const int32_t& indices_count,
1036  scalar_t* color_gradients_ptr) {
1037  if (indices_count < 4) {
1038  color_gradients_ptr[idx_offset] = 0;
1039  color_gradients_ptr[idx_offset + 1] = 0;
1040  color_gradients_ptr[idx_offset + 2] = 0;
1041  } else {
1042  scalar_t vt[3] = {points_ptr[idx_offset], points_ptr[idx_offset + 1],
1043  points_ptr[idx_offset + 2]};
1044 
1045  scalar_t nt[3] = {normals_ptr[idx_offset], normals_ptr[idx_offset + 1],
1046  normals_ptr[idx_offset + 2]};
1047 
1048  scalar_t it = (colors_ptr[idx_offset] + colors_ptr[idx_offset + 1] +
1049  colors_ptr[idx_offset + 2]) /
1050  3.0;
1051 
1052  scalar_t AtA[9] = {0};
1053  scalar_t Atb[3] = {0};
1054 
1055  // approximate image gradient of vt's tangential plane
1056  // projection (p') of a point p on a plane defined by
1057  // normal n, where o is the closest point to p on the
1058  // plane, is given by:
1059  // p' = p - [(p - o).dot(n)] * n p'
1060  // => p - [(p.dot(n) - s)] * n [where s = o.dot(n)]
1061 
1062  // Computing the scalar s.
1063  scalar_t s = vt[0] * nt[0] + vt[1] * nt[1] + vt[2] * nt[2];
1064 
1065  int i = 1;
1066  for (; i < indices_count; i++) {
1067  int64_t neighbour_idx_offset = 3 * indices_ptr[i];
1068 
1069  if (neighbour_idx_offset == -1) {
1070  break;
1071  }
1072 
1073  scalar_t vt_adj[3] = {points_ptr[neighbour_idx_offset],
1074  points_ptr[neighbour_idx_offset + 1],
1075  points_ptr[neighbour_idx_offset + 2]};
1076 
1077  // p' = p - d * n [where d = p.dot(n) - s]
1078  // Computing the scalar d.
1079  scalar_t d = vt_adj[0] * nt[0] + vt_adj[1] * nt[1] +
1080  vt_adj[2] * nt[2] - s;
1081 
1082  // Computing the p' (projection of the point).
1083  scalar_t vt_proj[3] = {vt_adj[0] - d * nt[0], vt_adj[1] - d * nt[1],
1084  vt_adj[2] - d * nt[2]};
1085 
1086  scalar_t it_adj = (colors_ptr[neighbour_idx_offset + 0] +
1087  colors_ptr[neighbour_idx_offset + 1] +
1088  colors_ptr[neighbour_idx_offset + 2]) /
1089  3.0;
1090 
1091  scalar_t A[3] = {vt_proj[0] - vt[0], vt_proj[1] - vt[1],
1092  vt_proj[2] - vt[2]};
1093 
1094  AtA[0] += A[0] * A[0];
1095  AtA[1] += A[1] * A[0];
1096  AtA[2] += A[2] * A[0];
1097  AtA[4] += A[1] * A[1];
1098  AtA[5] += A[2] * A[1];
1099  AtA[8] += A[2] * A[2];
1100 
1101  scalar_t b = it_adj - it;
1102 
1103  Atb[0] += A[0] * b;
1104  Atb[1] += A[1] * b;
1105  Atb[2] += A[2] * b;
1106  }
1107 
1108  // Orthogonal constraint.
1109  scalar_t A[3] = {(i - 1) * nt[0], (i - 1) * nt[1], (i - 1) * nt[2]};
1110 
1111  AtA[0] += A[0] * A[0];
1112  AtA[1] += A[0] * A[1];
1113  AtA[2] += A[0] * A[2];
1114  AtA[4] += A[1] * A[1];
1115  AtA[5] += A[1] * A[2];
1116  AtA[8] += A[2] * A[2];
1117 
1118  // Symmetry.
1119  AtA[3] = AtA[1];
1120  AtA[6] = AtA[2];
1121  AtA[7] = AtA[5];
1122 
1124  color_gradients_ptr + idx_offset);
1125  }
1126 }
1127 
1128 #if defined(__CUDACC__)
1129 void EstimateColorGradientsUsingHybridSearchCUDA
1130 #else
1132 #endif
1133  (const core::Tensor& points,
1134  const core::Tensor& normals,
1135  const core::Tensor& colors,
1136  core::Tensor& color_gradients,
1137  const double& radius,
1138  const int64_t& max_nn) {
1139  core::Dtype dtype = points.GetDtype();
1140  int64_t n = points.GetLength();
1141 
1143 
1144  bool check = tree.HybridIndex(radius);
1145  if (!check) {
1146  utility::LogError("NearestNeighborSearch::HybridIndex is not set.");
1147  }
1148 
1149  core::Tensor indices, distance, counts;
1150  std::tie(indices, distance, counts) =
1151  tree.HybridSearch(points, radius, max_nn);
1152 
1153  DISPATCH_FLOAT_DTYPE_TO_TEMPLATE(dtype, [&]() {
1154  auto points_ptr = points.GetDataPtr<scalar_t>();
1155  auto normals_ptr = normals.GetDataPtr<scalar_t>();
1156  auto colors_ptr = colors.GetDataPtr<scalar_t>();
1157  auto neighbour_indices_ptr = indices.GetDataPtr<int32_t>();
1158  auto neighbour_counts_ptr = counts.GetDataPtr<int32_t>();
1159  auto color_gradients_ptr = color_gradients.GetDataPtr<scalar_t>();
1160 
1162  points.GetDevice(), n, [=] OPEN3D_DEVICE(int64_t workload_idx) {
1163  // NNS [Hybrid Search].
1164  int32_t neighbour_offset = max_nn * workload_idx;
1165  // Count of valid correspondences per point.
1166  int32_t neighbour_count =
1167  neighbour_counts_ptr[workload_idx];
1168  int32_t idx_offset = 3 * workload_idx;
1169 
1171  points_ptr, normals_ptr, colors_ptr, idx_offset,
1172  neighbour_indices_ptr + neighbour_offset,
1173  neighbour_count, color_gradients_ptr);
1174  });
1175  });
1176 
1177  core::cuda::Synchronize(points.GetDevice());
1178 }
1179 
1180 #if defined(__CUDACC__)
1181 void EstimateColorGradientsUsingKNNSearchCUDA
1182 #else
1184 #endif
1185  (const core::Tensor& points,
1186  const core::Tensor& normals,
1187  const core::Tensor& colors,
1188  core::Tensor& color_gradients,
1189  const int64_t& max_nn) {
1190  core::Dtype dtype = points.GetDtype();
1191  int64_t n = points.GetLength();
1192 
1194 
1195  bool check = tree.KnnIndex();
1196  if (!check) {
1197  utility::LogError("KnnIndex is not set.");
1198  }
1199 
1200  core::Tensor indices, distance;
1201  std::tie(indices, distance) = tree.KnnSearch(points, max_nn);
1202 
1203  indices = indices.To(core::Int32).Contiguous();
1204  int64_t nn_count = indices.GetShape()[1];
1205 
1206  if (nn_count < 4) {
1208  "Not enough neighbors to compute Covariances / Normals. "
1209  "Try "
1210  "changing the search parameter.");
1211  }
1212 
1213  DISPATCH_FLOAT_DTYPE_TO_TEMPLATE(dtype, [&]() {
1214  auto points_ptr = points.GetDataPtr<scalar_t>();
1215  auto normals_ptr = normals.GetDataPtr<scalar_t>();
1216  auto colors_ptr = colors.GetDataPtr<scalar_t>();
1217  auto neighbour_indices_ptr = indices.GetDataPtr<int32_t>();
1218  auto color_gradients_ptr = color_gradients.GetDataPtr<scalar_t>();
1219 
1221  points.GetDevice(), n, [=] OPEN3D_DEVICE(int64_t workload_idx) {
1222  int32_t neighbour_offset = max_nn * workload_idx;
1223  int32_t idx_offset = 3 * workload_idx;
1224 
1226  points_ptr, normals_ptr, colors_ptr, idx_offset,
1227  neighbour_indices_ptr + neighbour_offset, nn_count,
1228  color_gradients_ptr);
1229  });
1230  });
1231 
1232  core::cuda::Synchronize(points.GetDevice());
1233 }
1234 
1235 #if defined(__CUDACC__)
1236 void EstimateColorGradientsUsingRadiusSearchCUDA
1237 #else
1239 #endif
1240  (const core::Tensor& points,
1241  const core::Tensor& normals,
1242  const core::Tensor& colors,
1243  core::Tensor& color_gradients,
1244  const double& radius) {
1245  core::Dtype dtype = points.GetDtype();
1246  int64_t n = points.GetLength();
1247 
1249 
1250  bool check = tree.FixedRadiusIndex(radius);
1251  if (!check) {
1252  utility::LogError("RadiusIndex is not set.");
1253  }
1254 
1255  core::Tensor indices, distance, counts;
1256  std::tie(indices, distance, counts) =
1257  tree.FixedRadiusSearch(points, radius);
1258 
1259  indices = indices.To(core::Int32).Contiguous();
1260  counts = counts.Contiguous();
1261 
1262  DISPATCH_FLOAT_DTYPE_TO_TEMPLATE(dtype, [&]() {
1263  auto points_ptr = points.GetDataPtr<scalar_t>();
1264  auto normals_ptr = normals.GetDataPtr<scalar_t>();
1265  auto colors_ptr = colors.GetDataPtr<scalar_t>();
1266  auto neighbour_indices_ptr = indices.GetDataPtr<int32_t>();
1267  auto neighbour_counts_ptr = counts.GetDataPtr<int32_t>();
1268  auto color_gradients_ptr = color_gradients.GetDataPtr<scalar_t>();
1269 
1271  points.GetDevice(), n, [=] OPEN3D_DEVICE(int64_t workload_idx) {
1272  int32_t neighbour_offset =
1273  neighbour_counts_ptr[workload_idx];
1274  // Count of valid correspondences per point.
1275  const int32_t neighbour_count =
1276  (neighbour_counts_ptr[workload_idx + 1] -
1277  neighbour_counts_ptr[workload_idx]);
1278  int32_t idx_offset = 3 * workload_idx;
1279 
1281  points_ptr, normals_ptr, colors_ptr, idx_offset,
1282  neighbour_indices_ptr + neighbour_offset,
1283  neighbour_count, color_gradients_ptr);
1284  });
1285  });
1286 
1287  core::cuda::Synchronize(points.GetDevice());
1288 }
1289 
1290 } // namespace pointcloud
1291 } // namespace kernel
1292 } // namespace geometry
1293 } // namespace t
1294 } // namespace open3d
Common CUDA utilities.
#define OPEN3D_HOST_DEVICE
Definition: CUDAUtils.h:44
#define OPEN3D_DEVICE
Definition: CUDAUtils.h:45
#define DISPATCH_DTYPE_TO_TEMPLATE(DTYPE,...)
Definition: Dispatch.h:30
#define DISPATCH_FLOAT_DTYPE_TO_TEMPLATE(DTYPE,...)
Definition: Dispatch.h:77
#define LogError(...)
Definition: Logging.h:48
size_t stride
Definition: TriangleMeshBuffers.cpp:165
Definition: Dtype.h:20
Definition: Tensor.h:32
T * GetDataPtr()
Definition: Tensor.h:1133
SizeVector GetShape() const
Definition: Tensor.h:1116
Tensor Div(const Tensor &value) const
Divides a tensor and returns the resulting tensor.
Definition: Tensor.cpp:1135
Tensor Contiguous() const
Definition: Tensor.cpp:739
Tensor Transpose(int64_t dim0, int64_t dim1) const
Transpose a Tensor by swapping dimension dim0 and dim1.
Definition: Tensor.cpp:998
Tensor To(Dtype dtype, bool copy=false) const
Definition: Tensor.cpp:706
A Class for nearest neighbor search.
Definition: NearestNeighborSearch.h:25
std::tuple< Tensor, Tensor, Tensor > HybridSearch(const Tensor &query_points, const double radius, const int max_knn) const
Definition: NearestNeighborSearch.cpp:130
bool FixedRadiusIndex(utility::optional< double > radius={})
Definition: NearestNeighborSearch.cpp:40
std::tuple< Tensor, Tensor, Tensor > FixedRadiusSearch(const Tensor &query_points, double radius, bool sort=true)
Definition: NearestNeighborSearch.cpp:98
bool KnnIndex()
Definition: NearestNeighborSearch.cpp:23
bool HybridIndex(utility::optional< double > radius={})
Definition: NearestNeighborSearch.cpp:60
std::pair< Tensor, Tensor > KnnSearch(const Tensor &query_points, int knn)
Definition: NearestNeighborSearch.cpp:79
Definition: GeometryIndexer.h:161
OPEN3D_HOST_DEVICE index_t GetShape(int i) const
Definition: GeometryIndexer.h:311
Helper class for converting coordinates/indices between 3D/3D, 3D/2D, 2D/3D.
Definition: GeometryIndexer.h:25
Definition: Optional.h:259
bool has_normals
Definition: FilePCD.cpp:61
int count
Definition: FilePCD.cpp:42
int points
Definition: FilePCD.cpp:54
void Synchronize()
Definition: CUDAUtils.cpp:58
OPEN3D_HOST_DEVICE OPEN3D_FORCE_INLINE void cross_3x1(const scalar_t *A_3x1_input, const scalar_t *B_3x1_input, scalar_t *C_3x1_output)
Definition: Matrix.h:63
OPEN3D_DEVICE OPEN3D_FORCE_INLINE void solve_svd3x3(const scalar_t *A_3x3, const scalar_t *B_3x1, scalar_t *X_3x1)
Definition: SVD3x3.h:2171
OPEN3D_HOST_DEVICE OPEN3D_FORCE_INLINE scalar_t dot_3x1(const scalar_t *A_3x1_input, const scalar_t *B_3x1_input)
Definition: Matrix.h:77
const Dtype Int32
Definition: Dtype.cpp:46
void ParallelFor(const Device &device, int64_t n, const func_t &func)
Definition: ParallelFor.h:103
const Dtype Float32
Definition: Dtype.cpp:42
const char const char value recording_handle imu_sample recording_handle uint8_t size_t data_size k4a_record_configuration_t config target_format k4a_capture_t capture_handle k4a_imu_sample_t imu_sample playback_handle k4a_logging_message_cb_t void min_level device_handle k4a_imu_sample_t int32_t
Definition: K4aPlugin.cpp:395
void EstimateCovariancesUsingHybridSearchCPU(const core::Tensor &points, core::Tensor &covariances, const double &radius, const int64_t &max_nn)
Definition: PointCloudImpl.h:562
void EstimateCovariancesUsingRadiusSearchCPU(const core::Tensor &points, core::Tensor &covariances, const double &radius)
Definition: PointCloudImpl.h:612
OPEN3D_HOST_DEVICE void GetCoordinateSystemOnPlane(const scalar_t *query, scalar_t *u, scalar_t *v)
Definition: PointCloudImpl.h:336
void UnprojectCPU(const core::Tensor &depth, utility::optional< std::reference_wrapper< const core::Tensor >> image_colors, core::Tensor &points, utility::optional< std::reference_wrapper< core::Tensor >> colors, const core::Tensor &intrinsics, const core::Tensor &extrinsics, float depth_scale, float depth_max, int64_t stride)
Definition: PointCloudImpl.h:45
void EstimateNormalsFromCovariancesCPU(const core::Tensor &covariances, core::Tensor &normals, const bool has_normals)
Definition: PointCloudImpl.h:979
OPEN3D_HOST_DEVICE void ComputeEigenvector0(const scalar_t *A, const scalar_t eval0, scalar_t *eigen_vector0)
Definition: PointCloudImpl.h:710
void OrientNormalsTowardsCameraLocationCPU(const core::Tensor &points, core::Tensor &normals, const core::Tensor &camera)
Definition: PointCloudImpl.h:286
OPEN3D_HOST_DEVICE void EstimatePointWiseRobustNormalizedCovarianceKernel(const scalar_t *points_ptr, const int32_t *indices_ptr, const int32_t &indices_count, scalar_t *covariance_ptr)
Definition: PointCloudImpl.h:482
void GetPointMaskWithinAABBCPU(const core::Tensor &points, const core::Tensor &min_bound, const core::Tensor &max_bound, core::Tensor &mask)
Definition: PointCloudImpl.h:143
OPEN3D_HOST_DEVICE void Swap(scalar_t *x, scalar_t *y)
Definition: PointCloudImpl.h:363
OPEN3D_HOST_DEVICE bool IsBoundaryPoints(const scalar_t *angles, int counts, double angle_threshold)
Definition: PointCloudImpl.h:398
void ComputeBoundaryPointsCPU(const core::Tensor &points, const core::Tensor &normals, const core::Tensor &indices, const core::Tensor &counts, core::Tensor &mask, double angle_threshold)
Definition: PointCloudImpl.h:421
void EstimateColorGradientsUsingKNNSearchCPU(const core::Tensor &points, const core::Tensor &normals, const core::Tensor &colors, core::Tensor &color_gradient, const int64_t &max_nn)
Definition: PointCloudImpl.h:1185
void NormalizeNormalsCPU(core::Tensor &normals)
Definition: PointCloudImpl.h:221
OPEN3D_HOST_DEVICE void ComputeEigenvector1(const scalar_t *A, const scalar_t *evec0, const scalar_t eval1, scalar_t *eigen_vector1)
Definition: PointCloudImpl.h:759
OPEN3D_HOST_DEVICE void EstimatePointWiseColorGradientKernel(const scalar_t *points_ptr, const scalar_t *normals_ptr, const scalar_t *colors_ptr, const int32_t &idx_offset, const int32_t *indices_ptr, const int32_t &indices_count, scalar_t *color_gradients_ptr)
Definition: PointCloudImpl.h:1029
void EstimateColorGradientsUsingRadiusSearchCPU(const core::Tensor &points, const core::Tensor &normals, const core::Tensor &colors, core::Tensor &color_gradient, const double &radius)
Definition: PointCloudImpl.h:1240
void GetPointMaskWithinOBBCPU(const core::Tensor &points, const core::Tensor &center, const core::Tensor &rotation, const core::Tensor &extent, core::Tensor &mask)
Definition: PointCloudImpl.h:177
void EstimateColorGradientsUsingHybridSearchCPU(const core::Tensor &points, const core::Tensor &normals, const core::Tensor &colors, core::Tensor &color_gradient, const double &radius, const int64_t &max_nn)
Definition: PointCloudImpl.h:1133
OPEN3D_HOST_DEVICE void EstimatePointWiseNormalsWithFastEigen3x3(const scalar_t *covariance_ptr, scalar_t *normals_ptr)
Definition: PointCloudImpl.h:839
OPEN3D_HOST_DEVICE void Heapify(scalar_t *arr, int n, int root)
Definition: PointCloudImpl.h:370
void OrientNormalsToAlignWithDirectionCPU(core::Tensor &normals, const core::Tensor &direction)
Definition: PointCloudImpl.h:252
void EstimateCovariancesUsingKNNSearchCPU(const core::Tensor &points, core::Tensor &covariances, const int64_t &max_nn)
Definition: PointCloudImpl.h:661
OPEN3D_HOST_DEVICE void HeapSort(scalar_t *arr, int n)
Definition: PointCloudImpl.h:388
TArrayIndexer< int64_t > NDArrayIndexer
Definition: GeometryIndexer.h:360
core::Tensor InverseTransformation(const core::Tensor &T)
TODO(wei): find a proper place for such functionalities.
Definition: Utility.h:77
Definition: PinholeCameraIntrinsic.cpp:16