Open3D (C++ API)  0.18.0
FixedRadiusSearchImpl.h
Go to the documentation of this file.
1 // ----------------------------------------------------------------------------
2 // - Open3D: www.open3d.org -
3 // ----------------------------------------------------------------------------
4 // Copyright (c) 2018-2023 www.open3d.org
5 // SPDX-License-Identifier: MIT
6 // ----------------------------------------------------------------------------
7 
8 #pragma once
9 
10 #include <tbb/parallel_for.h>
11 
12 #include <set>
13 
14 #include "open3d/core/Atomic.h"
16 #include "open3d/utility/Eigen.h"
17 #include "open3d/utility/Helper.h"
19 
20 namespace open3d {
21 namespace core {
22 namespace nns {
23 namespace impl {
24 
25 namespace {
26 
59 template <class T>
60 void BuildSpatialHashTableCPU(const size_t num_points,
61  const T* const points,
62  const T radius,
63  const size_t points_row_splits_size,
64  const int64_t* points_row_splits,
65  const uint32_t* hash_table_splits,
66  const size_t hash_table_cell_splits_size,
67  uint32_t* hash_table_cell_splits,
68  uint32_t* hash_table_index) {
69  using namespace open3d::utility;
70  typedef MiniVec<T, 3> Vec3_t;
71 
72  const int batch_size = points_row_splits_size - 1;
73  const T voxel_size = 2 * radius;
74  const T inv_voxel_size = 1 / voxel_size;
75 
76  memset(&hash_table_cell_splits[0], 0,
77  sizeof(uint32_t) * hash_table_cell_splits_size);
78 
79  // compute number of points that map to each hash
80  for (int i = 0; i < batch_size; ++i) {
81  const size_t hash_table_size =
82  hash_table_splits[i + 1] - hash_table_splits[i];
83  const size_t first_cell_idx = hash_table_splits[i];
84  tbb::parallel_for(
85  tbb::blocked_range<int64_t>(points_row_splits[i],
86  points_row_splits[i + 1]),
87  [&](const tbb::blocked_range<int64_t>& r) {
88  for (int64_t i = r.begin(); i != r.end(); ++i) {
89  Vec3_t pos(points + 3 * i);
90 
91  auto voxel_index =
92  ComputeVoxelIndex(pos, inv_voxel_size);
93  size_t hash =
94  SpatialHash(voxel_index) % hash_table_size;
95 
96  // note the +1 because we want the first
97  // element to be 0
98  core::AtomicFetchAddRelaxed(
99  &hash_table_cell_splits[first_cell_idx + hash +
100  1],
101  1);
102  }
103  });
104  }
105  InclusivePrefixSum(&hash_table_cell_splits[0],
106  &hash_table_cell_splits[hash_table_cell_splits_size],
107  &hash_table_cell_splits[0]);
108 
109  std::vector<uint32_t> count_tmp(hash_table_cell_splits_size - 1, 0);
110 
111  // now compute the indices for hash_table_index
112  for (int i = 0; i < batch_size; ++i) {
113  const size_t hash_table_size =
114  hash_table_splits[i + 1] - hash_table_splits[i];
115  const size_t first_cell_idx = hash_table_splits[i];
116  tbb::parallel_for(
117  tbb::blocked_range<size_t>(points_row_splits[i],
118  points_row_splits[i + 1]),
119  [&](const tbb::blocked_range<size_t>& r) {
120  for (size_t i = r.begin(); i != r.end(); ++i) {
121  Vec3_t pos(points + 3 * i);
122 
123  auto voxel_index =
124  ComputeVoxelIndex(pos, inv_voxel_size);
125  size_t hash =
126  SpatialHash(voxel_index) % hash_table_size;
127 
128  hash_table_index
129  [hash_table_cell_splits[hash + first_cell_idx] +
130  core::AtomicFetchAddRelaxed(
131  &count_tmp[hash + first_cell_idx],
132  1)] = i;
133  }
134  });
135  }
136 }
137 
155 template <int METRIC, class TDerived, int VECSIZE>
156 Eigen::Array<typename TDerived::Scalar, VECSIZE, 1> NeighborsDist(
157  const Eigen::ArrayBase<TDerived>& p,
158  const Eigen::Array<typename TDerived::Scalar, VECSIZE, 3>& points) {
159  typedef Eigen::Array<typename TDerived::Scalar, VECSIZE, 1> VecN_t;
160  VecN_t dist;
161 
162  dist.setZero();
163  if (METRIC == Linf) {
164  dist = (points.rowwise() - p.transpose()).abs().rowwise().maxCoeff();
165  } else if (METRIC == L1) {
166  dist = (points.rowwise() - p.transpose()).abs().rowwise().sum();
167  } else {
168  dist = (points.rowwise() - p.transpose()).square().rowwise().sum();
169  }
170  return dist;
171 }
172 
175 template <class T,
176  class TIndex,
177  class OUTPUT_ALLOCATOR,
178  int METRIC,
179  bool IGNORE_QUERY_POINT,
180  bool RETURN_DISTANCES>
181 void _FixedRadiusSearchCPU(int64_t* query_neighbors_row_splits,
182  size_t num_points,
183  const T* const points,
184  size_t num_queries,
185  const T* const queries,
186  const T radius,
187  const size_t points_row_splits_size,
188  const int64_t* const points_row_splits,
189  const size_t queries_row_splits_size,
190  const int64_t* const queries_row_splits,
191  const uint32_t* const hash_table_splits,
192  const size_t hash_table_cell_splits_size,
193  const uint32_t* const hash_table_cell_splits,
194  const uint32_t* const hash_table_index,
195  OUTPUT_ALLOCATOR& output_allocator) {
196  using namespace open3d::utility;
197 
198 // number of elements for vectorization
199 #define VECSIZE 8
200  typedef MiniVec<T, 3> Vec3_t;
201  typedef Eigen::Array<T, VECSIZE, 1> Vec_t;
202  typedef Eigen::Array<TIndex, VECSIZE, 1> Veci_t;
203 
204  typedef Eigen::Array<T, 3, 1> Pos_t;
205  typedef Eigen::Array<T, VECSIZE, 3> Poslist_t;
206  typedef Eigen::Array<bool, VECSIZE, 1> Result_t;
207 
208  const int batch_size = points_row_splits_size - 1;
209 
210  // return empty output arrays if there are no points
211  if (num_points == 0 || num_queries == 0) {
212  std::fill(query_neighbors_row_splits,
213  query_neighbors_row_splits + num_queries + 1, 0);
214  TIndex* indices_ptr;
215  output_allocator.AllocIndices(&indices_ptr, 0);
216 
217  T* distances_ptr;
218  output_allocator.AllocDistances(&distances_ptr, 0);
219 
220  return;
221  }
222 
223  // use squared radius for L2 to avoid sqrt
224  const T threshold = (METRIC == L2 ? radius * radius : radius);
225 
226  const T voxel_size = 2 * radius;
227  const T inv_voxel_size = 1 / voxel_size;
228 
229  // counts the number of indices we have to return. This is the number of all
230  // neighbors we find.
231  size_t num_indices = 0;
232 
233  // count the number of neighbors for all query points and update num_indices
234  // and populate query_neighbors_row_splits with the number of neighbors
235  // for each query point
236  for (int i = 0; i < batch_size; ++i) {
237  const size_t hash_table_size =
238  hash_table_splits[i + 1] - hash_table_splits[i];
239  const size_t first_cell_idx = hash_table_splits[i];
240  tbb::parallel_for(
241  tbb::blocked_range<size_t>(queries_row_splits[i],
242  queries_row_splits[i + 1]),
243  [&](const tbb::blocked_range<size_t>& r) {
244  size_t num_indices_local = 0;
245  for (size_t i = r.begin(); i != r.end(); ++i) {
246  size_t neighbors_count = 0;
247 
248  Vec3_t pos(queries + i * 3);
249 
250  std::set<size_t> bins_to_visit;
251 
252  auto voxel_index =
253  ComputeVoxelIndex(pos, inv_voxel_size);
254  size_t hash =
255  SpatialHash(voxel_index) % hash_table_size;
256 
257  bins_to_visit.insert(first_cell_idx + hash);
258 
259  for (int dz = -1; dz <= 1; dz += 2)
260  for (int dy = -1; dy <= 1; dy += 2)
261  for (int dx = -1; dx <= 1; dx += 2) {
262  Vec3_t p =
263  pos + radius * Vec3_t(T(dx), T(dy),
264  T(dz));
265  voxel_index = ComputeVoxelIndex(
266  p, inv_voxel_size);
267  hash = SpatialHash(voxel_index) %
268  hash_table_size;
269  bins_to_visit.insert(first_cell_idx + hash);
270  }
271 
272  Poslist_t xyz;
273  int vec_i = 0;
274 
275  for (size_t bin : bins_to_visit) {
276  size_t begin_idx = hash_table_cell_splits[bin];
277  size_t end_idx = hash_table_cell_splits[bin + 1];
278 
279  for (size_t j = begin_idx; j < end_idx; ++j) {
280  uint32_t idx = hash_table_index[j];
281  if (IGNORE_QUERY_POINT) {
282  if (points[idx * 3 + 0] == pos[0] &&
283  points[idx * 3 + 1] == pos[1] &&
284  points[idx * 3 + 2] == pos[2])
285  continue;
286  }
287  xyz(vec_i, 0) = points[idx * 3 + 0];
288  xyz(vec_i, 1) = points[idx * 3 + 1];
289  xyz(vec_i, 2) = points[idx * 3 + 2];
290  ++vec_i;
291  if (VECSIZE == vec_i) {
292  Pos_t pos_arr(pos[0], pos[1], pos[2]);
293  Vec_t dist = NeighborsDist<METRIC, Pos_t,
294  VECSIZE>(pos_arr,
295  xyz);
296  Result_t test_result = dist <= threshold;
297  neighbors_count += test_result.count();
298  vec_i = 0;
299  }
300  }
301  }
302  // process the tail
303  if (vec_i) {
304  Pos_t pos_arr(pos[0], pos[1], pos[2]);
305  Vec_t dist = NeighborsDist<METRIC, Pos_t, VECSIZE>(
306  pos_arr, xyz);
307  Result_t test_result = dist <= threshold;
308  for (int k = 0; k < vec_i; ++k) {
309  neighbors_count += int(test_result(k));
310  }
311  vec_i = 0;
312  }
313  num_indices_local += neighbors_count;
314  // note the +1
315  query_neighbors_row_splits[i + 1] = neighbors_count;
316  }
317 
318  core::AtomicFetchAddRelaxed((uint64_t*)&num_indices,
319  num_indices_local);
320  });
321  }
322 
323  // Allocate output arrays
324  // output for the indices to the neighbors
325  TIndex* indices_ptr;
326  output_allocator.AllocIndices(&indices_ptr, num_indices);
327 
328  // output for the distances
329  T* distances_ptr;
330  if (RETURN_DISTANCES)
331  output_allocator.AllocDistances(&distances_ptr, num_indices);
332  else
333  output_allocator.AllocDistances(&distances_ptr, 0);
334 
335  query_neighbors_row_splits[0] = 0;
336  InclusivePrefixSum(query_neighbors_row_splits + 1,
337  query_neighbors_row_splits + num_queries + 1,
338  query_neighbors_row_splits + 1);
339 
340  // now populate the indices_ptr and distances_ptr array
341  for (int i = 0; i < batch_size; ++i) {
342  const size_t hash_table_size =
343  hash_table_splits[i + 1] - hash_table_splits[i];
344  const size_t first_cell_idx = hash_table_splits[i];
345  tbb::parallel_for(
346  tbb::blocked_range<size_t>(queries_row_splits[i],
347  queries_row_splits[i + 1]),
348  [&](const tbb::blocked_range<size_t>& r) {
349  for (size_t i = r.begin(); i != r.end(); ++i) {
350  size_t neighbors_count = 0;
351 
352  size_t indices_offset = query_neighbors_row_splits[i];
353 
354  Vec3_t pos(queries[i * 3 + 0], queries[i * 3 + 1],
355  queries[i * 3 + 2]);
356 
357  std::set<size_t> bins_to_visit;
358 
359  auto voxel_index =
360  ComputeVoxelIndex(pos, inv_voxel_size);
361  size_t hash =
362  SpatialHash(voxel_index) % hash_table_size;
363 
364  bins_to_visit.insert(first_cell_idx + hash);
365 
366  for (int dz = -1; dz <= 1; dz += 2)
367  for (int dy = -1; dy <= 1; dy += 2)
368  for (int dx = -1; dx <= 1; dx += 2) {
369  Vec3_t p =
370  pos + radius * Vec3_t(T(dx), T(dy),
371  T(dz));
372  voxel_index = ComputeVoxelIndex(
373  p, inv_voxel_size);
374  hash = SpatialHash(voxel_index) %
375  hash_table_size;
376  bins_to_visit.insert(first_cell_idx + hash);
377  }
378 
379  Poslist_t xyz;
380  Veci_t idx_vec;
381  int vec_i = 0;
382 
383  for (size_t bin : bins_to_visit) {
384  size_t begin_idx = hash_table_cell_splits[bin];
385  size_t end_idx = hash_table_cell_splits[bin + 1];
386 
387  for (size_t j = begin_idx; j < end_idx; ++j) {
388  int64_t idx = hash_table_index[j];
389  if (IGNORE_QUERY_POINT) {
390  if (points[idx * 3 + 0] == pos[0] &&
391  points[idx * 3 + 1] == pos[1] &&
392  points[idx * 3 + 2] == pos[2])
393  continue;
394  }
395  xyz(vec_i, 0) = points[idx * 3 + 0];
396  xyz(vec_i, 1) = points[idx * 3 + 1];
397  xyz(vec_i, 2) = points[idx * 3 + 2];
398  idx_vec(vec_i) = idx;
399  ++vec_i;
400  if (VECSIZE == vec_i) {
401  Pos_t pos_arr(pos[0], pos[1], pos[2]);
402  Vec_t dist = NeighborsDist<METRIC, Pos_t,
403  VECSIZE>(pos_arr,
404  xyz);
405  Result_t test_result = dist <= threshold;
406  for (int k = 0; k < vec_i; ++k) {
407  if (test_result(k)) {
408  indices_ptr[indices_offset +
409  neighbors_count] =
410  idx_vec[k];
411  if (RETURN_DISTANCES) {
412  distances_ptr[indices_offset +
413  neighbors_count] =
414  dist[k];
415  }
416  }
417  neighbors_count += int(test_result(k));
418  }
419  vec_i = 0;
420  }
421  }
422  }
423  // process the tail
424  if (vec_i) {
425  Pos_t pos_arr(pos[0], pos[1], pos[2]);
426  Vec_t dist = NeighborsDist<METRIC, Pos_t, VECSIZE>(
427  pos_arr, xyz);
428  Result_t test_result = dist <= threshold;
429  for (int k = 0; k < vec_i; ++k) {
430  if (test_result(k)) {
431  indices_ptr[indices_offset +
432  neighbors_count] = idx_vec[k];
433  if (RETURN_DISTANCES) {
434  distances_ptr[indices_offset +
435  neighbors_count] =
436  dist[k];
437  }
438  }
439  neighbors_count += int(test_result(k));
440  }
441  vec_i = 0;
442  }
443  }
444  });
445  }
446 #undef VECSIZE
447 }
448 
449 } // namespace
450 
529 template <class T, class TIndex, class OUTPUT_ALLOCATOR>
530 void FixedRadiusSearchCPU(int64_t* query_neighbors_row_splits,
531  const size_t num_points,
532  const T* const points,
533  const size_t num_queries,
534  const T* const queries,
535  const T radius,
536  const size_t points_row_splits_size,
537  const int64_t* const points_row_splits,
538  const size_t queries_row_splits_size,
539  const int64_t* const queries_row_splits,
540  const uint32_t* const hash_table_splits,
541  const size_t hash_table_cell_splits_size,
542  const uint32_t* const hash_table_cell_splits,
543  const uint32_t* const hash_table_index,
544  const Metric metric,
545  const bool ignore_query_point,
546  const bool return_distances,
547  OUTPUT_ALLOCATOR& output_allocator) {
548  // Dispatch all template parameter combinations
549 
550 #define FN_PARAMETERS \
551  query_neighbors_row_splits, num_points, points, num_queries, queries, \
552  radius, points_row_splits_size, points_row_splits, \
553  queries_row_splits_size, queries_row_splits, hash_table_splits, \
554  hash_table_cell_splits_size, hash_table_cell_splits, \
555  hash_table_index, output_allocator
556 
557 #define CALL_TEMPLATE(METRIC, IGNORE_QUERY_POINT, RETURN_DISTANCES) \
558  if (METRIC == metric && IGNORE_QUERY_POINT == ignore_query_point && \
559  RETURN_DISTANCES == return_distances) \
560  _FixedRadiusSearchCPU<T, TIndex, OUTPUT_ALLOCATOR, METRIC, \
561  IGNORE_QUERY_POINT, RETURN_DISTANCES>( \
562  FN_PARAMETERS);
563 
564 #define CALL_TEMPLATE2(METRIC) \
565  CALL_TEMPLATE(METRIC, true, true) \
566  CALL_TEMPLATE(METRIC, true, false) \
567  CALL_TEMPLATE(METRIC, false, true) \
568  CALL_TEMPLATE(METRIC, false, false)
569 
570 #define CALL_TEMPLATE3 \
571  CALL_TEMPLATE2(L1) \
572  CALL_TEMPLATE2(L2) \
573  CALL_TEMPLATE2(Linf)
574 
576 
577 #undef CALL_TEMPLATE
578 #undef CALL_TEMPLATE2
579 #undef CALL_TEMPLATE3
580 #undef FN_PARAMETERS
581 }
582 
583 } // namespace impl
584 } // namespace nns
585 } // namespace core
586 } // namespace open3d
#define VECSIZE
#define CALL_TEMPLATE3
int points
Definition: FilePCD.cpp:54
void FixedRadiusSearchCPU(int64_t *query_neighbors_row_splits, const size_t num_points, const T *const points, const size_t num_queries, const T *const queries, const T radius, const size_t points_row_splits_size, const int64_t *const points_row_splits, const size_t queries_row_splits_size, const int64_t *const queries_row_splits, const uint32_t *const hash_table_splits, const size_t hash_table_cell_splits_size, const uint32_t *const hash_table_cell_splits, const uint32_t *const hash_table_index, const Metric metric, const bool ignore_query_point, const bool return_distances, OUTPUT_ALLOCATOR &output_allocator)
Definition: FixedRadiusSearchImpl.h:530
Metric
Supported metrics.
Definition: NeighborSearchCommon.h:19
@ Linf
Definition: NeighborSearchCommon.h:19
@ L1
Definition: NeighborSearchCommon.h:19
@ L2
Definition: NeighborSearchCommon.h:19
void BuildSpatialHashTableCPU(const Tensor &points, double radius, const Tensor &points_row_splits, const Tensor &hash_table_splits, Tensor &hash_table_index, Tensor &hash_table_cell_splits)
Definition: FixedRadiusSearchOps.cpp:21
uint32_t AtomicFetchAddRelaxed(uint32_t *address, uint32_t val)
Definition: Atomic.h:25
const char const char value recording_handle imu_sample recording_handle uint8_t size_t data_size k4a_record_configuration_t config target_format k4a_capture_t capture_handle k4a_imu_sample_t imu_sample playback_handle k4a_logging_message_cb_t void min_level device_handle k4a_imu_sample_t timeout_in_ms capture_handle capture_handle capture_handle image_handle temperature_c k4a_image_t image_handle uint8_t image_handle image_handle image_handle image_handle uint32_t
Definition: K4aPlugin.cpp:548
const char const char value recording_handle imu_sample recording_handle uint8_t size_t data_size k4a_record_configuration_t config target_format k4a_capture_t capture_handle k4a_imu_sample_t imu_sample playback_handle k4a_logging_message_cb_t void min_level device_handle k4a_imu_sample_t timeout_in_ms capture_handle capture_handle capture_handle image_handle temperature_c int
Definition: K4aPlugin.cpp:474
const char const char value recording_handle imu_sample recording_handle uint8_t size_t data_size k4a_record_configuration_t config target_format k4a_capture_t capture_handle k4a_imu_sample_t imu_sample uint64_t
Definition: K4aPlugin.cpp:343
Definition: Dispatch.h:91
void InclusivePrefixSum(const Tin *first, const Tin *last, Tout *out)
Definition: ParallelScan.h:71
Definition: PinholeCameraIntrinsic.cpp:16
Definition: MiniVec.h:24