Open3D (C++ API)  0.11.0
HashmapCUDAImpl.h
Go to the documentation of this file.
1 // ----------------------------------------------------------------------------
2 // - Open3D: www.open3d.org -
3 // ----------------------------------------------------------------------------
4 // The MIT License (MIT)
5 //
6 // Copyright (c) 2018 www.open3d.org
7 //
8 // Permission is hereby granted, free of charge, to any person obtaining a copy
9 // of this software and associated documentation files (the "Software"), to deal
10 // in the Software without restriction, including without limitation the rights
11 // to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
12 // copies of the Software, and to permit persons to whom the Software is
13 // furnished to do so, subject to the following conditions:
14 //
15 // The above copyright notice and this permission notice shall be included in
16 // all copies or substantial portions of the Software.
17 //
18 // THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
19 // IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
20 // FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
21 // AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
22 // LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
23 // FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
24 // IN THE SOFTWARE.
25 // ----------------------------------------------------------------------------
26 
31 
32 namespace open3d {
33 namespace core {
34 
35 template <typename Hash, typename KeyEq>
37 public:
39 
40  __host__ void Setup(size_t init_buckets,
41  size_t init_capacity,
42  size_t dsize_key,
43  size_t dsize_value,
44  const InternalNodeManagerContext& node_mgr_ctx,
45  const InternalKvPairManagerContext& kv_mgr_ctx);
46 
47  __device__ bool Insert(bool lane_active,
48  uint32_t lane_id,
49  uint32_t bucket_id,
50  const void* key_ptr,
51  addr_t iterator_addr);
52 
53  __device__ Pair<addr_t, bool> Find(bool lane_active,
54  uint32_t lane_id,
55  uint32_t bucket_id,
56  const void* key_ptr);
57 
58  __device__ Pair<addr_t, bool> Erase(bool lane_active,
59  uint32_t lane_id,
60  uint32_t bucket_id,
61  const void* key_ptr);
62 
63  __device__ void WarpSyncKey(const void* key_ptr,
64  uint32_t lane_id,
65  void* ret_key_ptr);
66  __device__ int32_t WarpFindKey(const void* src_key_ptr,
67  uint32_t lane_id,
68  addr_t ptr);
69  __device__ int32_t WarpFindEmpty(addr_t unit_data);
70 
71  // Hash function.
72  __device__ size_t ComputeBucket(const void* key_ptr) const;
73 
74  // Node manager.
75  __device__ addr_t AllocateSlab(uint32_t lane_id);
76  __device__ void FreeSlab(addr_t slab_ptr);
77 
78  // Helpers.
80  uint32_t lane_id) {
81  return node_mgr_ctx_.get_unit_ptr_from_slab(slab_ptr, lane_id);
82  }
84  uint32_t lane_id) {
85  return reinterpret_cast<uint32_t*>(bucket_list_head_) +
86  bucket_id * kWarpSize + lane_id;
87  }
88 
89 public:
90  Hash hash_fn_;
91  KeyEq cmp_fn_;
92 
93  size_t bucket_count_;
94  size_t capacity_;
95  size_t dsize_key_;
96  size_t dsize_value_;
97 
101 };
102 
104 template <typename Hash, typename KeyEq>
106  const void* input_keys,
107  addr_t* output_iterator_addrs,
108  int heap_counter_prev,
109  size_t count);
110 
111 template <typename Hash, typename KeyEq>
113  const void* input_keys,
114  addr_t* input_iterator_addrs,
115  bool* output_masks,
116  size_t count);
117 
118 template <typename Hash, typename KeyEq>
120  const void* input_values,
121  addr_t* input_iterator_addrs,
122  iterator_t* output_iterators,
123  bool* output_masks,
124  size_t count);
125 
126 template <typename Hash, typename KeyEq>
127 __global__ void FindKernel(CUDAHashmapImplContext<Hash, KeyEq> hash_ctx,
128  const void* input_keys,
129  iterator_t* output_iterators,
130  bool* output_masks,
131  size_t count);
132 
133 template <typename Hash, typename KeyEq>
135  const void* input_keys,
136  addr_t* output_iterator_addrs,
137  bool* output_masks,
138  size_t count);
139 
140 template <typename Hash, typename KeyEq>
142  addr_t* input_iterator_addrs,
143  bool* output_masks,
144  size_t count);
145 
146 template <typename Hash, typename KeyEq>
148  iterator_t* output_iterators,
149  uint32_t* output_iterator_count);
150 
151 template <typename Hash, typename KeyEq>
152 __global__ void CountElemsPerBucketKernel(
154  size_t* bucket_elem_counts);
155 
156 __global__ void UnpackIteratorsKernel(const iterator_t* input_iterators,
157  const bool* input_masks,
158  void* output_keys,
159  void* output_values,
160  size_t dsize_key,
161  size_t dsize_value,
162  size_t iterator_count);
163 
164 __global__ void AssignIteratorsKernel(iterator_t* input_iterators,
165  const bool* input_masks,
166  const void* input_values,
167  size_t dsize_value,
168  size_t iterator_count);
169 
170 } // namespace core
171 } // namespace open3d
__device__ Pair< addr_t, bool > Find(bool lane_active, uint32_t lane_id, uint32_t bucket_id, const void *key_ptr)
Hash hash_fn_
Definition: HashmapCUDAImpl.h:90
__device__ size_t ComputeBucket(const void *key_ptr) const
const char const char value recording_handle imu_sample recording_handle uint8_t size_t data_size k4a_record_configuration_t config target_format k4a_capture_t capture_handle k4a_imu_sample_t imu_sample playback_handle k4a_logging_message_cb_t void min_level device_handle k4a_imu_sample_t timeout_in_ms capture_handle capture_handle capture_handle image_handle temperature_c k4a_image_t image_handle uint8_t image_handle image_handle image_handle image_handle uint32_t
Definition: K4aPlugin.cpp:554
__device__ void WarpSyncKey(const void *key_ptr, uint32_t lane_id, void *ret_key_ptr)
const char const char value recording_handle imu_sample recording_handle uint8_t size_t data_size k4a_record_configuration_t config target_format k4a_capture_t capture_handle k4a_imu_sample_t imu_sample playback_handle k4a_logging_message_cb_t void min_level device_handle k4a_imu_sample_t int32_t
Definition: K4aPlugin.cpp:395
__device__ addr_t AllocateSlab(uint32_t lane_id)
__global__ void InsertKernelPass1(CUDAHashmapImplContext< Hash, KeyEq > hash_ctx, const void *input_keys, addr_t *input_iterator_addrs, bool *output_masks, size_t count)
KeyEq cmp_fn_
Definition: HashmapCUDAImpl.h:91
Definition: HashmapCUDAImpl.h:36
__device__ addr_t * get_unit_ptr_from_list_head(uint32_t bucket_id, uint32_t lane_id)
Definition: HashmapCUDAImpl.h:83
Definition: Traits.h:61
__global__ void UnpackIteratorsKernel(const iterator_t *input_iterators, const bool *input_masks, void *output_keys, void *output_values, size_t dsize_key, size_t dsize_value, size_t iterator_count)
__global__ void InsertKernelPass2(CUDAHashmapImplContext< Hash, KeyEq > hash_ctx, const void *input_values, addr_t *input_iterator_addrs, iterator_t *output_iterators, bool *output_masks, size_t count)
Slab * bucket_list_head_
Definition: HashmapCUDAImpl.h:98
size_t bucket_count_
Definition: HashmapCUDAImpl.h:93
__global__ void FindKernel(CUDAHashmapImplContext< Hash, KeyEq > hash_ctx, const void *input_keys, iterator_t *output_iterators, bool *output_masks, size_t count)
__device__ int32_t WarpFindEmpty(addr_t unit_data)
__global__ void EraseKernelPass0(CUDAHashmapImplContext< Hash, KeyEq > hash_ctx, const void *input_keys, addr_t *output_iterator_addrs, bool *output_masks, size_t count)
__global__ void EraseKernelPass1(CUDAHashmapImplContext< Hash, KeyEq > hash_ctx, addr_t *input_iterator_addrs, bool *output_masks, size_t count)
Definition: Traits.h:51
Definition: InternalNodeManager.h:71
int count
Definition: FilePCD.cpp:61
__device__ int32_t WarpFindKey(const void *src_key_ptr, uint32_t lane_id, addr_t ptr)
size_t dsize_value_
Definition: HashmapCUDAImpl.h:96
__device__ void FreeSlab(addr_t slab_ptr)
Definition: PinholeCameraIntrinsic.cpp:35
size_t capacity_
Definition: HashmapCUDAImpl.h:94
size_t dsize_key_
Definition: HashmapCUDAImpl.h:95
__global__ void GetIteratorsKernel(CUDAHashmapImplContext< Hash, KeyEq > hash_ctx, iterator_t *output_iterators, uint32_t *output_iterator_count)
__device__ Pair< addr_t, bool > Erase(bool lane_active, uint32_t lane_id, uint32_t bucket_id, const void *key_ptr)
uint32_t addr_t
Definition: Traits.h:49
__global__ void AssignIteratorsKernel(iterator_t *input_iterators, const bool *input_masks, const void *input_values, size_t dsize_value, size_t iterator_count)
Definition: InternalKvPairManager.h:44
__device__ __forceinline__ uint32_t * get_unit_ptr_from_slab(const addr_t &next_slab_ptr, const uint32_t &lane_id)
Definition: InternalNodeManager.h:80
Definition: InternalNodeManager.h:65
InternalKvPairManagerContext kv_mgr_ctx_
Definition: HashmapCUDAImpl.h:100
__global__ void InsertKernelPass0(CUDAHashmapImplContext< Hash, KeyEq > hash_ctx, const void *input_keys, addr_t *output_iterator_addrs, int heap_counter_prev, size_t count)
Kernels.
__host__ void Setup(size_t init_buckets, size_t init_capacity, size_t dsize_key, size_t dsize_value, const InternalNodeManagerContext &node_mgr_ctx, const InternalKvPairManagerContext &kv_mgr_ctx)
InternalNodeManagerContext node_mgr_ctx_
Definition: HashmapCUDAImpl.h:99
__device__ addr_t * get_unit_ptr_from_list_nodes(addr_t slab_ptr, uint32_t lane_id)
Definition: HashmapCUDAImpl.h:79
__device__ bool Insert(bool lane_active, uint32_t lane_id, uint32_t bucket_id, const void *key_ptr, addr_t iterator_addr)
__global__ void CountElemsPerBucketKernel(CUDAHashmapImplContext< Hash, KeyEq > hash_ctx, size_t *bucket_elem_counts)