19 template <
typename Key,
typename Hash,
typename Eq>
24 const std::vector<int64_t>& value_dsizes,
29 void Reserve(int64_t capacity)
override;
31 void Insert(
const void* input_keys,
32 const std::vector<const void*>& input_values_soa,
35 int64_t
count)
override;
37 void Find(
const void* input_keys,
40 int64_t
count)
override;
42 void Erase(
const void* input_keys,
44 int64_t
count)
override;
47 void Clear()
override;
49 int64_t
Size()
const override;
56 void Allocate(int64_t capacity)
override;
70 template <
typename Key,
typename Hash,
typename Eq>
72 int64_t init_capacity,
74 const std::vector<int64_t>& value_dsizes,
81 template <
typename Key,
typename Hash,
typename Eq>
87 template <
typename Key,
typename Hash,
typename Eq>
92 template <
typename Key,
typename Hash,
typename Eq>
98 if (
count == 0)
return;
104 const int64_t num_blocks =
105 (
count + kThreadsPerBlock - 1) / kThreadsPerBlock;
106 FindKernel<<<num_blocks, kThreadsPerBlock, 0, core::cuda::GetStream()>>>(
107 impl_, input_keys, output_buf_indices, output_masks,
count);
112 template <
typename Key,
typename Hash,
typename Eq>
117 if (
count == 0)
return;
125 const int64_t num_blocks =
126 (
count + kThreadsPerBlock - 1) / kThreadsPerBlock;
128 core::cuda::GetStream()>>>(
129 impl_, input_keys, buf_indices, output_masks,
count);
131 core::cuda::GetStream()>>>(impl_, buf_indices,
132 output_masks,
count);
139 template <
typename Key,
typename Hash,
typename Eq>
150 const int64_t num_blocks =
151 (impl_.bucket_count_ * kWarpSize + kThreadsPerBlock - 1) /
154 core::cuda::GetStream()>>>(
155 impl_, output_buf_indices,
count);
163 return static_cast<int64_t
>(ret);
166 template <
typename Key,
typename Hash,
typename Eq>
170 this->buffer_->ResetHeap();
174 sizeof(
Slab) * this->bucket_count_));
182 template <
typename Key,
typename Hash,
typename Eq>
185 return this->buffer_->GetHeapTopIndex();
188 template <
typename Key,
typename Hash,
typename Eq>
191 return bucket_count_;
194 template <
typename Key,
typename Hash,
typename Eq>
197 thrust::device_vector<int64_t> elems_per_bucket(impl_.bucket_count_);
198 thrust::fill(elems_per_bucket.begin(), elems_per_bucket.end(), 0);
200 const int64_t num_blocks =
201 (impl_.buffer_accessor_.capacity_ + kThreadsPerBlock - 1) /
204 core::cuda::GetStream()>>>(
205 impl_, thrust::raw_pointer_cast(elems_per_bucket.data()));
209 std::vector<int64_t>
result(impl_.bucket_count_);
210 thrust::copy(elems_per_bucket.begin(), elems_per_bucket.end(),
215 template <
typename Key,
typename Hash,
typename Eq>
218 return float(Size()) /
float(this->bucket_count_);
221 template <
typename Key,
typename Hash,
typename Eq>
223 const void* input_keys,
224 const std::vector<const void*>& input_values_soa,
229 if (
count == 0)
return;
233 int prev_heap_top = this->buffer_->GetHeapTopIndex();
234 *thrust::device_ptr<int>(impl_.buffer_accessor_.heap_top_) =
235 prev_heap_top +
count;
237 const int64_t num_blocks =
238 (
count + kThreadsPerBlock - 1) / kThreadsPerBlock;
240 core::cuda::GetStream()>>>(
241 impl_, input_keys, output_buf_indices, prev_heap_top,
count);
243 core::cuda::GetStream()>>>(
244 impl_, input_keys, output_buf_indices, output_masks,
count);
246 thrust::device_vector<const void*> input_values_soa_device(
247 input_values_soa.begin(), input_values_soa.end());
249 int64_t n_values = input_values_soa.size();
250 const void*
const* ptr_input_values_soa =
251 thrust::raw_pointer_cast(input_values_soa_device.data());
252 DISPATCH_DIVISOR_SIZE_TO_BLOCK_T(
253 impl_.buffer_accessor_.common_block_size_, [&]() {
254 InsertKernelPass2<Key, Hash, Eq, block_t>
255 <<<num_blocks, kThreadsPerBlock, 0,
256 core::cuda::GetStream()>>>(
257 impl_, ptr_input_values_soa, output_buf_indices,
258 output_masks, count, n_values);
264 template <
typename Key,
typename Hash,
typename Eq>
267 this->bucket_count_ = capacity * 2;
268 this->capacity_ = capacity;
271 this->buffer_ = std::make_shared<HashBackendBuffer>(
272 this->capacity_, this->key_dsize_, this->value_dsizes_,
274 buffer_accessor_.Setup(*this->buffer_);
277 node_mgr_ = std::make_shared<SlabNodeManager>(this->device_);
281 sizeof(
Slab) * this->bucket_count_, this->device_));
283 sizeof(
Slab) * this->bucket_count_));
287 impl_.Setup(this->bucket_count_, node_mgr_->impl_, buffer_accessor_);
290 template <
typename Key,
typename Hash,
typename Eq>
293 buffer_accessor_.Shutdown(this->device_);
#define OPEN3D_CUDA_CHECK(err)
Definition: CUDAUtils.h:47
core::Tensor result
Definition: VtkUtils.cpp:75
bool copy
Definition: VtkUtils.cpp:73
Definition: CUDAHashBackendBufferAccessor.h:24
When CUDA is not enabled, this is a dummy class.
Definition: CUDAUtils.h:214
Definition: DeviceHashBackend.h:20
Device device_
Definition: DeviceHashBackend.h:100
static void MemcpyToHost(void *host_ptr, const void *src_ptr, const Device &src_device, size_t num_bytes)
Same as Memcpy, but with host (CPU:0) as default dst_device.
Definition: MemoryManager.cpp:85
static void * Malloc(size_t byte_size, const Device &device)
Definition: MemoryManager.cpp:22
static void Free(void *ptr, const Device &device)
Frees previously allocated memory at address ptr on device device.
Definition: MemoryManager.cpp:28
Definition: SlabHashBackend.h:20
SlabHashBackend(int64_t init_capacity, int64_t key_dsize, const std::vector< int64_t > &value_dsizes, const Device &device)
Definition: SlabHashBackend.h:71
void Free() override
Definition: SlabHashBackend.h:291
CUDAHashBackendBufferAccessor buffer_accessor_
Definition: SlabHashBackend.h:64
void Allocate(int64_t capacity) override
Definition: SlabHashBackend.h:265
~SlabHashBackend()
Definition: SlabHashBackend.h:82
float LoadFactor() const override
Get the current load factor, defined as size / bucket count.
Definition: SlabHashBackend.h:216
std::shared_ptr< SlabNodeManager > node_mgr_
Definition: SlabHashBackend.h:65
int64_t GetActiveIndices(buf_index_t *output_indices) override
Parallel collect all iterators in the hash table.
Definition: SlabHashBackend.h:140
SlabHashBackendImpl< Key, Hash, Eq > impl_
Definition: SlabHashBackend.h:62
void Insert(const void *input_keys, const std::vector< const void * > &input_values_soa, buf_index_t *output_buf_indices, bool *output_masks, int64_t count) override
Parallel insert contiguous arrays of keys and values.
Definition: SlabHashBackend.h:222
SlabHashBackendImpl< Key, Hash, Eq > GetImpl()
Definition: SlabHashBackend.h:54
int64_t bucket_count_
Definition: SlabHashBackend.h:67
int64_t Size() const override
Get the size (number of valid entries) of the hash map.
Definition: SlabHashBackend.h:183
int64_t GetBucketCount() const override
Get the number of buckets of the hash map.
Definition: SlabHashBackend.h:189
void Reserve(int64_t capacity) override
Definition: SlabHashBackend.h:88
void Clear() override
Clear stored map without reallocating memory.
Definition: SlabHashBackend.h:167
std::vector< int64_t > BucketSizes() const override
Get the number of entries per bucket.
Definition: SlabHashBackend.h:195
void Find(const void *input_keys, buf_index_t *output_buf_indices, bool *output_masks, int64_t count) override
Parallel find a contiguous array of keys.
Definition: SlabHashBackend.h:93
void Erase(const void *input_keys, bool *output_masks, int64_t count) override
Parallel erase a contiguous array of keys.
Definition: SlabHashBackend.h:113
Definition: SlabHashBackendImpl.h:45
Definition: SlabNodeManager.h:39
void Synchronize()
Definition: CUDAUtils.cpp:58
__global__ void InsertKernelPass1(SlabHashBackendImpl< Key, Hash, Eq > impl, const void *input_keys, buf_index_t *output_buf_indices, bool *output_masks, int64_t count)
Definition: SlabHashBackendImpl.h:493
__global__ void InsertKernelPass0(SlabHashBackendImpl< Key, Hash, Eq > impl, const void *input_keys, buf_index_t *output_buf_indices, int heap_counter_prev, int64_t count)
Kernels.
Definition: SlabHashBackendImpl.h:474
uint32_t buf_index_t
Definition: HashBackendBuffer.h:44
__global__ void EraseKernelPass1(SlabHashBackendImpl< Key, Hash, Eq > impl, buf_index_t *output_buf_indices, bool *output_masks, int64_t count)
Definition: SlabHashBackendImpl.h:636
__global__ void FindKernel(SlabHashBackendImpl< Key, Hash, Eq > impl, const void *input_keys, buf_index_t *output_buf_indices, bool *output_masks, int64_t count)
Definition: SlabHashBackendImpl.h:562
__global__ void EraseKernelPass0(SlabHashBackendImpl< Key, Hash, Eq > impl, const void *input_keys, buf_index_t *output_buf_indices, bool *output_masks, int64_t count)
Definition: SlabHashBackendImpl.h:601
__global__ void GetActiveIndicesKernel(SlabHashBackendImpl< Key, Hash, Eq > impl, buf_index_t *output_buf_indices, uint32_t *output_count)
Definition: SlabHashBackendImpl.h:647
__global__ void CountElemsPerBucketKernel(SlabHashBackendImpl< Key, Hash, Eq > impl, int64_t *bucket_elem_counts)
Definition: SlabHashBackendImpl.h:687
const char const char value recording_handle imu_sample recording_handle uint8_t size_t data_size k4a_record_configuration_t config target_format k4a_capture_t capture_handle k4a_imu_sample_t imu_sample playback_handle k4a_logging_message_cb_t void min_level device_handle k4a_imu_sample_t timeout_in_ms capture_handle capture_handle capture_handle image_handle temperature_c k4a_image_t image_handle uint8_t image_handle image_handle image_handle image_handle uint32_t
Definition: K4aPlugin.cpp:548
const char const char value recording_handle imu_sample recording_handle uint8_t size_t data_size k4a_record_configuration_t config target_format k4a_capture_t capture_handle k4a_imu_sample_t imu_sample playback_handle k4a_logging_message_cb_t void min_level device_handle k4a_imu_sample_t timeout_in_ms capture_handle capture_handle capture_handle image_handle float
Definition: K4aPlugin.cpp:460
Definition: PinholeCameraIntrinsic.cpp:16