10 #include <stdgpu/memory.h>
11 #include <stdgpu/utility.h>
12 #include <thrust/device_vector.h>
13 #include <thrust/transform.h>
15 #include <stdgpu/unordered_map.cuh>
16 #include <type_traits>
59 : std_allocator_(other.std_allocator_) {}
63 T* p = std_allocator_.allocate(n);
64 stdgpu::register_memory(p, n, stdgpu::dynamic_memory_type::device);
70 stdgpu::deregister_memory(p, n, stdgpu::dynamic_memory_type::device);
71 std_allocator_.deallocate(p, n);
76 return std_allocator_ == other.std_allocator_;
84 template <
typename T2>
92 template <
typename Key>
96 template <
typename Key,
typename Hash,
typename Eq>
98 stdgpu::unordered_map<Key,
104 template <
typename Key,
typename Hash,
typename Eq>
109 const std::vector<int64_t>& value_dsizes,
113 void Reserve(int64_t capacity)
override;
115 void Insert(
const void* input_keys,
116 const std::vector<const void*>& input_values_soa,
119 int64_t
count)
override;
121 void Find(
const void* input_keys,
124 int64_t
count)
override;
126 void Erase(
const void* input_keys,
128 int64_t
count)
override;
132 void Clear()
override;
134 int64_t
Size()
const override;
153 template <
typename Key,
typename Hash,
typename Eq>
155 int64_t init_capacity,
157 const std::vector<int64_t>& value_dsizes,
164 template <
typename Key,
typename Hash,
typename Eq>
170 template <
typename Key,
typename Hash,
typename Eq>
177 template <
typename Key,
typename Hash,
typename Eq>
180 const Key* input_keys,
184 uint32_t tid = threadIdx.x + blockIdx.x * blockDim.x;
185 if (tid >=
count)
return;
187 Key key = input_keys[tid];
188 auto iter = map.find(key);
189 bool flag = (iter != map.end());
190 output_masks[tid] = flag;
191 output_buf_indices[tid] = flag ? iter->second : 0;
194 template <
typename Key,
typename Hash,
typename Eq>
204 impl_, buffer_accessor_,
static_cast<const Key*
>(input_keys),
205 output_buf_indices, output_masks,
count);
210 template <
typename Key,
typename Hash,
typename Eq>
213 const Key* input_keys,
217 uint32_t tid = threadIdx.x + blockIdx.x * blockDim.x;
218 if (tid >=
count)
return;
220 Key key = input_keys[tid];
221 auto iter = map.find(key);
222 bool flag = (iter != map.end());
223 output_masks[tid] = flag;
224 output_buf_indices[tid] = flag ? iter->second : 0;
226 if (output_masks[tid]) {
227 output_masks[tid] = map.erase(key);
228 if (output_masks[tid]) {
229 buffer_accessor.
DeviceFree(output_buf_indices[tid]);
234 template <
typename Key,
typename Hash,
typename Eq>
248 impl_, buffer_accessor_,
static_cast<const Key*
>(input_keys),
249 output_buf_indices, output_masks,
count);
253 template <
typename Key>
261 template <
typename Key,
typename Hash,
typename Eq>
265 auto range = impl_.device_range();
267 thrust::transform(range.begin(), range.end(), output_indices,
273 template <
typename Key,
typename Hash,
typename Eq>
277 this->buffer_->ResetHeap();
280 template <
typename Key,
typename Hash,
typename Eq>
285 template <
typename Key,
typename Hash,
typename Eq>
288 return impl_.bucket_count();
291 template <
typename Key,
typename Hash,
typename Eq>
297 template <
typename Key,
typename Hash,
typename Eq>
300 return impl_.load_factor();
304 template <
typename Key,
typename Hash,
typename Eq,
typename block_t>
308 const Key* input_keys,
309 const void*
const* input_values_soa,
314 uint32_t tid = threadIdx.x + blockIdx.x * blockDim.x;
315 if (tid >=
count)
return;
317 Key key = input_keys[tid];
318 output_buf_indices[tid] = 0;
319 output_masks[tid] =
false;
322 auto res = map.emplace(key, 0);
327 auto key_ptr = buffer_accessor.
GetKeyPtr(buf_index);
331 *
static_cast<Key*
>(key_ptr) = key;
334 for (
int j = 0; j < n_values; ++j) {
335 const int64_t blocks_per_element =
338 block_t* dst_value =
static_cast<block_t*
>(
340 const block_t* src_value =
341 static_cast<const block_t*
>(input_values_soa[j]) +
342 blocks_per_element * tid;
343 for (
int b = 0; b < blocks_per_element; ++b) {
344 dst_value[b] = src_value[b];
349 res.first->second = buf_index;
352 output_buf_indices[tid] = buf_index;
353 output_masks[tid] =
true;
357 template <
typename Key,
typename Hash,
typename Eq>
359 const void* input_keys,
360 const std::vector<const void*>& input_values_soa,
368 thrust::device_vector<const void*> input_values_soa_device(
369 input_values_soa.begin(), input_values_soa.end());
371 int64_t n_values = input_values_soa.size();
372 const void*
const* ptr_input_values_soa =
373 thrust::raw_pointer_cast(input_values_soa_device.data());
375 DISPATCH_DIVISOR_SIZE_TO_BLOCK_T(
376 buffer_accessor_.common_block_size_, [&]() {
377 STDGPUInsertKernel<Key, Hash, Eq, block_t>
378 <<<blocks, threads, 0, core::cuda::GetStream()>>>(
379 impl_, buffer_accessor_,
380 static_cast<const Key*>(input_keys),
381 ptr_input_values_soa, output_buf_indices,
382 output_masks, count, n_values);
387 template <
typename Key,
typename Hash,
typename Eq>
390 this->capacity_ = capacity;
393 this->buffer_ = std::make_shared<HashBackendBuffer>(
394 this->capacity_, this->key_dsize_, this->value_dsizes_,
396 buffer_accessor_.Setup(*this->buffer_);
401 CUDAScopedStream scoped_stream(cuda::GetDefaultStream());
410 template <
typename Key,
typename Hash,
typename Eq>
414 buffer_accessor_.Shutdown(this->device_);
419 CUDAScopedStream scoped_stream(cuda::GetDefaultStream());
#define OPEN3D_HOST_DEVICE
Definition: CUDAUtils.h:44
#define LogError(...)
Definition: Logging.h:51
Definition: CUDAHashBackendBufferAccessor.h:24
int64_t * value_blocks_per_element_
Definition: CUDAHashBackendBufferAccessor.h:108
__device__ buf_index_t DeviceAllocate()
Definition: CUDAHashBackendBufferAccessor.h:79
__device__ void DeviceFree(buf_index_t ptr)
Definition: CUDAHashBackendBufferAccessor.h:83
__device__ void * GetKeyPtr(buf_index_t ptr)
Definition: CUDAHashBackendBufferAccessor.h:88
__device__ void * GetValuePtr(buf_index_t ptr, int value_idx=0)
Definition: CUDAHashBackendBufferAccessor.h:91
When CUDA is not enabled, this is a dummy class.
Definition: CUDAUtils.h:214
Definition: DeviceHashBackend.h:20
Device device_
Definition: DeviceHashBackend.h:100
Definition: StdAllocator.h:23
Definition: StdGPUHashBackend.h:33
T * allocate(std::size_t n)
Allocates memory of size n.
Definition: StdGPUHashBackend.h:62
StdGPUAllocator()=default
Default constructor.
StdGPUAllocator(int device_id)
Constructor from device.
Definition: StdGPUHashBackend.h:42
StdGPUAllocator(const StdGPUAllocator &)=default
Default copy constructor.
void deallocate(T *p, std::size_t n)
Deallocates memory from pointer p of size n .
Definition: StdGPUHashBackend.h:69
bool operator==(const StdGPUAllocator &other)
Returns true if the instances are equal, false otherwise.
Definition: StdGPUHashBackend.h:75
StdGPUAllocator(StdGPUAllocator &&)=default
Default move constructor.
T value_type
T.
Definition: StdGPUHashBackend.h:36
StdGPUAllocator & operator=(StdGPUAllocator &&)=default
Default move assignment operator.
StdGPUAllocator & operator=(const StdGPUAllocator &)=default
Default copy assignment operator.
bool operator!=(const StdGPUAllocator &other)
Returns true if the instances are not equal, false otherwise.
Definition: StdGPUHashBackend.h:80
StdGPUAllocator(const StdGPUAllocator< U > &other)
Rebind copy constructor.
Definition: StdGPUHashBackend.h:58
Definition: StdGPUHashBackend.h:105
StdGPUHashBackend(int64_t init_capacity, int64_t key_dsize, const std::vector< int64_t > &value_dsizes, const Device &device)
Definition: StdGPUHashBackend.h:154
void Erase(const void *input_keys, bool *output_masks, int64_t count) override
Parallel erase a contiguous array of keys.
Definition: StdGPUHashBackend.h:235
~StdGPUHashBackend()
Definition: StdGPUHashBackend.h:165
float LoadFactor() const override
Get the current load factor, defined as size / bucket count.
Definition: StdGPUHashBackend.h:298
InternalStdGPUHashBackend< Key, Hash, Eq > GetImpl() const
Definition: StdGPUHashBackend.h:140
void Find(const void *input_keys, buf_index_t *output_buf_indices, bool *output_masks, int64_t count) override
Parallel find a contiguous array of keys.
Definition: StdGPUHashBackend.h:195
void Free()
Definition: StdGPUHashBackend.h:411
void Insert(const void *input_keys, const std::vector< const void * > &input_values_soa, buf_index_t *output_buf_indices, bool *output_masks, int64_t count) override
Parallel insert contiguous arrays of keys and values.
Definition: StdGPUHashBackend.h:358
std::vector< int64_t > BucketSizes() const override
Get the number of entries per bucket.
Definition: StdGPUHashBackend.h:292
InternalStdGPUHashBackend< Key, Hash, Eq > impl_
Definition: StdGPUHashBackend.h:148
void Reserve(int64_t capacity) override
Definition: StdGPUHashBackend.h:281
int64_t GetBucketCount() const override
Get the number of buckets of the hash map.
Definition: StdGPUHashBackend.h:286
int64_t GetActiveIndices(buf_index_t *output_indices) override
Parallel collect all iterators in the hash table.
Definition: StdGPUHashBackend.h:262
int64_t Size() const override
Get the size (number of valid entries) of the hash map.
Definition: StdGPUHashBackend.h:171
void Allocate(int64_t capacity)
Definition: StdGPUHashBackend.h:388
CUDAHashBackendBufferAccessor buffer_accessor_
Definition: StdGPUHashBackend.h:150
void Clear() override
Clear stored map without reallocating memory.
Definition: StdGPUHashBackend.h:274
T * GetDataPtr()
Definition: Tensor.h:1143
void Synchronize()
Definition: CUDAUtils.cpp:58
__global__ void STDGPUFindKernel(InternalStdGPUHashBackend< Key, Hash, Eq > map, CUDAHashBackendBufferAccessor buffer_accessor, const Key *input_keys, buf_index_t *output_buf_indices, bool *output_masks, int64_t count)
Definition: StdGPUHashBackend.h:178
uint32_t buf_index_t
Definition: HashBackendBuffer.h:44
__global__ void STDGPUEraseKernel(InternalStdGPUHashBackend< Key, Hash, Eq > map, CUDAHashBackendBufferAccessor buffer_accessor, const Key *input_keys, buf_index_t *output_buf_indices, bool *output_masks, int64_t count)
Definition: StdGPUHashBackend.h:211
const Dtype Int32
Definition: Dtype.cpp:46
stdgpu::unordered_map< Key, buf_index_t, Hash, Eq, InternalStdGPUHashBackendAllocator< Key > > InternalStdGPUHashBackend
Definition: StdGPUHashBackend.h:102
__global__ void STDGPUInsertKernel(InternalStdGPUHashBackend< Key, Hash, Eq > map, CUDAHashBackendBufferAccessor buffer_accessor, const Key *input_keys, const void *const *input_values_soa, buf_index_t *output_buf_indices, bool *output_masks, int64_t count, int64_t n_values)
Definition: StdGPUHashBackend.h:305
const char const char value recording_handle imu_sample recording_handle uint8_t size_t data_size k4a_record_configuration_t config target_format k4a_capture_t capture_handle k4a_imu_sample_t imu_sample playback_handle k4a_logging_message_cb_t void min_level device_handle k4a_imu_sample_t timeout_in_ms capture_handle capture_handle capture_handle image_handle temperature_c k4a_image_t image_handle uint8_t image_handle image_handle image_handle image_handle uint32_t
Definition: K4aPlugin.cpp:548
const char const char value recording_handle imu_sample recording_handle uint8_t size_t data_size k4a_record_configuration_t config target_format k4a_capture_t capture_handle k4a_imu_sample_t imu_sample playback_handle k4a_logging_message_cb_t void min_level device_handle k4a_imu_sample_t timeout_in_ms capture_handle capture_handle capture_handle image_handle temperature_c k4a_image_t image_handle uint8_t image_handle image_handle image_handle image_handle image_handle timestamp_usec white_balance image_handle k4a_device_configuration_t config device_handle char size_t serial_number_size bool int32_t int32_t int32_t int32_t k4a_color_control_mode_t default_mode value const const k4a_calibration_t calibration char size_t
Definition: K4aPlugin.cpp:719
Definition: PinholeCameraIntrinsic.cpp:16