35 template <
typename Hash,
typename KeyEq>
40 __host__
void Setup(
size_t init_buckets,
47 __device__
bool Insert(
bool lane_active,
86 bucket_id * kWarpSize + lane_id;
104 template <
typename Hash,
typename KeyEq>
106 const void* input_keys,
107 addr_t* output_iterator_addrs,
108 int heap_counter_prev,
111 template <
typename Hash,
typename KeyEq>
113 const void* input_keys,
114 addr_t* input_iterator_addrs,
118 template <
typename Hash,
typename KeyEq>
120 const void* input_values,
121 addr_t* input_iterator_addrs,
126 template <
typename Hash,
typename KeyEq>
128 const void* input_keys,
133 template <
typename Hash,
typename KeyEq>
135 const void* input_keys,
136 addr_t* output_iterator_addrs,
140 template <
typename Hash,
typename KeyEq>
142 addr_t* input_iterator_addrs,
146 template <
typename Hash,
typename KeyEq>
151 template <
typename Hash,
typename KeyEq>
154 size_t* bucket_elem_counts);
157 const bool* input_masks,
162 size_t iterator_count);
165 const bool* input_masks,
166 const void* input_values,
168 size_t iterator_count);
__device__ Pair< addr_t, bool > Find(bool lane_active, uint32_t lane_id, uint32_t bucket_id, const void *key_ptr)
Hash hash_fn_
Definition: HashmapCUDAImpl.h:90
__device__ size_t ComputeBucket(const void *key_ptr) const
const char const char value recording_handle imu_sample recording_handle uint8_t size_t data_size k4a_record_configuration_t config target_format k4a_capture_t capture_handle k4a_imu_sample_t imu_sample playback_handle k4a_logging_message_cb_t void min_level device_handle k4a_imu_sample_t timeout_in_ms capture_handle capture_handle capture_handle image_handle temperature_c k4a_image_t image_handle uint8_t image_handle image_handle image_handle image_handle uint32_t
Definition: K4aPlugin.cpp:554
__device__ void WarpSyncKey(const void *key_ptr, uint32_t lane_id, void *ret_key_ptr)
const char const char value recording_handle imu_sample recording_handle uint8_t size_t data_size k4a_record_configuration_t config target_format k4a_capture_t capture_handle k4a_imu_sample_t imu_sample playback_handle k4a_logging_message_cb_t void min_level device_handle k4a_imu_sample_t int32_t
Definition: K4aPlugin.cpp:395
__device__ addr_t AllocateSlab(uint32_t lane_id)
__global__ void InsertKernelPass1(CUDAHashmapImplContext< Hash, KeyEq > hash_ctx, const void *input_keys, addr_t *input_iterator_addrs, bool *output_masks, size_t count)
KeyEq cmp_fn_
Definition: HashmapCUDAImpl.h:91
Definition: HashmapCUDAImpl.h:36
__device__ addr_t * get_unit_ptr_from_list_head(uint32_t bucket_id, uint32_t lane_id)
Definition: HashmapCUDAImpl.h:83
__global__ void UnpackIteratorsKernel(const iterator_t *input_iterators, const bool *input_masks, void *output_keys, void *output_values, size_t dsize_key, size_t dsize_value, size_t iterator_count)
__global__ void InsertKernelPass2(CUDAHashmapImplContext< Hash, KeyEq > hash_ctx, const void *input_values, addr_t *input_iterator_addrs, iterator_t *output_iterators, bool *output_masks, size_t count)
Slab * bucket_list_head_
Definition: HashmapCUDAImpl.h:98
size_t bucket_count_
Definition: HashmapCUDAImpl.h:93
__global__ void FindKernel(CUDAHashmapImplContext< Hash, KeyEq > hash_ctx, const void *input_keys, iterator_t *output_iterators, bool *output_masks, size_t count)
__device__ int32_t WarpFindEmpty(addr_t unit_data)
__global__ void EraseKernelPass0(CUDAHashmapImplContext< Hash, KeyEq > hash_ctx, const void *input_keys, addr_t *output_iterator_addrs, bool *output_masks, size_t count)
__global__ void EraseKernelPass1(CUDAHashmapImplContext< Hash, KeyEq > hash_ctx, addr_t *input_iterator_addrs, bool *output_masks, size_t count)
Definition: InternalNodeManager.h:71
int count
Definition: FilePCD.cpp:61
__device__ int32_t WarpFindKey(const void *src_key_ptr, uint32_t lane_id, addr_t ptr)
size_t dsize_value_
Definition: HashmapCUDAImpl.h:96
__device__ void FreeSlab(addr_t slab_ptr)
Definition: PinholeCameraIntrinsic.cpp:35
size_t capacity_
Definition: HashmapCUDAImpl.h:94
size_t dsize_key_
Definition: HashmapCUDAImpl.h:95
__global__ void GetIteratorsKernel(CUDAHashmapImplContext< Hash, KeyEq > hash_ctx, iterator_t *output_iterators, uint32_t *output_iterator_count)
__device__ Pair< addr_t, bool > Erase(bool lane_active, uint32_t lane_id, uint32_t bucket_id, const void *key_ptr)
uint32_t addr_t
Definition: Traits.h:49
__global__ void AssignIteratorsKernel(iterator_t *input_iterators, const bool *input_masks, const void *input_values, size_t dsize_value, size_t iterator_count)
Definition: InternalKvPairManager.h:44
__device__ __forceinline__ uint32_t * get_unit_ptr_from_slab(const addr_t &next_slab_ptr, const uint32_t &lane_id)
Definition: InternalNodeManager.h:80
Definition: InternalNodeManager.h:65
InternalKvPairManagerContext kv_mgr_ctx_
Definition: HashmapCUDAImpl.h:100
__global__ void InsertKernelPass0(CUDAHashmapImplContext< Hash, KeyEq > hash_ctx, const void *input_keys, addr_t *output_iterator_addrs, int heap_counter_prev, size_t count)
Kernels.
__host__ void Setup(size_t init_buckets, size_t init_capacity, size_t dsize_key, size_t dsize_value, const InternalNodeManagerContext &node_mgr_ctx, const InternalKvPairManagerContext &kv_mgr_ctx)
InternalNodeManagerContext node_mgr_ctx_
Definition: HashmapCUDAImpl.h:99
__device__ addr_t * get_unit_ptr_from_list_nodes(addr_t slab_ptr, uint32_t lane_id)
Definition: HashmapCUDAImpl.h:79
__device__ bool Insert(bool lane_active, uint32_t lane_id, uint32_t bucket_id, const void *key_ptr, addr_t iterator_addr)
__global__ void CountElemsPerBucketKernel(CUDAHashmapImplContext< Hash, KeyEq > hash_ctx, size_t *bucket_elem_counts)