39 #if defined(BUILD_CUDA_MODULE) && defined(__CUDACC__) 40 #define OPEN3D_ATOMIC_ADD(X, Y) atomicAdd(X, Y) 42 #define OPEN3D_ATOMIC_ADD(X, Y) (*X).fetch_add(Y) 45 #define DISPATCH_BYTESIZE_TO_VOXEL(BYTESIZE, ...) \ 47 if (BYTESIZE == sizeof(ColoredVoxel32f)) { \ 48 using voxel_t = ColoredVoxel32f; \ 49 return __VA_ARGS__(); \ 50 } else if (BYTESIZE == sizeof(ColoredVoxel16i)) { \ 51 using voxel_t = ColoredVoxel16i; \ 52 return __VA_ARGS__(); \ 53 } else if (BYTESIZE == sizeof(Voxel32f)) { \ 54 using voxel_t = Voxel32f; \ 55 return __VA_ARGS__(); \ 57 utility::LogError("Unsupported voxel bytesize"); \ 80 tsdf = (weight * tsdf + dsdf) / (weight + 1);
87 printf(
"[Voxel32f] should never reach here.\n");
98 static const uint16_t kMaxUint16 = 65535;
99 static constexpr
float kColorFactor = 255.0f;
112 return static_cast<float>(r / kColorFactor);
115 return static_cast<float>(g / kColorFactor);
118 return static_cast<float>(b / kColorFactor);
121 float inc_wsum =
static_cast<float>(
weight) + 1;
122 float inv_wsum = 1.0f / inc_wsum;
123 tsdf = (
static_cast<float>(
weight) * tsdf + dsdf) * inv_wsum;
124 weight =
static_cast<uint16_t
>(inc_wsum < static_cast<float>(kMaxUint16)
132 float inc_wsum =
static_cast<float>(
weight) + 1;
133 float inv_wsum = 1.0f / inc_wsum;
134 tsdf = (weight * tsdf + dsdf) * inv_wsum;
135 r =
static_cast<uint16_t
>(
136 round((weight * r + dr * kColorFactor) * inv_wsum));
137 g =
static_cast<uint16_t
>(
138 round((weight * g + dg * kColorFactor) * inv_wsum));
139 b =
static_cast<uint16_t
>(
140 round((weight * b + db * kColorFactor) * inv_wsum));
141 weight =
static_cast<uint16_t
>(inc_wsum < static_cast<float>(kMaxUint16)
164 float inv_wsum = 1.0f / (weight + 1);
165 tsdf = (weight * tsdf + dsdf) * inv_wsum;
172 float inv_wsum = 1.0f / (weight + 1);
173 tsdf = (weight * tsdf + dsdf) * inv_wsum;
174 r = (weight * r + dr) * inv_wsum;
175 g = (weight * g + dg) * inv_wsum;
176 b = (weight * b + db) * inv_wsum;
183 template <
typename voxel_t>
193 int xn = (xo + resolution) % resolution;
194 int yn = (yo + resolution) % resolution;
195 int zn = (zo + resolution) % resolution;
197 int64_t dxb = sign(xo - xn);
198 int64_t dyb = sign(yo - yn);
199 int64_t dzb = sign(zo - zn);
201 int64_t nb_idx = (dxb + 1) + (dyb + 1) * 3 + (dzb + 1) * 9;
203 bool block_mask_i = *
static_cast<bool*
>(
205 if (!block_mask_i)
return nullptr;
207 int64_t block_idx_i =
209 curr_block_idx, nb_idx));
211 return static_cast<voxel_t*
>(
217 template <
typename voxel_t>
229 auto GetVoxelAt = [&]
OPEN3D_DEVICE(
int xo,
int yo,
int zo) {
230 return DeviceGetVoxelAt<voxel_t>(
231 xo, yo, zo, curr_block_idx, resolution, nb_block_masks_indexer,
232 nb_block_indices_indexer, blocks_indexer);
234 voxel_t* vxp = GetVoxelAt(xo + 1, yo, zo);
235 voxel_t* vxn = GetVoxelAt(xo - 1, yo, zo);
236 voxel_t* vyp = GetVoxelAt(xo, yo + 1, zo);
237 voxel_t* vyn = GetVoxelAt(xo, yo - 1, zo);
238 voxel_t* vzp = GetVoxelAt(xo, yo, zo + 1);
239 voxel_t* vzn = GetVoxelAt(xo, yo, zo - 1);
240 if (vxp && vxn) n[0] = (vxp->GetTSDF() - vxn->GetTSDF()) / (2 * voxel_size);
241 if (vyp && vyn) n[1] = (vyp->GetTSDF() - vyn->GetTSDF()) / (2 * voxel_size);
242 if (vzp && vzn) n[2] = (vzp->GetTSDF() - vzn->GetTSDF()) / (2 * voxel_size);
245 #if defined(BUILD_CUDA_MODULE) && defined(__CUDACC__) 246 void CUDAUnprojectKernel
250 (
const std::unordered_map<std::string, Tensor>& srcs,
251 std::unordered_map<std::string, Tensor>& dsts) {
252 static std::vector<std::string> src_attrs = {
253 "depth",
"intrinsics",
"depth_scale",
"depth_max",
"stride",
255 for (
auto& k : src_attrs) {
256 if (srcs.count(k) == 0) {
258 "[UnprojectKernel] expected Tensor {} in srcs, but " 265 Tensor depth = srcs.at(
"depth");
266 Tensor intrinsics = srcs.at(
"intrinsics");
267 Tensor extrinsics = srcs.at(
"extrinsics");
268 float depth_scale = srcs.at(
"depth_scale").
Item<
float>();
269 float depth_max = srcs.at(
"depth_max").Item<
float>();
270 int64_t
stride = srcs.at(
"stride").Item<int64_t>();
276 int64_t rows_strided = depth_indexer.GetShape(0) /
stride;
277 int64_t cols_strided = depth_indexer.GetShape(1) /
stride;
284 #if defined(BUILD_CUDA_MODULE) && defined(__CUDACC__) 287 int* count_ptr =
static_cast<int*
>(
count.GetDataPtr());
289 std::atomic<int> count_atomic(0);
290 std::atomic<int>* count_ptr = &count_atomic;
293 int64_t n = rows_strided * cols_strided;
294 #if defined(BUILD_CUDA_MODULE) && defined(__CUDACC__) 295 CUDALauncher::LaunchGeneralKernel(
300 int64_t y = (workload_idx / cols_strided) * stride;
301 int64_t x = (workload_idx % cols_strided) * stride;
303 float d = (*
static_cast<uint16_t*
>(
304 depth_indexer.GetDataPtrFromCoord(x, y))) /
306 if (d > 0 && d < depth_max) {
309 float x_c = 0, y_c = 0, z_c = 0;
310 ti.Unproject(static_cast<float>(x), static_cast<float>(y),
311 d, &x_c, &y_c, &z_c);
313 float* vertex =
static_cast<float*
>(
314 point_indexer.GetDataPtrFromCoord(idx));
315 ti.RigidTransform(x_c, y_c, z_c, vertex + 0, vertex + 1,
319 #if defined(BUILD_CUDA_MODULE) && defined(__CUDACC__) 320 int total_pts_count =
count.Item<
int>();
322 int total_pts_count = (*count_ptr).load();
324 dsts.emplace(
"points",
points.Slice(0, 0, total_pts_count));
327 #if defined(BUILD_CUDA_MODULE) && defined(__CUDACC__) 328 void CUDATSDFIntegrateKernel
332 (
const std::unordered_map<std::string, Tensor>& srcs,
333 std::unordered_map<std::string, Tensor>& dsts) {
335 static std::vector<std::string> src_attrs = {
336 "depth",
"indices",
"block_keys",
"intrinsics",
337 "extrinsics",
"resolution",
"voxel_size",
"sdf_trunc",
338 "depth_scale",
"depth_max",
340 for (
auto& k : src_attrs) {
341 if (srcs.count(k) == 0) {
343 "[TSDFIntegrateKernel] expected Tensor {} in srcs, but " 349 Tensor depth = srcs.at(
"depth").
To(core::Dtype::Float32);
350 Tensor indices = srcs.at(
"indices");
351 Tensor block_keys = srcs.at(
"block_keys");
352 Tensor block_values = dsts.at(
"block_values");
355 Tensor intrinsics = srcs.at(
"intrinsics").
To(core::Dtype::Float32);
356 Tensor extrinsics = srcs.at(
"extrinsics").
To(core::Dtype::Float32);
359 int64_t resolution = srcs.at(
"resolution").
Item<int64_t>();
360 int64_t resolution3 = resolution * resolution * resolution;
362 float voxel_size = srcs.at(
"voxel_size").Item<
float>();
363 float sdf_trunc = srcs.at(
"sdf_trunc").Item<
float>();
364 float depth_scale = srcs.at(
"depth_scale").Item<
float>();
365 float depth_max = srcs.at(
"depth_max").Item<
float>();
368 NDArrayIndexer voxel_indexer({resolution, resolution, resolution});
379 bool integrate_color =
false;
380 if (srcs.count(
"color") != 0) {
381 color = srcs.at(
"color").
To(core::Dtype::Float32);
383 integrate_color =
true;
387 int64_t* indices_ptr =
static_cast<int64_t*
>(indices.
GetDataPtr());
389 int64_t n = indices.
GetLength() * resolution3;
391 #if defined(BUILD_CUDA_MODULE) && defined(__CUDACC__) 392 CUDALauncher launcher;
400 int64_t workload_idx) {
402 int64_t block_idx = indices_ptr[workload_idx / resolution3];
403 int64_t voxel_idx = workload_idx % resolution3;
407 int* block_key_ptr =
static_cast<int*
>(
409 int64_t xb =
static_cast<int64_t
>(block_key_ptr[0]);
410 int64_t yb =
static_cast<int64_t
>(block_key_ptr[1]);
411 int64_t zb =
static_cast<int64_t
>(block_key_ptr[2]);
415 voxel_indexer.WorkloadToCoord(voxel_idx, &xv, &yv, &zv);
418 int64_t x = (xb * resolution + xv);
419 int64_t y = (yb * resolution + yv);
420 int64_t z = (zb * resolution + zv);
423 float xc, yc, zc, u, v;
424 transform_indexer.RigidTransform(
425 static_cast<float>(x), static_cast<float>(y),
426 static_cast<float>(z), &xc, &yc, &zc);
429 transform_indexer.Project(xc, yc, zc, &u, &v);
435 float depth = *
static_cast<const float*
>(
437 static_cast<int64_t>(u),
438 static_cast<int64_t>(v))) /
441 float sdf = (depth - zc);
442 if (depth <= 0 || depth > depth_max || zc <= 0 ||
446 sdf = sdf < sdf_trunc ? sdf : sdf_trunc;
450 voxel_t* voxel_ptr =
static_cast<voxel_t*
>(
452 xv, yv, zv, block_idx));
454 if (integrate_color) {
455 float* color_ptr =
static_cast<float*
>(
457 static_cast<int64_t>(u),
458 static_cast<int64_t>(v)));
460 voxel_ptr->Integrate(sdf, color_ptr[0], color_ptr[1],
463 voxel_ptr->Integrate(sdf);
469 #if defined(BUILD_CUDA_MODULE) && defined(__CUDACC__) 470 void CUDAPointExtractionKernel
474 (
const std::unordered_map<std::string, Tensor>& srcs,
475 std::unordered_map<std::string, Tensor>& dsts) {
477 static std::vector<std::string> src_attrs = {
478 "indices",
"nb_indices",
"nb_masks",
"block_keys",
479 "block_values",
"voxel_size",
"resolution",
481 for (
auto& k : src_attrs) {
482 if (srcs.count(k) == 0) {
484 "[TSDFSurfaceExtractionKernel] expected Tensor {} in " 485 "srcs, but did not receive",
490 Tensor indices = srcs.at(
"indices");
491 Tensor nb_indices = srcs.at(
"nb_indices");
492 Tensor nb_masks = srcs.at(
"nb_masks");
493 Tensor block_keys = srcs.at(
"block_keys");
494 Tensor block_values = srcs.at(
"block_values");
497 int64_t resolution = srcs.at(
"resolution").
Item<int64_t>();
498 int64_t resolution3 = resolution * resolution * resolution;
500 float voxel_size = srcs.at(
"voxel_size").Item<
float>();
503 NDArrayIndexer voxel_indexer({resolution, resolution, resolution});
512 int64_t* indices_ptr =
static_cast<int64_t*
>(indices.
GetDataPtr());
515 int64_t n = n_blocks * resolution3;
518 #if defined(BUILD_CUDA_MODULE) && defined(__CUDACC__) 521 int* count_ptr =
static_cast<int*
>(
count.GetDataPtr());
523 std::atomic<int> count_atomic(0);
524 std::atomic<int>* count_ptr = &count_atomic;
527 #if defined(BUILD_CUDA_MODULE) && defined(__CUDACC__) 528 CUDALauncher launcher;
535 voxel_block_buffer_indexer.ElementByteSize(), [&]() {
537 int64_t workload_idx) {
539 int xo,
int yo,
int zo,
540 int curr_block_idx) -> voxel_t* {
541 return DeviceGetVoxelAt<voxel_t>(
542 xo, yo, zo, curr_block_idx,
543 static_cast<int>(resolution),
544 nb_block_masks_indexer,
545 nb_block_indices_indexer,
546 voxel_block_buffer_indexer);
550 int64_t workload_block_idx = workload_idx / resolution3;
551 int64_t block_idx = indices_ptr[workload_block_idx];
552 int64_t voxel_idx = workload_idx % resolution3;
556 voxel_indexer.WorkloadToCoord(voxel_idx, &xv, &yv, &zv);
558 voxel_t* voxel_ptr =
static_cast<voxel_t*
>(
559 voxel_block_buffer_indexer.GetDataPtrFromCoord(
560 xv, yv, zv, block_idx));
561 float tsdf_o = voxel_ptr->GetTSDF();
562 float weight_o = voxel_ptr->GetWeight();
563 if (weight_o <= kWeightThreshold)
return;
566 for (
int i = 0; i < 3; ++i) {
567 voxel_t* ptr = GetVoxelAt(
568 static_cast<int>(xv) + (i == 0),
569 static_cast<int>(yv) + (i == 1),
570 static_cast<int>(zv) + (i == 2),
571 static_cast<int>(workload_block_idx));
572 if (ptr ==
nullptr)
continue;
574 float tsdf_i = ptr->GetTSDF();
575 float weight_i = ptr->GetWeight();
577 if (weight_i > kWeightThreshold &&
578 tsdf_i * tsdf_o < 0) {
585 #if defined(BUILD_CUDA_MODULE) && defined(__CUDACC__) 586 int total_count =
count.Item<
int>();
588 int total_count = (*count_ptr).load();
600 #if defined(BUILD_CUDA_MODULE) && defined(__CUDACC__) 603 count_ptr =
static_cast<int*
>(
count.GetDataPtr());
610 voxel_block_buffer_indexer.ElementByteSize(), [&]() {
611 bool extract_color =
false;
614 if (voxel_t::HasColor()) {
615 extract_color =
true;
622 int64_t workload_idx) {
624 int xo,
int yo,
int zo,
625 int curr_block_idx) -> voxel_t* {
626 return DeviceGetVoxelAt<voxel_t>(
627 xo, yo, zo, curr_block_idx,
628 static_cast<int>(resolution),
629 nb_block_masks_indexer,
630 nb_block_indices_indexer,
631 voxel_block_buffer_indexer);
636 return DeviceGetNormalAt<voxel_t>(
637 xo, yo, zo, curr_block_idx, n,
638 static_cast<int>(resolution), voxel_size,
639 nb_block_masks_indexer,
640 nb_block_indices_indexer,
641 voxel_block_buffer_indexer);
645 int64_t workload_block_idx = workload_idx / resolution3;
646 int64_t block_idx = indices_ptr[workload_block_idx];
647 int64_t voxel_idx = workload_idx % resolution3;
651 int* block_key_ptr =
static_cast<int*
>(
653 int64_t xb =
static_cast<int64_t
>(block_key_ptr[0]);
654 int64_t yb =
static_cast<int64_t
>(block_key_ptr[1]);
655 int64_t zb =
static_cast<int64_t
>(block_key_ptr[2]);
659 voxel_indexer.WorkloadToCoord(voxel_idx, &xv, &yv, &zv);
661 voxel_t* voxel_ptr =
static_cast<voxel_t*
>(
662 voxel_block_buffer_indexer.GetDataPtrFromCoord(
663 xv, yv, zv, block_idx));
664 float tsdf_o = voxel_ptr->GetTSDF();
665 float weight_o = voxel_ptr->GetWeight();
667 if (weight_o <= kWeightThreshold)
return;
669 int64_t x = xb * resolution + xv;
670 int64_t y = yb * resolution + yv;
671 int64_t z = zb * resolution + zv;
673 float no[3] = {0}, ni[3] = {0};
674 GetNormalAt(static_cast<int>(xv), static_cast<int>(yv),
675 static_cast<int>(zv),
676 static_cast<int>(workload_block_idx), no);
679 for (
int i = 0; i < 3; ++i) {
680 voxel_t* ptr = GetVoxelAt(
681 static_cast<int>(xv) + (i == 0),
682 static_cast<int>(yv) + (i == 1),
683 static_cast<int>(zv) + (i == 2),
684 static_cast<int>(workload_block_idx));
685 if (ptr ==
nullptr)
continue;
687 float tsdf_i = ptr->GetTSDF();
688 float weight_i = ptr->GetWeight();
690 if (weight_i > kWeightThreshold &&
691 tsdf_i * tsdf_o < 0) {
692 float ratio = (0 - tsdf_o) / (tsdf_i - tsdf_o);
696 float* point_ptr =
static_cast<float*
>(
697 point_indexer.GetDataPtrFromCoord(idx));
699 voxel_size * (x + ratio *
int(i == 0));
701 voxel_size * (y + ratio *
int(i == 1));
703 voxel_size * (z + ratio *
int(i == 2));
704 GetNormalAt(static_cast<int>(xv) + (i == 0),
705 static_cast<int>(yv) + (i == 1),
706 static_cast<int>(zv) + (i == 2),
707 static_cast<int>(workload_block_idx),
710 float* normal_ptr =
static_cast<float*
>(
712 float nx = (1 - ratio) * no[0] + ratio * ni[0];
713 float ny = (1 - ratio) * no[1] + ratio * ni[1];
714 float nz = (1 - ratio) * no[2] + ratio * ni[2];
715 float norm =
static_cast<float>(
716 sqrt(nx * nx + ny * ny + nz * nz) + 1e-5);
717 normal_ptr[0] = nx / norm;
718 normal_ptr[1] = ny / norm;
719 normal_ptr[2] = nz / norm;
722 float* color_ptr =
static_cast<float*
>(
725 float r_o = voxel_ptr->GetR();
726 float g_o = voxel_ptr->GetG();
727 float b_o = voxel_ptr->GetB();
729 float r_i = ptr->GetR();
730 float g_i = ptr->GetG();
731 float b_i = ptr->GetB();
734 ((1 - ratio) * r_o + ratio * r_i) /
737 ((1 - ratio) * g_o + ratio * g_i) /
740 ((1 - ratio) * b_o + ratio * b_i) /
746 dsts.emplace(
"points",
points);
747 dsts.emplace(
"normals", normals);
750 dsts.emplace(
"colors", colors);
755 #if defined(BUILD_CUDA_MODULE) && defined(__CUDACC__) 756 void CUDAMeshExtractionKernel
760 (
const std::unordered_map<std::string, Tensor>& srcs,
761 std::unordered_map<std::string, Tensor>& dsts) {
763 static std::vector<std::string> src_attrs = {
764 "indices",
"inv_indices",
"nb_indices",
"nb_masks",
765 "block_keys",
"block_values",
"voxel_size",
"resolution",
767 for (
auto& k : src_attrs) {
768 if (srcs.count(k) == 0) {
770 "[CUDAMarchingCubesKernel] expected Tensor {} in " 777 Tensor indices = srcs.at(
"indices");
778 Tensor inv_indices = srcs.at(
"inv_indices");
779 Tensor nb_indices = srcs.at(
"nb_indices");
780 Tensor nb_masks = srcs.at(
"nb_masks");
781 Tensor block_keys = srcs.at(
"block_keys");
782 Tensor block_values = srcs.at(
"block_values");
785 int64_t resolution = srcs.at(
"resolution").
Item<int64_t>();
786 int64_t resolution3 = resolution * resolution * resolution;
788 float voxel_size = srcs.at(
"voxel_size").Item<
float>();
791 NDArrayIndexer voxel_indexer({resolution, resolution, resolution});
794 #if defined(BUILD_CUDA_MODULE) && defined(__CUDACC__) 798 int n_blocks =
static_cast<int>(indices.
GetLength());
804 {n_blocks, resolution, resolution, resolution, 4},
806 }
catch (
const std::runtime_error&) {
808 "[MeshExtractionKernel] Unable to allocate assistance mesh " 809 "structure for Marching " 810 "Cubes with {} active voxel blocks. Please consider using a " 811 "larger voxel size (currently {}) for TSDF " 812 "integration, or using tsdf_volume.cpu() to perform mesh " 813 "extraction on CPU.",
814 n_blocks, voxel_size);
824 int64_t* indices_ptr =
static_cast<int64_t*
>(indices.
GetDataPtr());
825 int64_t* inv_indices_ptr =
static_cast<int64_t*
>(inv_indices.
GetDataPtr());
827 int64_t n = n_blocks * resolution3;
829 #if defined(BUILD_CUDA_MODULE) && defined(__CUDACC__) 830 CUDALauncher launcher;
840 int64_t workload_idx) {
842 int xo,
int yo,
int zo,
843 int curr_block_idx) -> voxel_t* {
844 return DeviceGetVoxelAt<voxel_t>(
845 xo, yo, zo, curr_block_idx,
846 static_cast<int>(resolution),
847 nb_block_masks_indexer,
848 nb_block_indices_indexer,
849 voxel_block_buffer_indexer);
853 int64_t workload_block_idx = workload_idx / resolution3;
854 int64_t voxel_idx = workload_idx % resolution3;
858 voxel_indexer.WorkloadToCoord(voxel_idx, &xv, &yv, &zv);
862 for (
int i = 0; i < 8; ++i) {
863 voxel_t* voxel_ptr_i = GetVoxelAt(
864 static_cast<int>(xv) + vtx_shifts[i][0],
865 static_cast<int>(yv) + vtx_shifts[i][1],
866 static_cast<int>(zv) + vtx_shifts[i][2],
867 static_cast<int>(workload_block_idx));
868 if (voxel_ptr_i ==
nullptr)
return;
870 float tsdf_i = voxel_ptr_i->GetTSDF();
871 float weight_i = voxel_ptr_i->GetWeight();
872 if (weight_i <= kWeightThreshold)
return;
874 table_idx |= ((tsdf_i < 0) ? (1 << i) : 0);
877 int* mesh_struct_ptr =
static_cast<int*
>(
879 xv, yv, zv, workload_block_idx));
880 mesh_struct_ptr[3] = table_idx;
882 if (table_idx == 0 || table_idx == 255)
return;
885 int edges_with_vertices = edge_table[table_idx];
886 for (
int i = 0; i < 12; ++i) {
887 if (edges_with_vertices & (1 << i)) {
888 int64_t xv_i = xv + edge_shifts[i][0];
889 int64_t yv_i = yv + edge_shifts[i][1];
890 int64_t zv_i = zv + edge_shifts[i][2];
891 int edge_i = edge_shifts[i][3];
893 int dxb =
static_cast<int>(xv_i / resolution);
894 int dyb =
static_cast<int>(yv_i / resolution);
895 int dzb =
static_cast<int>(zv_i / resolution);
898 (dxb + 1) + (dyb + 1) * 3 + (dzb + 1) * 9;
900 int64_t block_idx_i = *
static_cast<int64_t*
>(
901 nb_block_indices_indexer
905 int* mesh_ptr_i =
static_cast<int*
>(
907 xv_i - dxb * resolution,
908 yv_i - dyb * resolution,
909 zv_i - dzb * resolution,
910 inv_indices_ptr[block_idx_i]));
913 mesh_ptr_i[edge_i] = -1;
920 #if defined(BUILD_CUDA_MODULE) && defined(__CUDACC__) 923 int* vtx_count_ptr =
static_cast<int*
>(vtx_count.GetDataPtr());
925 std::atomic<int> vtx_count_atomic(0);
926 std::atomic<int>* vtx_count_ptr = &vtx_count_atomic;
929 #if defined(BUILD_CUDA_MODULE) && defined(__CUDACC__) 930 CUDALauncher::LaunchGeneralKernel(
936 int64_t workload_block_idx = workload_idx / resolution3;
937 int64_t voxel_idx = workload_idx % resolution3;
941 voxel_indexer.WorkloadToCoord(voxel_idx, &xv, &yv, &zv);
944 int* mesh_struct_ptr =
static_cast<int*
>(
946 xv, yv, zv, workload_block_idx));
949 if (mesh_struct_ptr[0] != -1 && mesh_struct_ptr[1] != -1 &&
950 mesh_struct_ptr[2] != -1) {
955 for (
int e = 0; e < 3; ++e) {
956 int vertex_idx = mesh_struct_ptr[e];
957 if (vertex_idx != -1)
continue;
964 #if defined(BUILD_CUDA_MODULE) && defined(__CUDACC__) 965 int total_vtx_count = vtx_count.Item<
int>();
968 vtx_count_ptr =
static_cast<int*
>(vtx_count.GetDataPtr());
970 int total_vtx_count = (*vtx_count_ptr).load();
971 (*vtx_count_ptr) = 0;
987 bool extract_color =
false;
990 if (voxel_t::HasColor()) {
991 extract_color =
true;
997 int64_t workload_idx) {
999 int xo,
int yo,
int zo,
1000 int curr_block_idx) -> voxel_t* {
1001 return DeviceGetVoxelAt<voxel_t>(
1002 xo, yo, zo, curr_block_idx,
1003 static_cast<int>(resolution),
1004 nb_block_masks_indexer,
1005 nb_block_indices_indexer,
1006 voxel_block_buffer_indexer);
1009 auto GetNormalAt = [&]
OPEN3D_DEVICE(
int xo,
int yo,
int zo,
1012 return DeviceGetNormalAt<voxel_t>(
1013 xo, yo, zo, curr_block_idx, n,
1014 static_cast<int>(resolution), voxel_size,
1015 nb_block_masks_indexer,
1016 nb_block_indices_indexer,
1017 voxel_block_buffer_indexer);
1021 int64_t workload_block_idx = workload_idx / resolution3;
1022 int64_t block_idx = indices_ptr[workload_block_idx];
1023 int64_t voxel_idx = workload_idx % resolution3;
1026 int* block_key_ptr =
static_cast<int*
>(
1027 block_keys_indexer.GetDataPtrFromCoord(block_idx));
1028 int64_t xb =
static_cast<int64_t
>(block_key_ptr[0]);
1029 int64_t yb =
static_cast<int64_t
>(block_key_ptr[1]);
1030 int64_t zb =
static_cast<int64_t
>(block_key_ptr[2]);
1034 voxel_indexer.WorkloadToCoord(voxel_idx, &xv, &yv, &zv);
1037 int64_t x = xb * resolution + xv;
1038 int64_t y = yb * resolution + yv;
1039 int64_t z = zb * resolution + zv;
1042 int* mesh_struct_ptr =
static_cast<int*
>(
1044 xv, yv, zv, workload_block_idx));
1047 if (mesh_struct_ptr[0] != -1 && mesh_struct_ptr[1] != -1 &&
1048 mesh_struct_ptr[2] != -1) {
1053 voxel_t* voxel_ptr =
static_cast<voxel_t*
>(
1055 xv, yv, zv, block_idx));
1056 float tsdf_o = voxel_ptr->GetTSDF();
1057 float no[3] = {0}, ne[3] = {0};
1058 GetNormalAt(static_cast<int>(xv), static_cast<int>(yv),
1059 static_cast<int>(zv),
1060 static_cast<int>(workload_block_idx), no);
1063 for (
int e = 0; e < 3; ++e) {
1064 int vertex_idx = mesh_struct_ptr[e];
1065 if (vertex_idx != -1)
continue;
1067 voxel_t* voxel_ptr_e = GetVoxelAt(
1068 static_cast<int>(xv) + (e == 0),
1069 static_cast<int>(yv) + (e == 1),
1070 static_cast<int>(zv) + (e == 2),
1071 static_cast<int>(workload_block_idx));
1072 float tsdf_e = voxel_ptr_e->GetTSDF();
1073 float ratio = (0 - tsdf_o) / (tsdf_e - tsdf_o);
1076 mesh_struct_ptr[e] = idx;
1078 float ratio_x = ratio *
int(e == 0);
1079 float ratio_y = ratio *
int(e == 1);
1080 float ratio_z = ratio *
int(e == 2);
1082 float* vertex_ptr =
static_cast<float*
>(
1084 vertex_ptr[0] = voxel_size * (x + ratio_x);
1085 vertex_ptr[1] = voxel_size * (y + ratio_y);
1086 vertex_ptr[2] = voxel_size * (z + ratio_z);
1088 float* normal_ptr =
static_cast<float*
>(
1090 GetNormalAt(static_cast<int>(xv) + (e == 0),
1091 static_cast<int>(yv) + (e == 1),
1092 static_cast<int>(zv) + (e == 2),
1093 static_cast<int>(workload_block_idx), ne);
1094 float nx = (1 - ratio) * no[0] + ratio * ne[0];
1095 float ny = (1 - ratio) * no[1] + ratio * ne[1];
1096 float nz = (1 - ratio) * no[2] + ratio * ne[2];
1097 float norm =
static_cast<float>(
1098 sqrt(nx * nx + ny * ny + nz * nz) + 1e-5);
1099 normal_ptr[0] = nx / norm;
1100 normal_ptr[1] = ny / norm;
1101 normal_ptr[2] = nz / norm;
1103 if (extract_color) {
1104 float* color_ptr =
static_cast<float*
>(
1106 float r_o = voxel_ptr->GetR();
1107 float g_o = voxel_ptr->GetG();
1108 float b_o = voxel_ptr->GetB();
1110 float r_e = voxel_ptr_e->GetR();
1111 float g_e = voxel_ptr_e->GetG();
1112 float b_e = voxel_ptr_e->GetB();
1114 ((1 - ratio) * r_o + ratio * r_e) / 255.0f;
1116 ((1 - ratio) * g_o + ratio * g_e) / 255.0f;
1118 ((1 - ratio) * b_o + ratio * b_e) / 255.0f;
1122 dsts.emplace(
"vertices", vertices);
1123 dsts.emplace(
"normals", normals);
1125 if (extract_color) {
1126 dsts.emplace(
"colors", colors);
1131 #if defined(BUILD_CUDA_MODULE) && defined(__CUDACC__) 1134 int* tri_count_ptr =
static_cast<int*
>(triangle_count.GetDataPtr());
1136 std::atomic<int> tri_count_atomic(0);
1137 std::atomic<int>* tri_count_ptr = &tri_count_atomic;
1144 #if defined(BUILD_CUDA_MODULE) && defined(__CUDACC__) 1145 CUDALauncher::LaunchGeneralKernel(
1151 int64_t workload_block_idx = workload_idx / resolution3;
1152 int64_t voxel_idx = workload_idx % resolution3;
1156 voxel_indexer.WorkloadToCoord(voxel_idx, &xv, &yv, &zv);
1159 int* mesh_struct_ptr =
static_cast<int*
>(
1161 xv, yv, zv, workload_block_idx));
1163 int table_idx = mesh_struct_ptr[3];
1164 if (tri_count[table_idx] == 0)
return;
1166 for (
size_t tri = 0; tri < 16; tri += 3) {
1167 if (tri_table[table_idx][tri] == -1)
return;
1171 for (
size_t vertex = 0; vertex < 3; ++vertex) {
1172 int edge = tri_table[table_idx][tri + vertex];
1174 int64_t xv_i = xv + edge_shifts[edge][0];
1175 int64_t yv_i = yv + edge_shifts[edge][1];
1176 int64_t zv_i = zv + edge_shifts[edge][2];
1177 int64_t edge_i = edge_shifts[edge][3];
1179 int dxb =
static_cast<int>(xv_i / resolution);
1180 int dyb =
static_cast<int>(yv_i / resolution);
1181 int dzb =
static_cast<int>(zv_i / resolution);
1183 int nb_idx = (dxb + 1) + (dyb + 1) * 3 + (dzb + 1) * 9;
1185 int64_t block_idx_i = *
static_cast<int64_t*
>(
1187 workload_block_idx, nb_idx));
1188 int* mesh_struct_ptr_i =
static_cast<int*
>(
1190 xv_i - dxb * resolution,
1191 yv_i - dyb * resolution,
1192 zv_i - dzb * resolution,
1193 inv_indices_ptr[block_idx_i]));
1195 int64_t* triangle_ptr =
static_cast<int64_t*
>(
1196 triangle_indexer.GetDataPtrFromCoord(tri_idx));
1197 triangle_ptr[2 - vertex] = mesh_struct_ptr_i[edge_i];
1202 #if defined(BUILD_CUDA_MODULE) && defined(__CUDACC__) 1203 int total_tri_count = triangle_count.Item<
int>();
1205 int total_tri_count = (*tri_count_ptr).load();
1208 triangles = triangles.Slice(0, 0, total_tri_count);
1209 dsts.emplace(
"triangles", triangles);
Definition: GeneralEWSharedImpl.h:149
OPEN3D_HOST_DEVICE float GetTSDF()
Definition: GeneralEWSharedImpl.h:109
OPEN3D_HOST_DEVICE float GetR()
Definition: GeneralEWSharedImpl.h:160
float g
Definition: GeneralEWSharedImpl.h:154
void CPUUnprojectKernel(const std::unordered_map< std::string, Tensor > &srcs, std::unordered_map< std::string, Tensor > &dsts)
Definition: GeneralEWSharedImpl.h:250
OPEN3D_HOST_DEVICE float GetG()
Definition: GeneralEWSharedImpl.h:161
void CPUTSDFIntegrateKernel(const std::unordered_map< std::string, Tensor > &srcs, std::unordered_map< std::string, Tensor > &dsts)
Definition: GeneralEWSharedImpl.h:332
Definition: GeneralEWSharedImpl.h:68
float r
Definition: GeneralEWSharedImpl.h:153
void ReleaseCache()
Definition: CUDAUtils.cpp:55
Tensor Inverse() const
Definition: Tensor.cpp:1293
Definition: CPULauncher.h:42
void * GetDataPtr()
Definition: Tensor.h:961
void CPUMeshExtractionKernel(const std::unordered_map< std::string, Tensor > &srcs, std::unordered_map< std::string, Tensor > &dsts)
Definition: GeneralEWSharedImpl.h:760
OPEN3D_HOST_DEVICE float GetTSDF()
Definition: GeneralEWSharedImpl.h:73
OPEN3D_HOST_DEVICE void Integrate(float dsdf, float dr, float dg, float db)
Definition: GeneralEWSharedImpl.h:168
void LogError(const char *format, const Args &... args)
Definition: Console.h:176
OPEN3D_DEVICE voxel_t * DeviceGetVoxelAt(int xo, int yo, int zo, int curr_block_idx, int resolution, const NDArrayIndexer &nb_block_masks_indexer, const NDArrayIndexer &nb_block_indices_indexer, const NDArrayIndexer &blocks_indexer)
Definition: GeneralEWSharedImpl.h:184
OPEN3D_HOST_DEVICE float GetR()
Definition: GeneralEWSharedImpl.h:111
Definition: GeneralEWSharedImpl.h:97
Definition: GeneralIndexer.h:133
OPEN3D_HOST_DEVICE float GetG()
Definition: GeneralEWSharedImpl.h:114
OPEN3D_HOST_DEVICE float GetTSDF()
Definition: GeneralEWSharedImpl.h:158
uint16_t b
Definition: GeneralEWSharedImpl.h:106
uint16_t r
Definition: GeneralEWSharedImpl.h:104
OPEN3D_HOST_DEVICE float GetWeight()
Definition: GeneralEWSharedImpl.h:74
Device GetDevice() const
Definition: Tensor.cpp:955
#define OPEN3D_DEVICE
Definition: CUDAUtils.h:55
OPEN3D_HOST_DEVICE float GetB()
Definition: GeneralEWSharedImpl.h:77
OPEN3D_HOST_DEVICE void * GetDataPtrFromCoord(int64_t x) const
Definition: GeneralIndexer.h:278
static const Dtype Int32
Definition: Dtype.h:44
uint16_t g
Definition: GeneralEWSharedImpl.h:105
OPEN3D_HOST_DEVICE float GetR()
Definition: GeneralEWSharedImpl.h:75
OPEN3D_HOST_DEVICE void Integrate(float dsdf, float dr, float dg, float db)
Definition: GeneralEWSharedImpl.h:83
OPEN3D_HOST_DEVICE float GetG()
Definition: GeneralEWSharedImpl.h:76
math::float4 color
Definition: LineSetBuffers.cpp:64
OPEN3D_HOST_DEVICE float GetB()
Definition: GeneralEWSharedImpl.h:162
#define OPEN3D_HOST_DEVICE
Definition: CUDAUtils.h:54
#define DISPATCH_BYTESIZE_TO_VOXEL(BYTESIZE,...)
Definition: GeneralEWSharedImpl.h:45
float tsdf
Definition: GeneralEWSharedImpl.h:150
OPEN3D_HOST_DEVICE void Integrate(float dsdf, float dr, float dg, float db)
Definition: GeneralEWSharedImpl.h:128
static bool HasColor()
Definition: GeneralEWSharedImpl.h:157
Tensor To(Dtype dtype, bool copy=false) const
Definition: Tensor.cpp:453
OPEN3D_HOST_DEVICE float GetWeight()
Definition: GeneralEWSharedImpl.h:110
OPEN3D_HOST_DEVICE void Integrate(float dsdf)
Definition: GeneralEWSharedImpl.h:120
static bool HasColor()
Definition: GeneralEWSharedImpl.h:108
OPEN3D_HOST_DEVICE int64_t ElementByteSize()
Definition: GeneralIndexer.h:189
OPEN3D_HOST_DEVICE float GetB()
Definition: GeneralEWSharedImpl.h:117
const char const char value recording_handle imu_sample recording_handle uint8_t size_t data_size k4a_record_configuration_t config target_format k4a_capture_t capture_handle k4a_imu_sample_t imu_sample playback_handle k4a_logging_message_cb_t void min_level device_handle k4a_imu_sample_t timeout_in_ms capture_handle capture_handle capture_handle image_handle temperature_c int
Definition: K4aPlugin.cpp:479
float weight
Definition: GeneralEWSharedImpl.h:151
static Tensor Zeros(const SizeVector &shape, Dtype dtype, const Device &device=Device("CPU:0"))
Create a tensor filled with zeros.
Definition: Tensor.cpp:182
size_t stride
Definition: TriangleMeshBuffers.cpp:183
int count
Definition: FilePCD.cpp:61
static const Dtype Float32
Definition: Dtype.h:42
void CPUPointExtractionKernel(const std::unordered_map< std::string, Tensor > &srcs, std::unordered_map< std::string, Tensor > &dsts)
Definition: GeneralEWSharedImpl.h:474
static void LaunchGeneralKernel(int64_t n, func_t element_kernel)
General kernels with non-conventional indexers.
Definition: CPULauncher.h:176
float weight
Definition: GeneralEWSharedImpl.h:70
float tsdf
Definition: GeneralEWSharedImpl.h:69
int points
Definition: FilePCD.cpp:73
static const Dtype Int64
Definition: Dtype.h:45
Definition: PinholeCameraIntrinsic.cpp:35
OPEN3D_HOST_DEVICE float GetWeight()
Definition: GeneralEWSharedImpl.h:159
OPEN3D_HOST_DEVICE bool InBoundary(float x, float y) const
Definition: GeneralIndexer.h:257
uint16_t weight
Definition: GeneralEWSharedImpl.h:102
float b
Definition: GeneralEWSharedImpl.h:155
#define OPEN3D_ATOMIC_ADD(X, Y)
Definition: GeneralEWSharedImpl.h:42
int64_t GetLength() const
Definition: Tensor.h:943
OPEN3D_DEVICE void DeviceGetNormalAt(int xo, int yo, int zo, int curr_block_idx, float *n, int resolution, float voxel_size, const NDArrayIndexer &nb_block_masks_indexer, const NDArrayIndexer &nb_block_indices_indexer, const NDArrayIndexer &blocks_indexer)
Definition: GeneralEWSharedImpl.h:218
static bool HasColor()
Definition: GeneralEWSharedImpl.h:72
T Item() const
Definition: Tensor.h:446
void LogInfo(const char *format, const Args &... args)
Definition: Console.h:186
OPEN3D_HOST_DEVICE void Integrate(float dsdf)
Definition: GeneralEWSharedImpl.h:79
OPEN3D_HOST_DEVICE void Integrate(float dsdf)
Definition: GeneralEWSharedImpl.h:163
float tsdf
Definition: GeneralEWSharedImpl.h:101