Open3D (C++ API)  0.18.0+252c867
ImageImpl.h
Go to the documentation of this file.
1 // ----------------------------------------------------------------------------
2 // - Open3D: www.open3d.org -
3 // ----------------------------------------------------------------------------
4 // Copyright (c) 2018-2023 www.open3d.org
5 // SPDX-License-Identifier: MIT
6 // ----------------------------------------------------------------------------
7 
8 #include <limits>
9 
10 #include "open3d/core/CUDAUtils.h"
11 #include "open3d/core/Dispatch.h"
12 #include "open3d/core/Indexer.h"
13 #include "open3d/core/Tensor.h"
16 
17 namespace open3d {
18 namespace t {
19 namespace geometry {
20 namespace kernel {
21 namespace image {
22 
23 #ifndef __CUDACC__
24 using std::isinf;
25 using std::isnan;
26 #endif
27 
28 #ifdef __CUDACC__
29 void ToCUDA
30 #else
31 void ToCPU
32 #endif
33  (const core::Tensor& src,
34  core::Tensor& dst,
35  double scale,
36  double offset) {
37  core::Indexer indexer({src}, dst, core::DtypePolicy::NONE);
38  // elem_t: corresponds to dst_dtype.
39  // scalar_t: corresponds to src_dtype.
40  // calc_t: calculation type for intermediate results.
41 #define LINEAR_SATURATE(elem_t, calc_t) \
42  elem_t limits[2] = {std::numeric_limits<elem_t>::min(), \
43  std::numeric_limits<elem_t>::max()}; \
44  calc_t c_scale = static_cast<calc_t>(scale); \
45  calc_t c_offset = static_cast<calc_t>(offset); \
46  DISPATCH_DTYPE_TO_TEMPLATE(src.GetDtype(), [&]() { \
47  core::ParallelFor( \
48  src.GetDevice(), indexer.NumWorkloads(), \
49  [=] OPEN3D_DEVICE(int64_t workload_idx) { \
50  auto src_ptr = \
51  indexer.GetInputPtr<scalar_t>(0, workload_idx); \
52  auto dst_ptr = indexer.GetOutputPtr<elem_t>(workload_idx); \
53  calc_t out = static_cast<calc_t>(*src_ptr) * c_scale + \
54  c_offset; \
55  out = out < limits[0] ? limits[0] : out; \
56  out = out > limits[1] ? limits[1] : out; \
57  *dst_ptr = static_cast<elem_t>(out); \
58  }); \
59  });
60  core::Dtype dst_dtype = dst.GetDtype();
61  if (dst_dtype == core::Float32) {
62  LINEAR_SATURATE(float, float)
63  } else if (dst_dtype == core::Float64) {
64  LINEAR_SATURATE(double, double)
65  } else if (dst_dtype == core::Int8) {
66  LINEAR_SATURATE(int8_t, float)
67  } else if (dst_dtype == core::UInt8) {
68  LINEAR_SATURATE(uint8_t, float)
69  } else if (dst_dtype == core::Int16) {
70  LINEAR_SATURATE(int16_t, float)
71  } else if (dst_dtype == core::UInt16) {
72  LINEAR_SATURATE(uint16_t, float)
73  } else if (dst_dtype == core::Int32) {
74  LINEAR_SATURATE(int32_t, double)
75  } else if (dst_dtype == core::UInt32) {
76  LINEAR_SATURATE(uint32_t, double)
77  } else if (dst_dtype == core::Int64) {
78  LINEAR_SATURATE(int64_t, double)
79  } else if (dst_dtype == core::UInt64) {
80  LINEAR_SATURATE(uint64_t, double)
81  }
82 #undef LINEAR_SATURATE
83 }
84 
85 #ifdef __CUDACC__
86 void ClipTransformCUDA
87 #else
89 #endif
90  (const core::Tensor& src,
91  core::Tensor& dst,
92  float scale,
93  float min_value,
94  float max_value,
95  float clip_fill) {
96  NDArrayIndexer src_indexer(src, 2);
97  NDArrayIndexer dst_indexer(dst, 2);
98 
99  int64_t rows = src.GetShape(0);
100  int64_t cols = dst.GetShape(1);
101  int64_t n = rows * cols;
102 
103  DISPATCH_DTYPE_TO_TEMPLATE(src.GetDtype(), [&]() {
104  core::ParallelFor(src.GetDevice(), n,
105  [=] OPEN3D_DEVICE(int64_t workload_idx) {
106  int64_t y = workload_idx / cols;
107  int64_t x = workload_idx % cols;
108 
109  float in = static_cast<float>(
110  *src_indexer.GetDataPtr<scalar_t>(x, y));
111  float out = in / scale;
112  out = out <= min_value ? clip_fill : out;
113  out = out >= max_value ? clip_fill : out;
114  *dst_indexer.GetDataPtr<float>(x, y) = out;
115  });
116  });
117 }
118 
119 // Reimplementation of the reference:
120 // https://github.com/mp3guy/ICPCUDA/blob/master/Cuda/pyrdown.cu#L41
121 #ifdef __CUDACC__
122 void PyrDownDepthCUDA
123 #else
125 #endif
126  (const core::Tensor& src,
127  core::Tensor& dst,
128  float depth_diff,
129  float invalid_fill) {
130  NDArrayIndexer src_indexer(src, 2);
131  NDArrayIndexer dst_indexer(dst, 2);
132 
133  int rows = src_indexer.GetShape(0);
134  int cols = src_indexer.GetShape(1);
135 
136  int rows_down = dst_indexer.GetShape(0);
137  int cols_down = dst_indexer.GetShape(1);
138  int n = rows_down * cols_down;
139 
140  // Gaussian filter window size
141  // Gaussian filter weights
142  const int gkernel_size = 5;
143  const int gkernel_size_2 = gkernel_size / 2;
144  const float gweights[3] = {0.375f, 0.25f, 0.0625f};
145 
146 #ifndef __CUDACC__
147  using std::abs;
148  using std::max;
149  using std::min;
150 #endif
151 
153  src.GetDevice(), n, [=] OPEN3D_DEVICE(int64_t workload_idx) {
154  int y = workload_idx / cols_down;
155  int x = workload_idx % cols_down;
156 
157  int y_src = 2 * y;
158  int x_src = 2 * x;
159 
160  float v_center = *src_indexer.GetDataPtr<float>(x_src, y_src);
161  if (v_center == invalid_fill) {
162  *dst_indexer.GetDataPtr<float>(x, y) = invalid_fill;
163  return;
164  }
165 
166  int x_min = max(0, x_src - gkernel_size_2);
167  int y_min = max(0, y_src - gkernel_size_2);
168 
169  int x_max = min(cols - 1, x_src + gkernel_size_2);
170  int y_max = min(rows - 1, y_src + gkernel_size_2);
171 
172  float v_sum = 0;
173  float w_sum = 0;
174  for (int yk = y_min; yk <= y_max; ++yk) {
175  for (int xk = x_min; xk <= x_max; ++xk) {
176  float v = *src_indexer.GetDataPtr<float>(xk, yk);
177  int dy = abs(yk - y_src);
178  int dx = abs(xk - x_src);
179 
180  if (v != invalid_fill &&
181  abs(v - v_center) < depth_diff) {
182  float w = gweights[dx] * gweights[dy];
183  v_sum += w * v;
184  w_sum += w;
185  }
186  }
187  }
188 
189  *dst_indexer.GetDataPtr<float>(x, y) =
190  w_sum == 0 ? invalid_fill : v_sum / w_sum;
191  });
192 }
193 
194 #ifdef __CUDACC__
195 void CreateVertexMapCUDA
196 #else
198 #endif
199  (const core::Tensor& src,
200  core::Tensor& dst,
201  const core::Tensor& intrinsics,
202  float invalid_fill) {
203  NDArrayIndexer src_indexer(src, 2);
204  NDArrayIndexer dst_indexer(dst, 2);
206  core::Device("CPU:0")));
207 
208  int64_t rows = src.GetShape(0);
209  int64_t cols = src.GetShape(1);
210  int64_t n = rows * cols;
211 
212 #ifndef __CUDACC__
213  using std::isinf;
214  using std::isnan;
215 #endif
216 
218  src.GetDevice(), n, [=] OPEN3D_DEVICE(int64_t workload_idx) {
219  auto is_invalid = [invalid_fill] OPEN3D_DEVICE(float v) {
220  if (isinf(invalid_fill)) return isinf(v);
221  if (isnan(invalid_fill)) return isnan(v);
222  return v == invalid_fill;
223  };
224 
225  int64_t y = workload_idx / cols;
226  int64_t x = workload_idx % cols;
227 
228  float d = *src_indexer.GetDataPtr<float>(x, y);
229 
230  float* vertex = dst_indexer.GetDataPtr<float>(x, y);
231  if (!is_invalid(d)) {
232  ti.Unproject(static_cast<float>(x), static_cast<float>(y),
233  d, vertex + 0, vertex + 1, vertex + 2);
234  } else {
235  vertex[0] = invalid_fill;
236  vertex[1] = invalid_fill;
237  vertex[2] = invalid_fill;
238  }
239  });
240 }
241 #ifdef __CUDACC__
242 void CreateNormalMapCUDA
243 #else
245 #endif
246  (const core::Tensor& src, core::Tensor& dst, float invalid_fill) {
247  NDArrayIndexer src_indexer(src, 2);
248  NDArrayIndexer dst_indexer(dst, 2);
249 
250  int64_t rows = src_indexer.GetShape(0);
251  int64_t cols = src_indexer.GetShape(1);
252  int64_t n = rows * cols;
253 
255  src.GetDevice(), n, [=] OPEN3D_DEVICE(int64_t workload_idx) {
256  int64_t y = workload_idx / cols;
257  int64_t x = workload_idx % cols;
258 
259  float* normal = dst_indexer.GetDataPtr<float>(x, y);
260 
261  if (y < rows - 1 && x < cols - 1) {
262  float* v00 = src_indexer.GetDataPtr<float>(x, y);
263  float* v10 = src_indexer.GetDataPtr<float>(x + 1, y);
264  float* v01 = src_indexer.GetDataPtr<float>(x, y + 1);
265 
266  if ((v00[0] == invalid_fill && v00[1] == invalid_fill &&
267  v00[2] == invalid_fill) ||
268  (v01[0] == invalid_fill && v01[1] == invalid_fill &&
269  v01[2] == invalid_fill) ||
270  (v10[0] == invalid_fill && v10[1] == invalid_fill &&
271  v10[2] == invalid_fill)) {
272  normal[0] = invalid_fill;
273  normal[1] = invalid_fill;
274  normal[2] = invalid_fill;
275  return;
276  }
277 
278  float dx0 = v01[0] - v00[0];
279  float dy0 = v01[1] - v00[1];
280  float dz0 = v01[2] - v00[2];
281 
282  float dx1 = v10[0] - v00[0];
283  float dy1 = v10[1] - v00[1];
284  float dz1 = v10[2] - v00[2];
285 
286  normal[0] = dy0 * dz1 - dz0 * dy1;
287  normal[1] = dz0 * dx1 - dx0 * dz1;
288  normal[2] = dx0 * dy1 - dy0 * dx1;
289 
290  constexpr float EPSILON = 1e-5f;
291  float normal_norm =
292  sqrt(normal[0] * normal[0] + normal[1] * normal[1] +
293  normal[2] * normal[2]);
294  normal_norm = std::max(normal_norm, EPSILON);
295  normal[0] /= normal_norm;
296  normal[1] /= normal_norm;
297  normal[2] /= normal_norm;
298  } else {
299  normal[0] = invalid_fill;
300  normal[1] = invalid_fill;
301  normal[2] = invalid_fill;
302  }
303  });
304 }
305 
306 #ifdef __CUDACC__
307 void ColorizeDepthCUDA
308 #else
310 #endif
311  (const core::Tensor& src,
312  core::Tensor& dst,
313  float scale,
314  float min_value,
315  float max_value) {
316  NDArrayIndexer src_indexer(src, 2);
317  NDArrayIndexer dst_indexer(dst, 2);
318 
319  int64_t rows = src.GetShape(0);
320  int64_t cols = dst.GetShape(1);
321  int64_t n = rows * cols;
322 
323  float inv_interval = 255.0f / (max_value - min_value);
324  DISPATCH_DTYPE_TO_TEMPLATE(src.GetDtype(), [&]() {
325  core::ParallelFor(
326  src.GetDevice(), n, [=] OPEN3D_DEVICE(int64_t workload_idx) {
327  int64_t y = workload_idx / cols;
328  int64_t x = workload_idx % cols;
329 
330  float in = static_cast<float>(
331  *src_indexer.GetDataPtr<scalar_t>(x, y));
332  float out = in / scale;
333  out = out <= min_value ? min_value : out;
334  out = out >= max_value ? max_value : out;
335 
336  int idx =
337  static_cast<int>(inv_interval * (out - min_value));
338  uint8_t* out_ptr = dst_indexer.GetDataPtr<uint8_t>(x, y);
339  out_ptr[0] = turbo_srgb_bytes[idx][0];
340  out_ptr[1] = turbo_srgb_bytes[idx][1];
341  out_ptr[2] = turbo_srgb_bytes[idx][2];
342  });
343  });
344 }
345 
346 } // namespace image
347 } // namespace kernel
348 } // namespace geometry
349 } // namespace t
350 } // namespace open3d
Common CUDA utilities.
#define OPEN3D_DEVICE
Definition: CUDAUtils.h:45
#define DISPATCH_DTYPE_TO_TEMPLATE(DTYPE,...)
Definition: Dispatch.h:30
std::shared_ptr< core::Tensor > image
Definition: FilamentRenderer.cpp:183
#define LINEAR_SATURATE(elem_t, calc_t)
Definition: Device.h:18
Definition: Dtype.h:20
Definition: Indexer.h:261
Definition: Tensor.h:32
static Tensor Eye(int64_t n, Dtype dtype, const Device &device)
Create an identity matrix of size n x n.
Definition: Tensor.cpp:386
Definition: GeometryIndexer.h:161
OPEN3D_HOST_DEVICE void * GetDataPtr() const
Definition: GeometryIndexer.h:315
OPEN3D_HOST_DEVICE index_t GetShape(int i) const
Definition: GeometryIndexer.h:311
Helper class for converting coordinates/indices between 3D/3D, 3D/2D, 2D/3D.
Definition: GeometryIndexer.h:25
OPEN3D_HOST_DEVICE void Unproject(float u_in, float v_in, float d_in, float *x_out, float *y_out, float *z_out) const
Unproject a 2D uv coordinate with depth to 3D in camera coordinate.
Definition: GeometryIndexer.h:111
int offset
Definition: FilePCD.cpp:45
const Dtype UInt32
Definition: Dtype.cpp:50
const Dtype Int64
Definition: Dtype.cpp:47
const Dtype UInt16
Definition: Dtype.cpp:49
const Dtype Int32
Definition: Dtype.cpp:46
const Dtype Int16
Definition: Dtype.cpp:45
const Dtype UInt8
Definition: Dtype.cpp:48
void ParallelFor(const Device &device, int64_t n, const func_t &func)
Definition: ParallelFor.h:103
const Dtype Float64
Definition: Dtype.cpp:43
const Dtype UInt64
Definition: Dtype.cpp:51
const Dtype Int8
Definition: Dtype.cpp:44
const Dtype Float32
Definition: Dtype.cpp:42
const char const char value recording_handle imu_sample recording_handle uint8_t size_t data_size k4a_record_configuration_t config target_format k4a_capture_t capture_handle k4a_imu_sample_t imu_sample playback_handle k4a_logging_message_cb_t void min_level device_handle k4a_imu_sample_t timeout_in_ms capture_handle capture_handle capture_handle image_handle temperature_c k4a_image_t image_handle uint8_t image_handle image_handle image_handle image_handle uint32_t
Definition: K4aPlugin.cpp:548
const char const char value recording_handle imu_sample recording_handle uint8_t size_t data_size k4a_record_configuration_t config target_format k4a_capture_t capture_handle k4a_imu_sample_t imu_sample playback_handle k4a_logging_message_cb_t void min_level device_handle k4a_imu_sample_t timeout_in_ms capture_handle capture_handle capture_handle image_handle temperature_c k4a_image_t image_handle uint8_t image_handle image_handle image_handle image_handle image_handle timestamp_usec white_balance image_handle k4a_device_configuration_t config device_handle char size_t serial_number_size bool int32_t int32_t max_value
Definition: K4aPlugin.cpp:649
const char const char value recording_handle imu_sample recording_handle uint8_t size_t data_size k4a_record_configuration_t config target_format k4a_capture_t capture_handle k4a_imu_sample_t imu_sample uint64_t
Definition: K4aPlugin.cpp:343
const char const char value recording_handle imu_sample recording_handle uint8_t size_t data_size k4a_record_configuration_t config target_format k4a_capture_t capture_handle k4a_imu_sample_t imu_sample playback_handle k4a_logging_message_cb_t void min_level device_handle k4a_imu_sample_t timeout_in_ms capture_handle capture_handle capture_handle image_handle temperature_c k4a_image_t image_handle uint8_t image_handle image_handle image_handle image_handle image_handle timestamp_usec white_balance image_handle k4a_device_configuration_t config device_handle char size_t serial_number_size bool int32_t min_value
Definition: K4aPlugin.cpp:647
const char const char value recording_handle imu_sample recording_handle uint8_t size_t data_size k4a_record_configuration_t config target_format k4a_capture_t capture_handle k4a_imu_sample_t imu_sample playback_handle k4a_logging_message_cb_t void min_level device_handle k4a_imu_sample_t int32_t
Definition: K4aPlugin.cpp:395
void ClipTransformCPU(const core::Tensor &src, core::Tensor &dst, float scale, float min_value, float max_value, float clip_fill=0.0f)
Definition: ImageImpl.h:90
void CreateNormalMapCPU(const core::Tensor &src, core::Tensor &dst, float invalid_fill)
Definition: ImageImpl.h:246
void ColorizeDepthCPU(const core::Tensor &src, core::Tensor &dst, float scale, float min_value, float max_value)
Definition: ImageImpl.h:311
void CreateVertexMapCPU(const core::Tensor &src, core::Tensor &dst, const core::Tensor &intrinsics, float invalid_fill)
Definition: ImageImpl.h:199
void PyrDownDepthCPU(const core::Tensor &src, core::Tensor &dst, float diff_threshold, float invalid_fill)
Definition: ImageImpl.h:126
void ToCPU(const core::Tensor &src, core::Tensor &dst, double scale, double offset)
Definition: ImageImpl.h:33
Definition: PinholeCameraIntrinsic.cpp:16