Open3D (C++ API)  0.17.0
CUDAUtils.h
Go to the documentation of this file.
1 // ----------------------------------------------------------------------------
2 // - Open3D: www.open3d.org -
3 // ----------------------------------------------------------------------------
4 // Copyright (c) 2018-2023 www.open3d.org
5 // SPDX-License-Identifier: MIT
6 // ----------------------------------------------------------------------------
7 
13 
14 #pragma once
15 
16 #include "open3d/core/Device.h"
17 #include "open3d/utility/Logging.h"
18 
19 #ifdef BUILD_CUDA_MODULE
20 
21 #include <cuda.h>
22 #include <cuda_runtime.h>
23 
24 #include <memory>
25 #include <vector>
26 
28 
// Qualifiers and helpers used when Open3D is built with the CUDA module.
29 #define OPEN3D_FORCE_INLINE __forceinline__
30 #define OPEN3D_HOST_DEVICE __host__ __device__
31 #define OPEN3D_DEVICE __device__
// Compile-time assertion that `type` is the closure type of an extended
// __host__ __device__ lambda; the failure message names the offending type.
32 #define OPEN3D_ASSERT_HOST_DEVICE_LAMBDA(type) \
33  static_assert(__nv_is_extended_host_device_lambda_closure_type(type), \
34  #type " must be a __host__ __device__ lambda")
// Forwards a CUDA status code plus the call site (__FILE__, __LINE__) to
// open3d::core::__OPEN3D_CUDA_CHECK.
35 #define OPEN3D_CUDA_CHECK(err) \
36  open3d::core::__OPEN3D_CUDA_CHECK(err, __FILE__, __LINE__)
// Forwards `message` plus the call site to
// open3d::core::__OPEN3D_GET_LAST_CUDA_ERROR. The callee is fully qualified,
// exactly like OPEN3D_CUDA_CHECK, so the macro also expands correctly when it
// is used outside namespace open3d::core.
37 #define OPEN3D_GET_LAST_CUDA_ERROR(message) \
38  open3d::core::__OPEN3D_GET_LAST_CUDA_ERROR(message, __FILE__, __LINE__)
// Calls cuda_function with the given arguments.
// NOTE(review): the expansion already ends in ';', so `CUDA_CALL(f, x);`
// yields an extra empty statement and the macro cannot serve as the body of an
// unbraced if/else. Left as is because callers may rely on it.
39 #define CUDA_CALL(cuda_function, ...) cuda_function(__VA_ARGS__);
40 
41 #else // #ifdef BUILD_CUDA_MODULE
42 
// Host-only fallbacks used when BUILD_CUDA_MODULE is not defined: the
// qualifiers reduce to plain `inline` or to nothing.
43 #define OPEN3D_FORCE_INLINE inline
44 #define OPEN3D_HOST_DEVICE
45 #define OPEN3D_DEVICE
// The assertion and error-check macros expand to nothing in this build, so
// their argument is never evaluated.
46 #define OPEN3D_ASSERT_HOST_DEVICE_LAMBDA(type)
47 #define OPEN3D_CUDA_CHECK(err)
48 #define OPEN3D_GET_LAST_CUDA_ERROR(message)
// Instead of calling cuda_function, passes a message naming it to LogError;
// the variadic arguments are dropped without being evaluated.
// NOTE(review): the expansion already ends in ';'.
49 #define CUDA_CALL(cuda_function, ...) \
50  open3d::utility::LogError( \
51  "Not built with CUDA, cannot call " #cuda_function);
52 
53 #endif // #ifdef BUILD_CUDA_MODULE
54 
55 namespace open3d {
56 namespace core {
57 
58 #ifdef BUILD_CUDA_MODULE
59 
// Scope guard for the active CUDA device. The object records a previous device
// id in prev_device_id_; the constructor and destructor bodies are defined
// outside this header — presumably the constructor switches to the requested
// device and the destructor switches back (confirm in the implementation).
// The guard is non-copyable: copy construction and copy assignment are deleted.
83 class CUDAScopedDevice {
84 public:
// Construct from a raw CUDA device index.
85  explicit CUDAScopedDevice(int device_id);
86 
// Construct from an open3d::core::Device.
87  explicit CUDAScopedDevice(const Device& device);
88 
89  ~CUDAScopedDevice();
90 
91  CUDAScopedDevice(const CUDAScopedDevice&) = delete;
92  CUDAScopedDevice& operator=(const CUDAScopedDevice&) = delete;
93 
94 private:
// Device id remembered for the lifetime of the guard.
95  int prev_device_id_;
96 };
97 
// Scope guard for the current CUDA stream. It can be built either from an
// existing cudaStream_t or from the CreateNewStream tag, and it is
// non-copyable. The constructor/destructor bodies are not in this header;
// presumably the previous stream is restored on destruction (confirm in the
// implementation).
137 class CUDAScopedStream {
138 private:
// Tag type whose only purpose is to select the tag-taking constructor. All
// copy and move operations are deleted, so the only usable instance is the
// static CreateNewStream constant below.
139  struct CreateNewStreamTag {
140  CreateNewStreamTag(const CreateNewStreamTag&) = delete;
141  CreateNewStreamTag& operator=(const CreateNewStreamTag&) = delete;
142  CreateNewStreamTag(CreateNewStreamTag&&) = delete;
143  CreateNewStreamTag& operator=(CreateNewStreamTag&&) = delete;
144  };
145 
146 public:
// Pass this constant to the constructor to request a newly created stream.
// NOTE(review): `= {}` relies on the tag being an aggregate; C++20 stops
// treating classes with user-declared (even deleted) constructors as
// aggregates — confirm before raising the language standard.
147  constexpr static CreateNewStreamTag CreateNewStream = {};
148 
// Tag overload; call as CUDAScopedStream(CUDAScopedStream::CreateNewStream).
149  explicit CUDAScopedStream(const CreateNewStreamTag&);
150 
// Overload taking a caller-supplied stream.
151  explicit CUDAScopedStream(cudaStream_t stream);
152 
153  ~CUDAScopedStream();
154 
155  CUDAScopedStream(const CUDAScopedStream&) = delete;
156  CUDAScopedStream& operator=(const CUDAScopedStream&) = delete;
157 
158 private:
// Stream handles tracked by the guard.
159  cudaStream_t prev_stream_;
160  cudaStream_t new_stream_;
// Defaults to false; presumably set only when the guard created new_stream_
// itself and must therefore destroy it — TODO confirm.
161  bool owns_new_stream_ = false;
162 };
163 
// Holder of CUDA peer-to-peer state. Instances are obtained only through
// GetInstance(): the constructor is private and copying is deleted.
177 class CUDAState {
178 public:
// Returns a reference to the CUDAState instance.
179  static CUDAState& GetInstance();
180 
181  CUDAState(const CUDAState&) = delete;
182  CUDAState& operator=(const CUDAState&) = delete;
183 
// Query by device index. Presumably reports whether peer-to-peer access from
// src_id to tar_id is enabled — confirm against the implementation.
186  bool IsP2PEnabled(int src_id, int tar_id) const;
187 
// Same query expressed with Device objects.
190  bool IsP2PEnabled(const Device& src, const Device& tar) const;
191 
// Testing hook; by its name it disables P2P in the stored state — confirm the
// exact effect in the implementation.
194  void ForceDisableP2PForTesting();
195 
196 private:
197  CUDAState();
198 
// Boolean matrix backing IsP2PEnabled; presumably indexed [src][tar].
199  std::vector<std::vector<bool>> p2p_enabled_;
200 };
201 
// Queries declared only when BUILD_CUDA_MODULE is defined. Judging by their
// names they report properties of the currently selected CUDA device; the
// definitions are not in this header, so confirm units and error behaviour
// there.

// Presumably the warp size of the current device.
203 int GetCUDACurrentWarpSize();
204 
// Presumably the texture alignment requirement of the current device.
206 int GetCUDACurrentDeviceTextureAlignment();
207 
// Presumably the total memory size of the current device (likely in bytes).
209 size_t GetCUDACurrentTotalMemSize();
210 
211 #else
212 
// When CUDA is not enabled, this is a dummy class: every member is an empty
// inline definition, so code that creates a CUDAScopedDevice still compiles in
// a build without the CUDA module. Copying stays deleted.
214 class CUDAScopedDevice {
215 public:
216  explicit CUDAScopedDevice(int device_id) {}
217  explicit CUDAScopedDevice(const Device& device) {}
218  ~CUDAScopedDevice() {}
219  CUDAScopedDevice(const CUDAScopedDevice&) = delete;
220  CUDAScopedDevice& operator=(const CUDAScopedDevice&) = delete;
221 };
222 
223 #endif
224 
// CUDA helper functions. Everything before the BUILD_CUDA_MODULE guard below
// is declared in every build; only the last three declarations need the CUDA
// runtime types.
225 namespace cuda {
226 
// Presumably the number of usable CUDA devices — confirm the value returned by
// non-CUDA builds in the implementation.
229 int DeviceCount();
230 
// Presumably true when at least one CUDA device can be used.
233 bool IsAvailable();
234 
// Releases CUDA memory manager cache. This is typically used for debugging.
236 void ReleaseCache();
237 
// Overload without arguments; which devices it waits on is not visible here.
240 void Synchronize();
241 
// Overload restricted to the given device.
246 void Synchronize(const Device& device);
247 
// Checks that the CUDA device with this index is available; how a failure is
// reported is defined in the implementation.
251 void AssertCUDADeviceAvailable(int device_id);
252 
// Same check expressed with a Device object.
256 void AssertCUDADeviceAvailable(const Device& device);
257 
258 #ifdef BUILD_CUDA_MODULE
259 
// Accessors available only in CUDA builds. Presumably they return the current
// device index, the current stream and the default stream respectively.
260 int GetDevice();
261 cudaStream_t GetStream();
262 cudaStream_t GetDefaultStream();
263 
264 #endif
265 
266 } // namespace cuda
267 } // namespace core
268 } // namespace open3d
269 
270 // Implementation details of the OPEN3D_CUDA_CHECK and OPEN3D_GET_LAST_CUDA_ERROR macros; declared here, at the end of the file.
271 #ifdef BUILD_CUDA_MODULE
272 
273 namespace open3d {
274 namespace core {
275 
// Backend of the OPEN3D_CUDA_CHECK macro, which supplies __FILE__ and __LINE__
// for `file` and `line`. Takes the status code to inspect; how a failure is
// reported is defined in the implementation.
276 void __OPEN3D_CUDA_CHECK(cudaError_t err, const char* file, const int line);
277 
// Backend of the OPEN3D_GET_LAST_CUDA_ERROR macro, which supplies __FILE__ and
// __LINE__ for `file` and `line`. `message` is caller-provided context text.
278 void __OPEN3D_GET_LAST_CUDA_ERROR(const char* message,
279  const char* file,
280  const int line);
281 
282 } // namespace core
283 } // namespace open3d
284 
285 #endif
When CUDA is not enabled, this is a dummy class.
Definition: CUDAUtils.h:214
CUDAScopedDevice(const Device &device)
Definition: CUDAUtils.h:217
CUDAScopedDevice(const CUDAScopedDevice &)=delete
CUDAScopedDevice(int device_id)
Definition: CUDAUtils.h:216
~CUDAScopedDevice()
Definition: CUDAUtils.h:218
CUDAScopedDevice & operator=(const CUDAScopedDevice &)=delete
Definition: Device.h:18
void ReleaseCache()
Releases CUDA memory manager cache. This is typically used for debugging.
Definition: CUDAUtils.cpp:40
bool IsAvailable()
Definition: CUDAUtils.cpp:38
int DeviceCount()
Definition: CUDAUtils.cpp:21
void Synchronize()
Definition: CUDAUtils.cpp:58
void AssertCUDADeviceAvailable(int device_id)
Definition: CUDAUtils.cpp:75
Definition: PinholeCameraIntrinsic.cpp:16