Open3D (C++ API)  0.12.0
CPULauncher.h
Go to the documentation of this file.
1 // ----------------------------------------------------------------------------
2 // - Open3D: www.open3d.org -
3 // ----------------------------------------------------------------------------
4 // The MIT License (MIT)
5 //
6 // Copyright (c) 2018 www.open3d.org
7 //
8 // Permission is hereby granted, free of charge, to any person obtaining a copy
9 // of this software and associated documentation files (the "Software"), to deal
10 // in the Software without restriction, including without limitation the rights
11 // to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
12 // copies of the Software, and to permit persons to whom the Software is
13 // furnished to do so, subject to the following conditions:
14 //
15 // The above copyright notice and this permission notice shall be included in
16 // all copies or substantial portions of the Software.
17 //
18 // THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
19 // IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
20 // FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
21 // AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
22 // LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
23 // FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
24 // IN THE SOFTWARE.
25 // ----------------------------------------------------------------------------
26 
27 #pragma once
28 
#include <algorithm>
#include <cassert>
#include <cstdint>
#include <vector>

#include "open3d/core/AdvancedIndexing.h"
#include "open3d/core/Indexer.h"
#include "open3d/core/ParallelUtil.h"
#include "open3d/core/Tensor.h"
#include "open3d/utility/Console.h"
37 
38 namespace open3d {
39 namespace core {
40 namespace kernel {
41 
42 class CPULauncher {
43 public:
51  template <typename func_t>
52  static void LaunchIndexFillKernel(const Indexer& indexer,
53  func_t element_kernel) {
54 #pragma omp parallel for schedule(static)
55  for (int64_t workload_idx = 0; workload_idx < indexer.NumWorkloads();
56  ++workload_idx) {
57  element_kernel(indexer.GetInputPtr(0, workload_idx), workload_idx);
58  }
59  }
60 
61  template <typename func_t>
62  static void LaunchUnaryEWKernel(const Indexer& indexer,
63  func_t element_kernel) {
64 #pragma omp parallel for schedule(static)
65  for (int64_t workload_idx = 0; workload_idx < indexer.NumWorkloads();
66  ++workload_idx) {
67  element_kernel(indexer.GetInputPtr(0, workload_idx),
68  indexer.GetOutputPtr(workload_idx));
69  }
70  }
71 
72  template <typename func_t>
73  static void LaunchBinaryEWKernel(const Indexer& indexer,
74  func_t element_kernel) {
75 #pragma omp parallel for schedule(static)
76  for (int64_t workload_idx = 0; workload_idx < indexer.NumWorkloads();
77  ++workload_idx) {
78  element_kernel(indexer.GetInputPtr(0, workload_idx),
79  indexer.GetInputPtr(1, workload_idx),
80  indexer.GetOutputPtr(workload_idx));
81  }
82  }
83 
84  template <typename func_t>
85  static void LaunchAdvancedIndexerKernel(const AdvancedIndexer& indexer,
86  func_t element_kernel) {
87 #pragma omp parallel for schedule(static)
88  for (int64_t workload_idx = 0; workload_idx < indexer.NumWorkloads();
89  ++workload_idx) {
90  element_kernel(indexer.GetInputPtr(workload_idx),
91  indexer.GetOutputPtr(workload_idx));
92  }
93  }
94 
95  template <typename scalar_t, typename func_t>
96  static void LaunchReductionKernelSerial(const Indexer& indexer,
97  func_t element_kernel) {
98  for (int64_t workload_idx = 0; workload_idx < indexer.NumWorkloads();
99  ++workload_idx) {
100  element_kernel(indexer.GetInputPtr(0, workload_idx),
101  indexer.GetOutputPtr(workload_idx));
102  }
103  }
104 
107  template <typename scalar_t, typename func_t>
108  static void LaunchReductionKernelTwoPass(const Indexer& indexer,
109  func_t element_kernel,
110  scalar_t identity) {
111  if (indexer.NumOutputElements() > 1) {
113  "Internal error: two-pass reduction only works for "
114  "single-output reduction ops.");
115  }
116  int64_t num_workloads = indexer.NumWorkloads();
117  int64_t num_threads = GetMaxThreads();
118  int64_t workload_per_thread =
119  (num_workloads + num_threads - 1) / num_threads;
120  std::vector<scalar_t> thread_results(num_threads, identity);
121 
122 #pragma omp parallel for schedule(static)
123  for (int64_t thread_idx = 0; thread_idx < num_threads; ++thread_idx) {
124  int64_t start = thread_idx * workload_per_thread;
125  int64_t end = std::min(start + workload_per_thread, num_workloads);
126  for (int64_t workload_idx = start; workload_idx < end;
127  ++workload_idx) {
128  element_kernel(indexer.GetInputPtr(0, workload_idx),
129  &thread_results[thread_idx]);
130  }
131  }
132  void* output_ptr = indexer.GetOutputPtr(0);
133  for (int64_t thread_idx = 0; thread_idx < num_threads; ++thread_idx) {
134  element_kernel(&thread_results[thread_idx], output_ptr);
135  }
136  }
137 
138  template <typename scalar_t, typename func_t>
139  static void LaunchReductionParallelDim(const Indexer& indexer,
140  func_t element_kernel) {
141  // Prefers outer dimension >= num_threads.
142  const int64_t* indexer_shape = indexer.GetMasterShape();
143  const int64_t num_dims = indexer.NumDims();
144  int64_t num_threads = GetMaxThreads();
145 
146  // Init best_dim as the outer-most non-reduction dim.
147  int64_t best_dim = num_dims - 1;
148  while (best_dim >= 0 && indexer.IsReductionDim(best_dim)) {
149  best_dim--;
150  }
151  for (int64_t dim = best_dim; dim >= 0 && !indexer.IsReductionDim(dim);
152  --dim) {
153  if (indexer_shape[dim] >= num_threads) {
154  best_dim = dim;
155  break;
156  } else if (indexer_shape[dim] > indexer_shape[best_dim]) {
157  best_dim = dim;
158  }
159  }
160  if (best_dim == -1) {
162  "Internal error: all dims are reduction dims, use "
163  "LaunchReductionKernelTwoPass instead.");
164  }
165 
166 #pragma omp parallel for schedule(static)
167  for (int64_t i = 0; i < indexer_shape[best_dim]; ++i) {
168  Indexer sub_indexer(indexer);
169  sub_indexer.ShrinkDim(best_dim, i, 1);
170  LaunchReductionKernelSerial<scalar_t>(sub_indexer, element_kernel);
171  }
172  }
173 
175  template <typename func_t>
176  static void LaunchGeneralKernel(int64_t n, func_t element_kernel) {
177 #pragma omp parallel for schedule(static)
178  for (int64_t workload_idx = 0; workload_idx < n; ++workload_idx) {
179  element_kernel(workload_idx);
180  }
181  }
182 };
183 
184 } // namespace kernel
185 } // namespace core
186 } // namespace open3d
OPEN3D_HOST_DEVICE char * GetOutputPtr(int64_t workload_idx) const
Definition: Indexer.h:414
int64_t NumWorkloads() const
Definition: Indexer.cpp:371
int64_t NumWorkloads() const
Definition: AdvancedIndexing.h:213
bool IsReductionDim(int64_t dim) const
Returns true if the dim -th dimension is reduced.
Definition: Indexer.h:390
Definition: CPULauncher.h:42
OPEN3D_HOST_DEVICE char * GetOutputPtr(int64_t workload_idx) const
Definition: AdvancedIndexing.h:192
void LogError(const char *format, const Args &... args)
Definition: Console.h:176
static void LaunchReductionKernelTwoPass(const Indexer &indexer, func_t element_kernel, scalar_t identity)
Definition: CPULauncher.h:108
OPEN3D_HOST_DEVICE char * GetInputPtr(int64_t input_idx, int64_t workload_idx) const
Definition: Indexer.h:402
int GetMaxThreads()
Definition: ParallelUtil.h:33
Definition: AdvancedIndexing.h:135
static void LaunchReductionParallelDim(const Indexer &indexer, func_t element_kernel)
Definition: CPULauncher.h:139
static void LaunchIndexFillKernel(const Indexer &indexer, func_t element_kernel)
Definition: CPULauncher.h:52
static void LaunchGeneralKernel(int64_t n, func_t element_kernel)
General kernels with non-conventional indexers.
Definition: CPULauncher.h:176
int64_t NumDims() const
Returns number of dimensions of the Indexer.
Definition: Indexer.h:311
Definition: PinholeCameraIntrinsic.cpp:35
int64_t NumOutputElements() const
Returns the number of output elements.
Definition: Indexer.cpp:379
const int64_t * GetMasterShape() const
Definition: Indexer.h:315
Definition: Indexer.h:261
void ShrinkDim(int64_t dim, int64_t start, int64_t size)
Definition: Indexer.cpp:338
OPEN3D_HOST_DEVICE char * GetInputPtr(int64_t workload_idx) const
Definition: AdvancedIndexing.h:185
static void LaunchReductionKernelSerial(const Indexer &indexer, func_t element_kernel)
Definition: CPULauncher.h:96
static void LaunchUnaryEWKernel(const Indexer &indexer, func_t element_kernel)
Definition: CPULauncher.h:62
static void LaunchAdvancedIndexerKernel(const AdvancedIndexer &indexer, func_t element_kernel)
Definition: CPULauncher.h:85
static void LaunchBinaryEWKernel(const Indexer &indexer, func_t element_kernel)
Definition: CPULauncher.h:73