Open3D (C++ API)
CPULauncher.h
Go to the documentation of this file.
1 // ----------------------------------------------------------------------------
2 // - Open3D: www.open3d.org -
3 // ----------------------------------------------------------------------------
4 // The MIT License (MIT)
5 //
6 // Copyright (c) 2018 www.open3d.org
7 //
8 // Permission is hereby granted, free of charge, to any person obtaining a copy
9 // of this software and associated documentation files (the "Software"), to deal
10 // in the Software without restriction, including without limitation the rights
11 // to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
12 // copies of the Software, and to permit persons to whom the Software is
13 // furnished to do so, subject to the following conditions:
14 //
15 // The above copyright notice and this permission notice shall be included in
16 // all copies or substantial portions of the Software.
17 //
18 // THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
19 // IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
20 // FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
21 // AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
22 // LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
23 // FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
24 // IN THE SOFTWARE.
25 // ----------------------------------------------------------------------------
26 
27 #pragma once
28 
29 #include <cassert>
30 #include <vector>
31 
33 #include "Open3D/Core/Indexer.h"
35 #include "Open3D/Core/Tensor.h"
36 #include "Open3D/Utility/Console.h"
37 
38 namespace open3d {
39 namespace kernel {
40 
41 class CPULauncher {
42 public:
43  template <typename func_t>
44  static void LaunchUnaryEWKernel(const Indexer& indexer,
45  func_t element_kernel) {
46 #ifdef _OPENMP
47 #pragma omp parallel for schedule(static)
48 #endif
49  for (int64_t workload_idx = 0; workload_idx < indexer.NumWorkloads();
50  ++workload_idx) {
51  element_kernel(indexer.GetInputPtr(0, workload_idx),
52  indexer.GetOutputPtr(workload_idx));
53  }
54  }
55 
56  template <typename func_t>
57  static void LaunchBinaryEWKernel(const Indexer& indexer,
58  func_t element_kernel) {
59 #ifdef _OPENMP
60 #pragma omp parallel for schedule(static)
61 #endif
62  for (int64_t workload_idx = 0; workload_idx < indexer.NumWorkloads();
63  ++workload_idx) {
64  element_kernel(indexer.GetInputPtr(0, workload_idx),
65  indexer.GetInputPtr(1, workload_idx),
66  indexer.GetOutputPtr(workload_idx));
67  }
68  }
69 
70  template <typename func_t>
71  static void LaunchAdvancedIndexerKernel(const AdvancedIndexer& indexer,
72  func_t element_kernel) {
73 #ifdef _OPENMP
74 #pragma omp parallel for schedule(static)
75 #endif
76  for (int64_t workload_idx = 0; workload_idx < indexer.NumWorkloads();
77  ++workload_idx) {
78  element_kernel(indexer.GetInputPtr(workload_idx),
79  indexer.GetOutputPtr(workload_idx));
80  }
81  }
82 
83  template <typename scalar_t, typename func_t>
84  static void LaunchReductionKernelSerial(const Indexer& indexer,
85  func_t element_kernel) {
86  for (int64_t workload_idx = 0; workload_idx < indexer.NumWorkloads();
87  ++workload_idx) {
88  element_kernel(indexer.GetInputPtr(0, workload_idx),
89  indexer.GetOutputPtr(workload_idx));
90  }
91  }
92 
95  template <typename scalar_t, typename func_t>
96  static void LaunchReductionKernelTwoPass(const Indexer& indexer,
97  func_t element_kernel,
98  scalar_t identity) {
99  if (indexer.NumOutputElements() > 1) {
101  "Internal error: two-pass reduction only works for "
102  "single-output reduction ops.");
103  }
104  int64_t num_workloads = indexer.NumWorkloads();
105  int64_t num_threads = parallel_util::GetMaxThreads();
106  int64_t workload_per_thread =
107  (num_workloads + num_threads - 1) / num_threads;
108  std::vector<scalar_t> thread_results(num_threads, identity);
109 
110 #ifdef _OPENMP
111 #pragma omp parallel for schedule(static)
112 #endif
113  for (int64_t thread_idx = 0; thread_idx < num_threads; ++thread_idx) {
114  int64_t start = thread_idx * workload_per_thread;
115  int64_t end = std::min(start + workload_per_thread, num_workloads);
116  for (int64_t workload_idx = start; workload_idx < end;
117  ++workload_idx) {
118  element_kernel(indexer.GetInputPtr(0, workload_idx),
119  &thread_results[thread_idx]);
120  }
121  }
122  void* output_ptr = indexer.GetOutputPtr(0);
123  for (int64_t thread_idx = 0; thread_idx < num_threads; ++thread_idx) {
124  element_kernel(&thread_results[thread_idx], output_ptr);
125  }
126  }
127 
128  template <typename scalar_t, typename func_t>
129  static void LaunchReductionParallelDim(const Indexer& indexer,
130  func_t element_kernel) {
131  // Prefers outer dimension >= num_threads.
132  const int64_t* indexer_shape = indexer.GetMasterShape();
133  const int64_t num_dims = indexer.NumDims();
134  int64_t num_threads = parallel_util::GetMaxThreads();
135 
136  // Init best_dim as the outer-most non-reduction dim.
137  int64_t best_dim = num_dims - 1;
138  while (best_dim >= 0 && indexer.IsReductionDim(best_dim)) {
139  best_dim--;
140  }
141  for (int64_t dim = best_dim; dim >= 0 && !indexer.IsReductionDim(dim);
142  --dim) {
143  if (indexer_shape[dim] >= num_threads) {
144  best_dim = dim;
145  break;
146  } else if (indexer_shape[dim] > indexer_shape[best_dim]) {
147  best_dim = dim;
148  }
149  }
150  if (best_dim == -1) {
152  "Internal error: all dims are reduction dims, use "
153  "LaunchReductionKernelTwoPass instead.");
154  }
155 
156 #ifdef _OPENMP
157 #pragma omp parallel for schedule(static)
158 #endif
159  for (int64_t i = 0; i < indexer_shape[best_dim]; ++i) {
160  Indexer sub_indexer(indexer);
161  sub_indexer.ShrinkDim(best_dim, i, 1);
162  LaunchReductionKernelSerial<scalar_t>(sub_indexer, element_kernel);
163  }
164  }
165 };
166 
167 } // namespace kernel
168 } // namespace open3d
Definition: CPULauncher.h:41
bool IsReductionDim(int64_t dim) const
Returns true if the dim -th dimension is reduced.
Definition: Indexer.h:389
static void LaunchUnaryEWKernel(const Indexer &indexer, func_t element_kernel)
Definition: CPULauncher.h:44
void LogError(const char *format, const Args &... args)
Definition: Console.h:174
int64_t NumWorkloads() const
Definition: Indexer.cpp:370
int GetMaxThreads()
Definition: ParallelUtil.h:33
static void LaunchReductionKernelTwoPass(const Indexer &indexer, func_t element_kernel, scalar_t identity)
Definition: CPULauncher.h:96
OPEN3D_HOST_DEVICE char * GetOutputPtr(int64_t workload_idx) const
Definition: Indexer.h:413
int64_t NumWorkloads() const
Definition: AdvancedIndexing.h:213
Definition: AdvancedIndexing.h:134
void ShrinkDim(int64_t dim, int64_t start, int64_t size)
Definition: Indexer.cpp:337
static void LaunchBinaryEWKernel(const Indexer &indexer, func_t element_kernel)
Definition: CPULauncher.h:57
const int64_t * GetMasterShape() const
Definition: Indexer.h:314
int64_t NumOutputElements() const
Returns the number of output elements.
Definition: Indexer.cpp:378
int64_t NumDims() const
Returns number of dimensions of the Indexer.
Definition: Indexer.h:310
OPEN3D_HOST_DEVICE char * GetInputPtr(int64_t workload_idx) const
Definition: AdvancedIndexing.h:185
Definition: Open3DViewer.h:29
static void LaunchAdvancedIndexerKernel(const AdvancedIndexer &indexer, func_t element_kernel)
Definition: CPULauncher.h:71
Definition: Indexer.h:260
OPEN3D_HOST_DEVICE char * GetOutputPtr(int64_t workload_idx) const
Definition: AdvancedIndexing.h:192
static void LaunchReductionParallelDim(const Indexer &indexer, func_t element_kernel)
Definition: CPULauncher.h:129
static void LaunchReductionKernelSerial(const Indexer &indexer, func_t element_kernel)
Definition: CPULauncher.h:84
OPEN3D_HOST_DEVICE char * GetInputPtr(int64_t input_idx, int64_t workload_idx) const
Definition: Indexer.h:401