STIR 6.4.0
distributable.txx
1/*
2 Copyright (C) 2024, 2025 University College London
3 Copyright (C) 2020, 2022, University of Pennsylvania
4 Copyright (C) 2025, Commonwealth Scientific and Industrial Research Organisation
5 This file is part of STIR.
6
7 SPDX-License-Identifier: Apache-2.0 AND License-ref-PARAPET-license
8
9 See STIR/LICENSE.txt for details
10*/
11/*!
12
13 \file
14 \ingroup distributable
15
16 \brief Implementation of stir::LM_distributable_computation() and related functions
17
18 \author Nikos Efthimiou
19 \author Kris Thielemans
20 \author Ashley Gillman
21*/
22#include "stir/shared_ptr.h"
23#include "stir/recon_buildblock/distributable.h"
24#include "stir/DiscretisedDensity.h"
25#include "stir/CPUTimer.h"
26#include "stir/HighResWallClockTimer.h"
27#include "stir/is_null_ptr.h"
28#include "stir/info.h"
29#include "stir/format.h"
30
31#include "stir/recon_buildblock/ProjMatrixByBin.h"
32#include "stir/recon_buildblock/ProjMatrixElemsForOneBin.h"
33#include "stir/Bin.h"
34
35#include "stir/num_threads.h"
36
37START_NAMESPACE_STIR
38
39template <typename CallBackT>
40void
41LM_distributable_computation(const shared_ptr<ProjMatrixByBin> PM_sptr,
42 const shared_ptr<ProjDataInfo>& proj_data_info_sptr,
43 DiscretisedDensity<3, float>* output_image_ptr,
44 const DiscretisedDensity<3, float>* input_image_ptr,
45 const std::vector<BinAndCorr>& record_ptr,
46 const int subset_num,
47 const int num_subsets,
48 const bool has_add,
49 const bool accumulate,
50 double* double_out_ptr,
51 CallBackT&& call_back)
52{
53
54 CPUTimer CPU_timer;
55 CPU_timer.start();
56 HighResWallClockTimer wall_clock_timer;
57 wall_clock_timer.start();
58
59 assert(!record_ptr.empty());
60
61 if (output_image_ptr != NULL && !accumulate)
62 output_image_ptr->fill(0.F);
63
64 std::vector<shared_ptr<DiscretisedDensity<3, float>>> local_output_image_sptrs;
65 std::vector<double> local_double_outs;
66 std::vector<double*> local_double_out_ptrs;
67 std::vector<int> local_counts, local_count2s;
68 std::vector<ProjMatrixElemsForOneBin> local_row;
69#ifdef STIR_OPENMP
70# pragma omp parallel shared(local_output_image_sptrs, local_row, local_double_outs, local_counts, local_count2s)
71#endif
72 // start of threaded section if openmp
73 {
74#ifdef STIR_OPENMP
75# pragma omp single
76#endif
77 // allocate "local" vectors
78 {
79#ifdef STIR_OPENMP
80 const auto num_threads = omp_get_num_threads();
81#else
82 const int num_threads = 1;
83#endif
84 info("Listmode gradient calculation: starting loop with " + std::to_string(num_threads) + " threads", 2);
85 local_output_image_sptrs.resize(get_max_num_threads(), shared_ptr<DiscretisedDensity<3, float>>());
86 local_double_out_ptrs.resize(get_max_num_threads(), 0);
87 if (double_out_ptr)
88 {
89 local_double_outs.resize(get_max_num_threads(), 0.);
90 for (int t = 0; t < get_max_num_threads(); ++t)
91 local_double_out_ptrs[t] = &local_double_outs[t];
92 }
93 local_counts.resize(get_max_num_threads(), 0);
94 local_count2s.resize(get_max_num_threads(), 0);
95 local_row.resize(get_max_num_threads(), ProjMatrixElemsForOneBin());
96 }
97#ifdef STIR_OPENMP
98# pragma omp for schedule(dynamic)
99#endif
100 // note: VC uses OpenMP 2.0, so need signed integer for loop
101 for (long int ievent = 0; ievent < static_cast<long>(record_ptr.size()); ++ievent)
102 {
103 auto& record = record_ptr.at(ievent);
104 if (record.my_bin.get_bin_value() == 0.0f) // shouldn't happen really, but a check probably doesn't hurt
105 continue;
106
107#ifdef STIR_OPENMP
108 const int thread_num = omp_get_thread_num();
109#else
110 const int thread_num = 0;
111#endif
112
113 if (output_image_ptr != NULL)
114 {
115 if (is_null_ptr(local_output_image_sptrs[thread_num]))
116 local_output_image_sptrs[thread_num].reset(output_image_ptr->get_empty_copy());
117 }
118
119 const Bin& measured_bin = record.my_bin;
120
121 if (num_subsets > 1)
122 {
123 Bin basic_bin = measured_bin;
124 if (!PM_sptr->get_symmetries_ptr()->is_basic(measured_bin))
125 PM_sptr->get_symmetries_ptr()->find_basic_bin(basic_bin);
126
127 if (subset_num != static_cast<int>(basic_bin.view_num() % num_subsets))
128 {
129 continue;
130 }
131 }
132
133 PM_sptr->get_proj_matrix_elems_for_one_bin(local_row[thread_num], measured_bin);
134 call_back(*local_output_image_sptrs[thread_num],
135 local_row[thread_num],
136 has_add ? record.my_corr : 0.F,
137 measured_bin,
138 *input_image_ptr,
139 local_double_out_ptrs[thread_num]);
140 }
141 }
142 // flatten data constructed by threads (or collapse unitary dim if no threading)
143 {
144 if (double_out_ptr != NULL)
145 {
146 for (int i = 0; i < static_cast<int>(local_double_outs.size()); ++i)
147 *double_out_ptr += local_double_outs[i]; // accumulate all (as they were initialised to zero)
148 }
149 // count += std::accumulate(local_counts.begin(), local_counts.end(), 0);
150 // count2 += std::accumulate(local_count2s.begin(), local_count2s.end(), 0);
151
152 if (output_image_ptr != NULL)
153 {
154 for (int i = 0; i < static_cast<int>(local_output_image_sptrs.size()); ++i)
155 if (!is_null_ptr(local_output_image_sptrs[i])) // only accumulate if a thread filled something in
156 *output_image_ptr += *(local_output_image_sptrs[i]);
157 }
158 }
159 CPU_timer.stop();
160 wall_clock_timer.stop();
161 info(format(
162 "Computation times for distributable_computation, CPU {}s, wall-clock {}s", CPU_timer.value(), wall_clock_timer.value()));
163}
164
165END_NAMESPACE_STIR