STIR 6.4.0
cuda_utilities.h
Go to the documentation of this file.
1/*
2 Copyright (C) 2024, 2026, University College London
3 Copyright (C) 2025, University of Milano-Bicocca
4 This file is part of STIR.
5
6 SPDX-License-Identifier: Apache-2.0
7
8 See STIR/LICENSE.txt for details
9*/
10
11#ifndef __stir_cuda_utilities_H__
12#define __stir_cuda_utilities_H__
13
22#include "stir/Array.h"
23#include "stir/info.h"
24#include "stir/error.h"
25#ifdef __CUDACC__
26# include <cuda_runtime.h>
27# include "cuvec.cuh"
28#endif
29#include <vector>
30
31START_NAMESPACE_STIR
32
33#ifndef __CUDACC__
34# ifndef __host__
35# define __host__
36# endif
37# ifndef __device__
38# define __device__
39# endif
40#endif
41
42#ifndef __CUDACC__
43struct cuda_dim3
44{
45 unsigned int x = 1, y = 1, z = 1;
46};
47struct cuda_int3
48{
49 int x = 0, y = 0, z = 0;
50};
51#else
52typedef dim3 cuda_dim3;
53typedef int3 cuda_int3;
54#endif
55
56#ifdef __CUDACC__
57
59
62template <int num_dimensions, typename elemT>
63inline void
64array_to_device(elemT* dev_data, const Array<num_dimensions, elemT>& stir_array)
65{
66 if (stir_array.is_contiguous())
67 {
68 info("array_to_device contiguous", 100);
69 cudaMemcpy(dev_data, stir_array.get_const_full_data_ptr(), stir_array.size_all() * sizeof(elemT), cudaMemcpyHostToDevice);
70 stir_array.release_const_full_data_ptr();
71 }
72 else
73 {
74 info("array_to_device non-contiguous", 100);
75 // Allocate host memory to get contiguous vector, copy array to it and copy from device to host
76 std::vector<elemT> tmp_data(stir_array.size_all());
77 std::copy(stir_array.begin_all(), stir_array.end_all(), tmp_data.begin());
78 cudaMemcpy(dev_data, tmp_data.data(), stir_array.size_all() * sizeof(elemT), cudaMemcpyHostToDevice);
79 }
80}
81
83
86template <int num_dimensions, typename elemT>
87inline void
88array_to_device(CuVec<elemT>& dev_data, const Array<num_dimensions, elemT>& stir_array)
89{
90 dev_data.resize(stir_array.size_all());
91 std::copy(stir_array.begin_all(), stir_array.end_all(), dev_data.begin());
92}
93
95
99template <int num_dimensions, typename elemT>
100inline void
101array_to_host(Array<num_dimensions, elemT>& stir_array, const elemT* dev_data, bool /* sync */ = true)
102{
103 if (stir_array.is_contiguous())
104 {
105 info("array_to_host contiguous", 100);
106 cudaMemcpy(stir_array.get_full_data_ptr(), dev_data, stir_array.size_all() * sizeof(elemT), cudaMemcpyDeviceToHost);
107 stir_array.release_full_data_ptr();
108 }
109 else
110 {
111 info("array_to_host non-contiguous", 100);
112 // Allocate host memory for the result and copy from device to host
113 std::vector<elemT> tmp_data(stir_array.size_all());
114 cudaMemcpy(tmp_data.data(), dev_data, stir_array.size_all() * sizeof(elemT), cudaMemcpyDeviceToHost);
115 // Copy the data to the stir_array
116 std::copy(tmp_data.begin(), tmp_data.end(), stir_array.begin_all());
117 }
118}
119
121
125template <int num_dimensions, typename elemT>
126inline void
127array_to_host(Array<num_dimensions, elemT>& stir_array, const CuVec<elemT>& dev_data, bool sync = true)
128{
129 if (sync)
130 cudaDeviceSynchronize();
131 if (stir_array.size_all() != dev_data.size())
132 error("array_to_host: size mismatch between CuVec and Array");
133 std::copy(dev_data.begin(), dev_data.end(), stir_array.begin_all());
134}
135
137template <typename elemT>
138__device__ inline void
139blockReduction(elemT* shared_mem, int thread_in_block, int block_threads)
140{
141 for (int stride = block_threads / 2; stride > 0; stride /= 2)
142 {
143 if (thread_in_block < stride)
144 shared_mem[thread_in_block] += shared_mem[thread_in_block + stride];
145 __syncthreads();
146 }
147}
148
150template <typename elemT>
151__device__ inline double
152atomicAddGeneric(double* address, elemT val)
153{
154# if defined(__CUDA_ARCH__) && __CUDA_ARCH__ >= 600
155 return atomicAdd(address, static_cast<double>(val));
156# else
157 if (threadIdx.x == 0 && threadIdx.y == 0 && threadIdx.z == 0 && blockIdx.x == 0 && blockIdx.y == 0 && blockIdx.z == 0)
158 {
159 printf("CudaGibbsPenalty: atomicAdd(double) unsupported on this GPU. "
160 "Upgrade to compute capability >= 6.0 or check code at "
161 "sources/STIR/src/include/stir/cuda_utilities.h:108.\n");
162 asm volatile("trap;");
163 }
164 return 0.0; // never reached
165 // Emulate atomicAdd for double precision on pre-Pascal architectures
166 // unsigned long long int* address_as_ull = reinterpret_cast<unsigned long long int*>(address);
167 // unsigned long long int old = *address_as_ull, assumed;
168
169 // do
170 // {
171 // assumed = old;
172 // double updated = __longlong_as_double(assumed) + dval;
173 // old = atomicCAS(address_as_ull, assumed, __double_as_longlong(updated));
174 // } while (assumed != old);
175
176 // return __longlong_as_double(old);
177# endif
178}
179
181inline void
182checkCudaError(const std::string& operation)
183{
184 cudaError_t cuda_error = cudaGetLastError();
185 if (cuda_error != cudaSuccess)
186 {
187 const char* err = cudaGetErrorString(cuda_error);
188 error(std::string("CudaGibbsPrior: CUDA error in ") + operation + ": " + err);
189 }
190}
191#endif
192
193END_NAMESPACE_STIR
194#endif
defines the stir::Array class for multi-dimensional (numeric) arrays
Declaration of stir::error()
void info(const STRING &string, const int verbosity_level=1)
Use this function for writing informational messages.
Definition info.h:51
Declaration of stir::info()