STIR  6.3.0
cuda_utilities.h
Go to the documentation of this file.
1 /*
2  Copyright (C) 2024, University College London
3  Copyright (C) 2025, University of Milano-Bicocca
4  This file is part of STIR.
5 
6  SPDX-License-Identifier: Apache-2.0
7 
8  See STIR/LICENSE.txt for details
9 */
10 
11 #ifndef __stir_cuda_utilities_H__
12 #define __stir_cuda_utilities_H__
13 
22 #include "stir/Array.h"
23 #include "stir/info.h"
24 #include "stir/error.h"
25 #include <vector>
26 
27 START_NAMESPACE_STIR
28 
29 #ifndef __CUDACC__
30 # ifndef __host__
31 # define __host__
32 # endif
33 # ifndef __device__
34 # define __device__
35 # endif
36 #endif
37 
38 #ifndef __CUDACC__
39 struct cuda_dim3
40 {
41  unsigned int x = 1, y = 1, z = 1;
42 };
43 struct cuda_int3
44 {
45  int x = 0, y = 0, z = 0;
46 };
47 #else
48 # include <cuda_runtime.h>
49 typedef dim3 cuda_dim3;
50 typedef int3 cuda_int3;
51 #endif
52 
53 #ifdef __CUDACC__
54 template <int num_dimensions, typename elemT>
55 inline void
56 array_to_device(elemT* dev_data, const Array<num_dimensions, elemT>& stir_array)
57 {
58  if (stir_array.is_contiguous())
59  {
60  info("array_to_device contiguous", 100);
61  cudaMemcpy(dev_data, stir_array.get_const_full_data_ptr(), stir_array.size_all() * sizeof(elemT), cudaMemcpyHostToDevice);
62  stir_array.release_const_full_data_ptr();
63  }
64  else
65  {
66  info("array_to_device non-contiguous", 100);
67  // Allocate host memory to get contiguous vector, copy array to it and copy from device to host
68  std::vector<elemT> tmp_data(stir_array.size_all());
69  std::copy(stir_array.begin_all(), stir_array.end_all(), tmp_data.begin());
70  cudaMemcpy(dev_data, tmp_data.data(), stir_array.size_all() * sizeof(elemT), cudaMemcpyHostToDevice);
71  }
72 }
73 
74 template <int num_dimensions, typename elemT>
75 inline void
76 array_to_host(Array<num_dimensions, elemT>& stir_array, const elemT* dev_data)
77 {
78  if (stir_array.is_contiguous())
79  {
80  info("array_to_host contiguous", 100);
81  cudaMemcpy(stir_array.get_full_data_ptr(), dev_data, stir_array.size_all() * sizeof(elemT), cudaMemcpyDeviceToHost);
82  stir_array.release_full_data_ptr();
83  }
84  else
85  {
86  info("array_to_host non-contiguous", 100);
87  // Allocate host memory for the result and copy from device to host
88  std::vector<elemT> tmp_data(stir_array.size_all());
89  cudaMemcpy(tmp_data.data(), dev_data, stir_array.size_all() * sizeof(elemT), cudaMemcpyDeviceToHost);
90  // Copy the data to the stir_array
91  std::copy(tmp_data.begin(), tmp_data.end(), stir_array.begin_all());
92  }
93 }
94 
96 template <typename elemT>
97 __device__ inline void
98 blockReduction(elemT* shared_mem, int thread_in_block, int block_threads)
99 {
100  for (int stride = block_threads / 2; stride > 0; stride /= 2)
101  {
102  if (thread_in_block < stride)
103  shared_mem[thread_in_block] += shared_mem[thread_in_block + stride];
104  __syncthreads();
105  }
106 }
107 
109 template <typename elemT>
110 __device__ inline double
111 atomicAddGeneric(double* address, elemT val)
112 {
113 # if defined(__CUDA_ARCH__) && __CUDA_ARCH__ >= 600
114  return atomicAdd(address, static_cast<double>(val));
115 # else
116  if (threadIdx.x == 0 && threadIdx.y == 0 && threadIdx.z == 0 && blockIdx.x == 0 && blockIdx.y == 0 && blockIdx.z == 0)
117  {
118  printf("CudaGibbsPenalty: atomicAdd(double) unsupported on this GPU. "
119  "Upgrade to compute capability >= 6.0 or check code at "
120  "sources/STIR/src/include/stir/cuda_utilities.h:108.\n");
121  asm volatile("trap;");
122  }
123  return 0.0; // never reached
124  // Emulate atomicAdd for double precision on pre-Pascal architectures
125  // unsigned long long int* address_as_ull = reinterpret_cast<unsigned long long int*>(address);
126  // unsigned long long int old = *address_as_ull, assumed;
127 
128  // do
129  // {
130  // assumed = old;
131  // double updated = __longlong_as_double(assumed) + dval;
132  // old = atomicCAS(address_as_ull, assumed, __double_as_longlong(updated));
133  // } while (assumed != old);
134 
135  // return __longlong_as_double(old);
136 # endif
137 }
138 
140 inline void
141 checkCudaError(const std::string& operation)
142 {
143  cudaError_t cuda_error = cudaGetLastError();
144  if (cuda_error != cudaSuccess)
145  {
146  const char* err = cudaGetErrorString(cuda_error);
147  error(std::string("CudaGibbsPrior: CUDA error in ") + operation + ": " + err);
148  }
149 }
150 #endif
151 
152 END_NAMESPACE_STIR
153 #endif
void info(const STRING &string, const int verbosity_level=1)
Use this function for writing informational messages.
Definition: info.h:51
defines the stir::Array class for multi-dimensional (numeric) arrays
Declaration of stir::error()
Declaration of stir::info()
void error(const char *const s,...)
Print error with format string a la printf and throw exception.
Definition: error.cxx:42