#ifndef __stir_cuda_utilities_H__
#define __stir_cuda_utilities_H__

#include "stir/Array.h"
#include "stir/info.h"
#include "stir/error.h"
#include <vector>
#include <algorithm>
#include <string>

#ifndef __CUDACC__
// Host-side stand-ins for the CUDA vector types, so that code using
// cuda_dim3/cuda_int3 still compiles in translation units built without nvcc.
struct cuda_dim3
{
  unsigned int x = 1, y = 1, z = 1;
};
struct cuda_int3
{
  int x = 0, y = 0, z = 0;
};
#else
#  include <cuda_runtime.h>
typedef dim3 cuda_dim3;
typedef int3 cuda_int3;
#endif
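// Illustrative note (not in the original header): the fallback structs let
// plain C++ code carry kernel launch dimensions around, e.g.
//
//   cuda_dim3 grid;              // compiles with g++ as well as nvcc
//   grid.x = 64; grid.y = 64;    // defaults (1,1,1) match dim3's semantics
//
// while under nvcc the same names alias the real dim3/int3 types.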
START_NAMESPACE_STIR

template <int num_dimensions, typename elemT>
inline void
array_to_device(elemT* dev_data, const Array<num_dimensions, elemT>& stir_array)
{
  if (stir_array.is_contiguous())
    {
      info("array_to_device contiguous", 100);
      cudaMemcpy(dev_data, stir_array.get_const_full_data_ptr(), stir_array.size_all() * sizeof(elemT),
                 cudaMemcpyHostToDevice);
      stir_array.release_const_full_data_ptr();
    }
  else
    {
      info("array_to_device non-contiguous", 100);
      // Gather the non-contiguous array into a contiguous host buffer first,
      // then transfer that buffer to the device in a single cudaMemcpy.
      std::vector<elemT> tmp_data(stir_array.size_all());
      std::copy(stir_array.begin_all(), stir_array.end_all(), tmp_data.begin());
      cudaMemcpy(dev_data, tmp_data.data(), stir_array.size_all() * sizeof(elemT), cudaMemcpyHostToDevice);
    }
}
template <int num_dimensions, typename elemT>
inline void
array_to_host(Array<num_dimensions, elemT>& stir_array, const elemT* dev_data)
{
  if (stir_array.is_contiguous())
    {
      info("array_to_host contiguous", 100);
      cudaMemcpy(stir_array.get_full_data_ptr(), dev_data, stir_array.size_all() * sizeof(elemT),
                 cudaMemcpyDeviceToHost);
      stir_array.release_full_data_ptr();
    }
  else
    {
      info("array_to_host non-contiguous", 100);
      // Receive into a contiguous host buffer, then scatter the elements
      // back into the non-contiguous array.
      std::vector<elemT> tmp_data(stir_array.size_all());
      cudaMemcpy(tmp_data.data(), dev_data, stir_array.size_all() * sizeof(elemT), cudaMemcpyDeviceToHost);
      std::copy(tmp_data.begin(), tmp_data.end(), stir_array.begin_all());
    }
}
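/* A minimal round-trip sketch (illustrative, not part of the original header;
   assumes a filled Array<3, float> named `image`). checkCudaError is the
   helper defined at the end of this header.

     float* d_image = nullptr;
     cudaMalloc(&d_image, image.size_all() * sizeof(float));
     array_to_device(d_image, image);        // host -> device
     checkCudaError("array_to_device");
     // ... launch kernels operating on d_image ...
     array_to_host(image, d_image);          // device -> host
     checkCudaError("array_to_host");
     cudaFree(d_image);
*/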
template <typename elemT>
__device__ inline void
blockReduction(elemT* shared_mem, int thread_in_block, int block_threads)
{
  // Tree reduction in shared memory: after the loop, the block's sum is in
  // shared_mem[0]. Requires block_threads to be a power of two, and every
  // thread of the block must call this function (it synchronizes the block).
  for (int stride = block_threads / 2; stride > 0; stride /= 2)
    {
      __syncthreads();
      if (thread_in_block < stride)
        shared_mem[thread_in_block] += shared_mem[thread_in_block + stride];
    }
}
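/* Illustrative kernel sketch (hypothetical names, not from the original
   header): each block loads one value per thread into dynamic shared memory,
   reduces with blockReduction, and folds its partial sum into a global
   double via atomicAddGeneric (defined below).

     __global__ void sum_kernel(const float* in, double* total, int n)
     {
       extern __shared__ float sdata[];
       const int i = blockIdx.x * blockDim.x + threadIdx.x;
       sdata[threadIdx.x] = (i < n) ? in[i] : 0.f;
       blockReduction(sdata, int(threadIdx.x), int(blockDim.x));
       if (threadIdx.x == 0)
         atomicAddGeneric(total, sdata[0]);
     }

   launched e.g. as sum_kernel<<<grid, block, block.x * sizeof(float)>>>(in, total, n).
*/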
template <typename elemT>
__device__ inline double
atomicAddGeneric(double* address, elemT val)
{
#if defined(__CUDA_ARCH__) && __CUDA_ARCH__ >= 600
  // Compute capability >= 6.0 provides a native double-precision atomicAdd.
  return atomicAdd(address, static_cast<double>(val));
#else
  // No native double atomicAdd on this architecture: report once from the
  // first thread of the first block, then abort the kernel.
  if (threadIdx.x == 0 && threadIdx.y == 0 && threadIdx.z == 0 && blockIdx.x == 0 && blockIdx.y == 0 && blockIdx.z == 0)
    {
      printf("CudaGibbsPenalty: atomicAdd(double) unsupported on this GPU. "
             "Upgrade to compute capability >= 6.0 or check code at "
             "sources/STIR/src/include/stir/cuda_utilities.h:108.\n");
      asm volatile("trap;");
    }
  return 0.0; // not reached once the kernel has trapped; keeps all paths returning a value
#endif
}
inline void
checkCudaError(const std::string& operation)
{
  cudaError_t cuda_error = cudaGetLastError();
  if (cuda_error != cudaSuccess)
    {
      const char* err = cudaGetErrorString(cuda_error);
      error(std::string("CudaGibbsPrior: CUDA error in ") + operation + ": " + err);
    }
}

END_NAMESPACE_STIR

#endif // __stir_cuda_utilities_H__