11#ifndef __stir_cuda_utilities_H__
12#define __stir_cuda_utilities_H__
26# include <cuda_runtime.h>
45 unsigned int x = 1, y = 1, z = 1;
49 int x = 0, y = 0, z = 0;
52typedef dim3 cuda_dim3;
53typedef int3 cuda_int3;
62template <
int num_dimensions,
typename elemT>
64array_to_device(elemT* dev_data,
const Array<num_dimensions, elemT>& stir_array)
66 if (stir_array.is_contiguous())
68 info(
"array_to_device contiguous", 100);
69 cudaMemcpy(dev_data, stir_array.get_const_full_data_ptr(), stir_array.size_all() *
sizeof(elemT), cudaMemcpyHostToDevice);
70 stir_array.release_const_full_data_ptr();
74 info(
"array_to_device non-contiguous", 100);
76 std::vector<elemT> tmp_data(stir_array.size_all());
77 std::copy(stir_array.begin_all(), stir_array.end_all(), tmp_data.begin());
78 cudaMemcpy(dev_data, tmp_data.data(), stir_array.size_all() *
sizeof(elemT), cudaMemcpyHostToDevice);
86template <
int num_dimensions,
typename elemT>
88array_to_device(CuVec<elemT>& dev_data,
const Array<num_dimensions, elemT>& stir_array)
90 dev_data.resize(stir_array.size_all());
91 std::copy(stir_array.begin_all(), stir_array.end_all(), dev_data.begin());
99template <
int num_dimensions,
typename elemT>
101array_to_host(Array<num_dimensions, elemT>& stir_array,
const elemT* dev_data,
bool =
true)
103 if (stir_array.is_contiguous())
105 info(
"array_to_host contiguous", 100);
106 cudaMemcpy(stir_array.get_full_data_ptr(), dev_data, stir_array.size_all() *
sizeof(elemT), cudaMemcpyDeviceToHost);
107 stir_array.release_full_data_ptr();
111 info(
"array_to_host non-contiguous", 100);
113 std::vector<elemT> tmp_data(stir_array.size_all());
114 cudaMemcpy(tmp_data.data(), dev_data, stir_array.size_all() *
sizeof(elemT), cudaMemcpyDeviceToHost);
116 std::copy(tmp_data.begin(), tmp_data.end(), stir_array.begin_all());
125template <
int num_dimensions,
typename elemT>
127array_to_host(Array<num_dimensions, elemT>& stir_array,
const CuVec<elemT>& dev_data,
bool sync =
true)
130 cudaDeviceSynchronize();
131 if (stir_array.size_all() != dev_data.size())
132 error(
"array_to_host: size mismatch between CuVec and Array");
133 std::copy(dev_data.begin(), dev_data.end(), stir_array.begin_all());
137template <
typename elemT>
138__device__
inline void
139blockReduction(elemT* shared_mem,
int thread_in_block,
int block_threads)
141 for (
int stride = block_threads / 2; stride > 0; stride /= 2)
143 if (thread_in_block < stride)
144 shared_mem[thread_in_block] += shared_mem[thread_in_block + stride];
150template <
typename elemT>
151__device__
inline double
152atomicAddGeneric(
double* address, elemT val)
154# if defined(__CUDA_ARCH__) && __CUDA_ARCH__ >= 600
155 return atomicAdd(address,
static_cast<double>(val));
157 if (threadIdx.x == 0 && threadIdx.y == 0 && threadIdx.z == 0 && blockIdx.x == 0 && blockIdx.y == 0 && blockIdx.z == 0)
159 printf(
"CudaGibbsPenalty: atomicAdd(double) unsupported on this GPU. "
160 "Upgrade to compute capability >= 6.0 or check code at "
161 "sources/STIR/src/include/stir/cuda_utilities.h:108.\n");
162 asm volatile(
"trap;");
182checkCudaError(
const std::string& operation)
184 cudaError_t cuda_error = cudaGetLastError();
185 if (cuda_error != cudaSuccess)
187 const char* err = cudaGetErrorString(cuda_error);
188 error(std::string(
"CudaGibbsPrior: CUDA error in ") + operation +
": " + err);
defines the stir::Array class for multi-dimensional (numeric) arrays
Declaration of stir::error()
void info(const STRING &string, const int verbosity_level=1)
Use this function for writing informational messages.
Definition info.h:51
Declaration of stir::info()