// single_double_utils_gpu_kernels.hpp — HiRep
// CUDA kernels converting gauge and spinor fields between single and
// double precision.
1#ifndef SINGLE_DOUBLE_UTILS_GPU_KERNELS_HPP
2#define SINGLE_DOUBLE_UTILS_GPU_KERNELS_HPP
3
4#ifdef WITH_GPU
5
// Copy the double-precision gauge field into the single-precision one
// (u <- ud), truncating each real component to float.
//
// Both fields are reinterpreted as flat arrays of real components
// (assumes suNg/suNg_flt are plain aggregates of double/float — standard
// in HiRep; confirm if the matrix types ever gain non-POD members).
// Launched on a 1D grid; the grid-stride loop is correct for any
// grid/block configuration, including a single block.
//
// gauge_flt: output, single-precision field (4*N suNg_flt entries, 4 links/site)
// gauge:     input, double-precision field (4*N suNg entries)
// N:         number of local lattice sites
__global__ void assign_ud2u_kernel(suNg_flt *gauge_flt, suNg *gauge, int N) {
    // size_t throughout: the original 'int ix' was compared against a size_t
    // bound (signed/unsigned mismatch) and could overflow on large fields;
    // '4 * N' is widened before multiplying for the same reason.
    const size_t n_doubles = 4 * (size_t)N * sizeof(suNg) / sizeof(double);
    const size_t stride = (size_t)blockDim.x * gridDim.x;
    for (size_t ix = (size_t)blockIdx.x * blockDim.x + threadIdx.x; ix < n_doubles; ix += stride) {
        const double d = *((const double *)gauge + ix);
        *((float *)gauge_flt + ix) = (float)d;
    }
}
16
// Copy the single-precision gauge field into the double-precision one
// (ud <- u), promoting each real component to double.
//
// Both fields are reinterpreted as flat arrays of real components
// (assumes suNg/suNg_flt are plain aggregates of double/float — standard
// in HiRep; confirm if the matrix types ever gain non-POD members).
// Launched on a 1D grid; the grid-stride loop is correct for any
// grid/block configuration.
//
// gauge:     output, double-precision field (4*N suNg entries, 4 links/site)
// gauge_flt: input, single-precision field (4*N suNg_flt entries)
// N:         number of local lattice sites
__global__ void assign_u2ud_kernel(suNg *gauge, suNg_flt *gauge_flt, int N) {
    // size_t index/bound: avoids the signed/unsigned comparison and the
    // potential int overflow of the original 'int ix' and '4 * N'.
    const size_t n_floats = 4 * (size_t)N * sizeof(suNg_flt) / sizeof(float);
    const size_t stride = (size_t)blockDim.x * gridDim.x;
    for (size_t ix = (size_t)blockIdx.x * blockDim.x + threadIdx.x; ix < n_floats; ix += stride) {
        const float f = *((const float *)gauge_flt + ix);
        *((double *)gauge + ix) = (double)f;
    }
}
27
// Copy the double-precision represented (fermion) gauge field into the
// single-precision one (u <- ud), truncating each real component to float.
//
// Identical to assign_ud2u_kernel but for the suNf representation. Both
// fields are reinterpreted as flat arrays of real components (assumes
// suNf/suNf_flt are plain aggregates of double/float — TODO confirm).
// Launched on a 1D grid; the grid-stride loop works for any launch config.
//
// gauge_flt: output, single-precision field (4*N suNf_flt entries, 4 links/site)
// gauge:     input, double-precision field (4*N suNf entries)
// N:         number of local lattice sites
__global__ void assign_ud2u_f_kernel(suNf_flt *gauge_flt, suNf *gauge, int N) {
    // size_t index/bound fixes the original signed/unsigned comparison and
    // the int overflow risk in 'int ix' and '4 * N' for large fields.
    const size_t n_doubles = 4 * (size_t)N * sizeof(suNf) / sizeof(double);
    const size_t stride = (size_t)blockDim.x * gridDim.x;
    for (size_t ix = (size_t)blockIdx.x * blockDim.x + threadIdx.x; ix < n_doubles; ix += stride) {
        const double d = *((const double *)gauge + ix);
        *((float *)gauge_flt + ix) = (float)d;
    }
}
38
// Copy the single-precision represented (fermion) gauge field into the
// double-precision one (ud <- u), promoting each real component to double.
//
// Identical to assign_u2ud_kernel but for the suNf representation. Both
// fields are reinterpreted as flat arrays of real components (assumes
// suNf/suNf_flt are plain aggregates of double/float — TODO confirm).
// Launched on a 1D grid; the grid-stride loop works for any launch config.
//
// gauge:     output, double-precision field (4*N suNf entries, 4 links/site)
// gauge_flt: input, single-precision field (4*N suNf_flt entries)
// N:         number of local lattice sites
__global__ void assign_u2ud_f_kernel(suNf *gauge, suNf_flt *gauge_flt, int N) {
    // size_t index/bound fixes the original signed/unsigned comparison and
    // the int overflow risk in 'int ix' and '4 * N' for large fields.
    const size_t n_floats = 4 * (size_t)N * sizeof(suNf_flt) / sizeof(float);
    const size_t stride = (size_t)blockDim.x * gridDim.x;
    for (size_t ix = (size_t)blockIdx.x * blockDim.x + threadIdx.x; ix < n_floats; ix += stride) {
        const float f = *((const float *)gauge_flt + ix);
        *((double *)gauge + ix) = (double)f;
    }
}
49
// Assign a single-precision spinor field to a double-precision one
// (out = in), promoting every real component to double.
//
// Both fields are reinterpreted as flat arrays of real components
// (assumes suNf_spinor/suNf_spinor_flt are plain aggregates of
// double/float — TODO confirm). Launched on a 1D grid; the grid-stride
// loop is correct for any grid/block configuration.
//
// out: output, double-precision spinor field (N spinors)
// in:  input, single-precision spinor field (N spinors)
// N:   number of spinors in the field
__global__ void assign_s2sd_kernel(suNf_spinor *out, suNf_spinor_flt *in, int N) {
    // size_t index/bound fixes the original signed/unsigned comparison and
    // the int overflow risk in 'int ix' and 'N * sizeof(*in)' on large fields.
    const size_t n_floats = (size_t)N * sizeof(*in) / sizeof(float);
    const size_t stride = (size_t)blockDim.x * gridDim.x;
    for (size_t ix = (size_t)blockIdx.x * blockDim.x + threadIdx.x; ix < n_floats; ix += stride) {
        const float f = *((const float *)in + ix);
        *((double *)out + ix) = (double)f;
    }
}
60
// Assign a double-precision spinor field to a single-precision one
// (out = in), truncating every real component to float.
//
// Both fields are reinterpreted as flat arrays of real components
// (assumes suNf_spinor/suNf_spinor_flt are plain aggregates of
// double/float — TODO confirm). Launched on a 1D grid; the grid-stride
// loop is correct for any grid/block configuration.
//
// out: output, single-precision spinor field (N spinors)
// in:  input, double-precision spinor field (N spinors)
// N:   number of spinors in the field
__global__ void assign_sd2s_kernel(suNf_spinor_flt *out, suNf_spinor *in, int N) {
    // Renamed from the original misleading 'n_floats': this counts the
    // double components of the input. size_t index/bound also fixes the
    // signed/unsigned comparison and int-overflow risk of 'int ix'.
    const size_t n_doubles = (size_t)N * sizeof(*in) / sizeof(double);
    const size_t stride = (size_t)blockDim.x * gridDim.x;
    for (size_t ix = (size_t)blockIdx.x * blockDim.x + threadIdx.x; ix < n_doubles; ix += stride) {
        const double d = *((const double *)in + ix);
        *((float *)out + ix) = (float)d;
    }
}
71
// Add a single-precision spinor field into a double-precision one
// (out += in), promoting every real component to double before the add.
//
// Both fields are reinterpreted as flat arrays of real components
// (assumes suNf_spinor/suNf_spinor_flt are plain aggregates of
// double/float — TODO confirm). Launched on a 1D grid; the grid-stride
// loop is correct for any grid/block configuration.
//
// out: in/out, double-precision spinor field accumulated into (N spinors)
// in:  input, single-precision spinor field (N spinors)
// N:   number of spinors in the field
__global__ void add_assign_s2sd_kernel(suNf_spinor *out, suNf_spinor_flt *in, int N) {
    // size_t index/bound fixes the original signed/unsigned comparison and
    // the int overflow risk in 'int ix' and 'N * sizeof(*in)' on large fields.
    const size_t n_floats = (size_t)N * sizeof(*in) / sizeof(float);
    const size_t stride = (size_t)blockDim.x * gridDim.x;
    for (size_t ix = (size_t)blockIdx.x * blockDim.x + threadIdx.x; ix < n_floats; ix += stride) {
        const float f = *((const float *)in + ix);
        *((double *)out + ix) += (double)f;
    }
}
82
// Add a double-precision spinor field into a single-precision one
// (out += in), truncating every real component to float before the add.
//
// Both fields are reinterpreted as flat arrays of real components
// (assumes suNf_spinor/suNf_spinor_flt are plain aggregates of
// double/float — TODO confirm). Launched on a 1D grid; the grid-stride
// loop is correct for any grid/block configuration.
//
// out: in/out, single-precision spinor field accumulated into (N spinors)
// in:  input, double-precision spinor field (N spinors)
// N:   number of spinors in the field
__global__ void add_assign_sd2s_kernel(suNf_spinor_flt *out, suNf_spinor *in, int N) {
    // Renamed from the original misleading 'n_floats': this counts the
    // double components of the input. size_t index/bound also fixes the
    // signed/unsigned comparison and int-overflow risk of 'int ix'.
    const size_t n_doubles = (size_t)N * sizeof(*in) / sizeof(double);
    const size_t stride = (size_t)blockDim.x * gridDim.x;
    for (size_t ix = (size_t)blockIdx.x * blockDim.x + threadIdx.x; ix < n_doubles; ix += stride) {
        const double d = *((const double *)in + ix);
        *((float *)out + ix) += (float)d;
    }
}
93
94#endif
95#endif