#ifndef SINGLE_DOUBLE_UTILS_GPU_KERNELS_HPP
#define SINGLE_DOUBLE_UTILS_GPU_KERNELS_HPP
/* Convert the double-precision gauge field to single precision:
 * reinterprets both fields as flat arrays of scalars and copies
 * gauge_flt[i] = (float)gauge[i] for every double in 4*N suNg links
 * (presumably 4 directions per site -- TODO confirm against caller).
 * Grid-stride loop: correct for any grid/block launch configuration. */
__global__ void assign_ud2u_kernel(suNg_flt *gauge_flt, suNg *gauge, int N) {
    /* total number of doubles stored in 4*N links */
    const size_t n_doubles = 4 * N * sizeof(suNg) / sizeof(double);
    for (int ix = blockIdx.x * blockDim.x + threadIdx.x; ix < n_doubles; ix += blockDim.x * gridDim.x) {
        double d = *(((double *)gauge) + ix);
        float *f = ((float *)gauge_flt) + ix;
        *f = (float)d; /* narrowing conversion, intentional precision loss */
    }
}
/* Convert the single-precision gauge field to double precision:
 * reinterprets both fields as flat scalar arrays and copies
 * gauge[i] = (double)gauge_flt[i] for every float in 4*N suNg_flt links.
 * Grid-stride loop: correct for any grid/block launch configuration. */
__global__ void assign_u2ud_kernel(suNg *gauge, suNg_flt *gauge_flt, int N) {
    /* total number of floats stored in 4*N links */
    const size_t n_floats = 4 * N * sizeof(suNg_flt) / sizeof(float);
    for (int ix = blockIdx.x * blockDim.x + threadIdx.x; ix < n_floats; ix += blockDim.x * gridDim.x) {
        float f = *(((float *)gauge_flt) + ix);
        double *d = ((double *)gauge) + ix;
        *d = (double)f; /* widening conversion, exact */
    }
}
/* Fermion-representation variant of assign_ud2u_kernel: copies the
 * double-precision represented gauge field into its single-precision
 * counterpart, elementwise over all doubles in 4*N suNf links.
 * Grid-stride loop: correct for any grid/block launch configuration. */
__global__ void assign_ud2u_f_kernel(suNf_flt *gauge_flt, suNf *gauge, int N) {
    /* total number of doubles stored in 4*N links */
    const size_t n_doubles = 4 * N * sizeof(suNf) / sizeof(double);
    for (int ix = blockIdx.x * blockDim.x + threadIdx.x; ix < n_doubles; ix += blockDim.x * gridDim.x) {
        double d = *(((double *)gauge) + ix);
        float *f = ((float *)gauge_flt) + ix;
        *f = (float)d; /* narrowing conversion, intentional precision loss */
    }
}
/* Fermion-representation variant of assign_u2ud_kernel: copies the
 * single-precision represented gauge field into its double-precision
 * counterpart, elementwise over all floats in 4*N suNf_flt links.
 * Grid-stride loop: correct for any grid/block launch configuration. */
__global__ void assign_u2ud_f_kernel(suNf *gauge, suNf_flt *gauge_flt, int N) {
    /* total number of floats stored in 4*N links */
    const size_t n_floats = 4 * N * sizeof(suNf_flt) / sizeof(float);
    for (int ix = blockIdx.x * blockDim.x + threadIdx.x; ix < n_floats; ix += blockDim.x * gridDim.x) {
        float f = *(((float *)gauge_flt) + ix);
        double *d = ((double *)gauge) + ix;
        *d = (double)f; /* widening conversion, exact */
    }
}
/* Promote a single-precision spinor field to double precision:
 * out[i] = (double)in[i] for every float in N spinors.
 * Grid-stride loop: correct for any grid/block launch configuration. */
__global__ void assign_s2sd_kernel(suNf_spinor *out, suNf_spinor_flt *in, int N) {
    /* total number of floats stored in N single-precision spinors */
    const size_t n_floats = N * sizeof(*in) / sizeof(float);
    for (int ix = blockIdx.x * blockDim.x + threadIdx.x; ix < n_floats; ix += blockDim.x * gridDim.x) {
        float f = *(((float *)in) + ix);
        double *d = ((double *)out) + ix;
        *d = (double)f; /* widening conversion, exact */
    }
}
/* Demote a double-precision spinor field to single precision:
 * out[i] = (float)in[i] for every double in N spinors.
 * Grid-stride loop: correct for any grid/block launch configuration. */
__global__ void assign_sd2s_kernel(suNf_spinor_flt *out, suNf_spinor *in, int N) {
    /* total number of doubles stored in N double-precision spinors */
    const size_t n_floats = N * sizeof(*in) / sizeof(double);
    for (int ix = blockIdx.x * blockDim.x + threadIdx.x; ix < n_floats; ix += blockDim.x * gridDim.x) {
        double d = *((double *)in + ix);
        float *f = (float *)out + ix;
        *f = (float)d; /* narrowing conversion, intentional precision loss */
    }
}
/* Accumulate a single-precision spinor field into a double-precision one:
 * out[i] += (double)in[i] for every float in N spinors.
 * (add-assign semantics inferred from the kernel name -- TODO confirm
 * against the host-side wrapper.)
 * Grid-stride loop: correct for any grid/block launch configuration. */
__global__ void add_assign_s2sd_kernel(suNf_spinor *out, suNf_spinor_flt *in, int N) {
    /* total number of floats stored in N single-precision spinors */
    const size_t n_floats = N * sizeof(*in) / sizeof(float);
    for (int ix = blockIdx.x * blockDim.x + threadIdx.x; ix < n_floats; ix += blockDim.x * gridDim.x) {
        float f = *(((float *)in) + ix);
        double *d = ((double *)out) + ix;
        *d += (double)f; /* accumulate in double precision */
    }
}
/* Accumulate a double-precision spinor field into a single-precision one:
 * out[i] += (float)in[i] for every double in N spinors.
 * (add-assign semantics inferred from the kernel name -- TODO confirm
 * against the host-side wrapper.)
 * Grid-stride loop: correct for any grid/block launch configuration. */
__global__ void add_assign_sd2s_kernel(suNf_spinor_flt *out, suNf_spinor *in, int N) {
    /* total number of doubles stored in N double-precision spinors */
    const size_t n_floats = N * sizeof(*in) / sizeof(double);
    for (int ix = blockIdx.x * blockDim.x + threadIdx.x; ix < n_floats; ix += blockDim.x * gridDim.x) {
        double d = *((double *)in + ix);
        float *f = (float *)out + ix;
        *f += (float)d; /* accumulate, demoted to single precision */
    }
}