6#ifndef CONVERT_KERNELS_HPP
7#define CONVERT_KERNELS_HPP
11#include "libhr_core.h"
14template <
int FIELD_DIM,
typename REAL,
typename SITE_TYPE>
15__global__
void to_cpu_format_kernel(SITE_TYPE *out, SITE_TYPE *in,
size_t N,
size_t block_size,
size_t master_shift) {
16 for (
size_t id = blockIdx.x * blockDim.x + threadIdx.x;
id < N;
id += blockDim.x * gridDim.x) {
18 size_t ix =
id + block_size;
19 for (
int comp = 0; comp < FIELD_DIM; ++comp) {
20 target = _DFIELD_AT_PTR(out, ix, comp, master_shift, FIELD_DIM);
21 read_gpu<REAL>(0, target, in, (ix - master_shift), comp, FIELD_DIM);
26template <
int FIELD_DIM,
typename REAL,
typename SITE_TYPE>
27__global__
void to_gpu_format_kernel(SITE_TYPE *out, SITE_TYPE *in,
size_t N,
size_t block_size,
size_t master_shift) {
28 for (
size_t id = blockIdx.x * blockDim.x + threadIdx.x;
id < N;
id += blockDim.x * gridDim.x) {
30 size_t ix =
id + block_size;
31 for (
int comp = 0; comp < FIELD_DIM; ++comp) {
32 s = _DFIELD_AT_PTR(in, ix, comp, master_shift, FIELD_DIM);
33 write_gpu<REAL>(0, s, out, (ix - master_shift), comp, FIELD_DIM);
38template <
int FIELD_DIM,
typename REAL,
typename FIELD_TYPE>
void to_cpu_format_convert(FIELD_TYPE *out, FIELD_TYPE *in) {
40 const size_t N = in->type->master_end[ixp] - in->type->master_start[ixp] + 1;
41 const size_t block_start = in->type->master_start[ixp];
42 const int grid = (N - 1) / BLOCK_SIZE + 1;
43 to_cpu_format_kernel<FIELD_DIM, REAL>
44 <<<grid, BLOCK_SIZE, 0, 0>>>(out->gpu_ptr, in->gpu_ptr, N, block_start, in->type->master_shift);
49template <
int FIELD_DIM,
typename REAL,
typename FIELD_TYPE>
void to_gpu_format_convert(FIELD_TYPE *out, FIELD_TYPE *in) {
51 const size_t N = in->type->master_end[ixp] - in->type->master_start[ixp] + 1;
52 const size_t block_start = in->type->master_start[ixp];
53 const int grid = (N - 1) / BLOCK_SIZE + 1;
54 to_gpu_format_kernel<FIELD_DIM, REAL>
55 <<<grid, BLOCK_SIZE, 0, 0>>>(out->gpu_ptr, in->gpu_ptr, N, block_start, in->type->master_shift);
#define CudaCheckError()
Check last error after CUDA calls.
Definition error_gpu.h:41
This file contains information on the geometry of the local lattice, block decomposed geometry,...
#define _PIECE_FOR(type, ip)
Iterate over the pieces of the given geometry.
Definition geometry_omp.h:19