6#ifndef CONVERT_KERNELS_HPP 
    7#define CONVERT_KERNELS_HPP 
   11#include "libhr_core.h" 
   14template <
int FIELD_DIM, 
typename REAL, 
typename SITE_TYPE>
 
   15__global__ 
void to_cpu_format_kernel(SITE_TYPE *out, SITE_TYPE *in, 
size_t N, 
size_t block_size, 
size_t master_shift) {
 
   16    for (
size_t id = blockIdx.x * blockDim.x + threadIdx.x; 
id < N; 
id += blockDim.x * gridDim.x) {
 
   18        size_t ix = 
id + block_size;
 
   19        for (
int comp = 0; comp < FIELD_DIM; ++comp) {
 
   20            target = _DFIELD_AT_PTR(out, ix, comp, master_shift, FIELD_DIM);
 
   21            read_gpu<REAL>(0, target, in, (ix - master_shift), comp, FIELD_DIM);
 
   26template <
int FIELD_DIM, 
typename REAL, 
typename SITE_TYPE>
 
   27__global__ 
void to_gpu_format_kernel(SITE_TYPE *out, SITE_TYPE *in, 
size_t N, 
size_t block_size, 
size_t master_shift) {
 
   28    for (
size_t id = blockIdx.x * blockDim.x + threadIdx.x; 
id < N; 
id += blockDim.x * gridDim.x) {
 
   30        size_t ix = 
id + block_size;
 
   31        for (
int comp = 0; comp < FIELD_DIM; ++comp) {
 
   32            s = _DFIELD_AT_PTR(in, ix, comp, master_shift, FIELD_DIM);
 
   33            write_gpu<REAL>(0, s, out, (ix - master_shift), comp, FIELD_DIM);
 
   38template <
int FIELD_DIM, 
typename REAL, 
typename FIELD_TYPE> 
void to_cpu_format_convert(FIELD_TYPE *out, FIELD_TYPE *in) {
 
   40        const size_t N = in->type->master_end[ixp] - in->type->master_start[ixp] + 1;
 
   41        const size_t block_start = in->type->master_start[ixp];
 
   42        const int grid = (N - 1) / BLOCK_SIZE + 1;
 
   43        to_cpu_format_kernel<FIELD_DIM, REAL>
 
   44            <<<grid, BLOCK_SIZE, 0, 0>>>(out->gpu_ptr, in->gpu_ptr, N, block_start, in->type->master_shift);
 
   49template <
int FIELD_DIM, 
typename REAL, 
typename FIELD_TYPE> 
void to_gpu_format_convert(FIELD_TYPE *out, FIELD_TYPE *in) {
 
   51        const size_t N = in->type->master_end[ixp] - in->type->master_start[ixp] + 1;
 
   52        const size_t block_start = in->type->master_start[ixp];
 
   53        const int grid = (N - 1) / BLOCK_SIZE + 1;
 
   54        to_gpu_format_kernel<FIELD_DIM, REAL>
 
   55            <<<grid, BLOCK_SIZE, 0, 0>>>(out->gpu_ptr, in->gpu_ptr, N, block_start, in->type->master_shift);
 
#define CudaCheckError()
Check last error after CUDA calls.
Definition error_gpu.h:41
This file contains information on the geometry of the local lattice, block decomposed geometry,...
#define _PIECE_FOR(type, ip)
Iterate over the pieces of the given geometry.
Definition geometry_omp.h:19