HiRep 0.1
Loading...
Searching...
No Matches
convert_kernels.hpp
1/***************************************************************************\
2* Copyright (c) 2023, Sofie Martins *
3* All rights reserved. *
4\***************************************************************************/
5
6#ifndef CONVERT_KERNELS_HPP
7#define CONVERT_KERNELS_HPP
8
9#ifdef WITH_GPU
10
11#include "libhr_core.h"
12#include "geometry.h"
13
14template <int FIELD_DIM, typename REAL, typename SITE_TYPE>
15__global__ void to_cpu_format_kernel(SITE_TYPE *out, SITE_TYPE *in, size_t N, size_t block_size, size_t master_shift) {
16 for (size_t id = blockIdx.x * blockDim.x + threadIdx.x; id < N; id += blockDim.x * gridDim.x) {
17 SITE_TYPE *target;
18 size_t ix = id + block_size;
19 for (int comp = 0; comp < FIELD_DIM; ++comp) {
20 target = _DFIELD_AT_PTR(out, ix, comp, master_shift, FIELD_DIM);
21 read_gpu<REAL>(0, target, in, (ix - master_shift), comp, FIELD_DIM);
22 }
23 }
24}
25
26template <int FIELD_DIM, typename REAL, typename SITE_TYPE>
27__global__ void to_gpu_format_kernel(SITE_TYPE *out, SITE_TYPE *in, size_t N, size_t block_size, size_t master_shift) {
28 for (size_t id = blockIdx.x * blockDim.x + threadIdx.x; id < N; id += blockDim.x * gridDim.x) {
29 SITE_TYPE *s;
30 size_t ix = id + block_size;
31 for (int comp = 0; comp < FIELD_DIM; ++comp) {
32 s = _DFIELD_AT_PTR(in, ix, comp, master_shift, FIELD_DIM);
33 write_gpu<REAL>(0, s, out, (ix - master_shift), comp, FIELD_DIM);
34 }
35 }
36}
37
38template <int FIELD_DIM, typename REAL, typename FIELD_TYPE> void to_cpu_format_convert(FIELD_TYPE *out, FIELD_TYPE *in) {
39 _PIECE_FOR(in->type, ixp) {
40 const size_t N = in->type->master_end[ixp] - in->type->master_start[ixp] + 1;
41 const size_t block_start = in->type->master_start[ixp];
42 const int grid = (N - 1) / BLOCK_SIZE + 1;
43 to_cpu_format_kernel<FIELD_DIM, REAL>
44 <<<grid, BLOCK_SIZE, 0, 0>>>(out->gpu_ptr, in->gpu_ptr, N, block_start, in->type->master_shift);
46 }
47}
48
49template <int FIELD_DIM, typename REAL, typename FIELD_TYPE> void to_gpu_format_convert(FIELD_TYPE *out, FIELD_TYPE *in) {
50 _PIECE_FOR(in->type, ixp) {
51 const size_t N = in->type->master_end[ixp] - in->type->master_start[ixp] + 1;
52 const size_t block_start = in->type->master_start[ixp];
53 const int grid = (N - 1) / BLOCK_SIZE + 1;
54 to_gpu_format_kernel<FIELD_DIM, REAL>
55 <<<grid, BLOCK_SIZE, 0, 0>>>(out->gpu_ptr, in->gpu_ptr, N, block_start, in->type->master_shift);
57 }
58}
59
60#endif
61#endif
#define CudaCheckError()
Check last error after CUDA calls.
Definition error_gpu.h:41
This file contains information on the geometry of the local lattice, block decomposed geometry,...
#define _PIECE_FOR(type, ip)
Iterate over the pieces of the given geometry.
Definition geometry_omp.h:19