11#include "libhr_core.h"
// Maps extended-lattice coordinates (t, x, y, z) to a site index by looking
// up the device ipt table via the lexicographic index macro _lexi over the
// extended dimensions T_EXT_GPU/X_EXT_GPU/Y_EXT_GPU/Z_EXT_GPU.
// Requires an `int *ipt_gpu_d` in scope (every kernel below takes it as a
// parameter). NOTE(review): the leading digits fused onto lines in this
// chunk are original line numbers from the un-garbled file.
14#define ipt_ext_gpu_loc(t, x, y, z) ipt_gpu_d[_lexi(T_EXT_GPU, X_EXT_GPU, Y_EXT_GPU, Z_EXT_GPU, t, x, y, z)]
// Sets the clover-term field C on the three time slices around the
// Schrodinger-functional boundary (t = border-1, border, border+1), writing
// all four direction components per site via a 3D grid-stride loop over the
// spatial volume (xmax, ymax, zmax).
// NOTE(review): extraction-garbled chunk — the declarations of `index` and
// `c` (and the closing braces) are not visible; `c` is presumably a zeroed
// suNfc — confirm against the full file.
20__global__
void apply_cl_SF_BCs(suNfc *C,
int border,
int *ipt_gpu_d,
int xmax,
int ymax,
int zmax) {
21 for (
int ix = blockDim.x * blockIdx.x + threadIdx.x; ix < xmax; ix += gridDim.x * blockDim.x) {
22 for (
int iy = blockDim.y * blockIdx.y + threadIdx.y; iy < ymax; iy += gridDim.y * blockDim.y) {
23 for (
int iz = blockDim.z * blockIdx.z + threadIdx.z; iz < zmax; iz += gridDim.z * blockDim.z) {
// Slice t = border-1: write all four direction components.
28 index = ipt_ext_gpu_loc(border - 1, ix, iy, iz);
30 write_gpu<double>(0, &c, C, index, 0, 4);
31 write_gpu<double>(0, &c, C, index, 1, 4);
32 write_gpu<double>(0, &c, C, index, 2, 4);
33 write_gpu<double>(0, &c, C, index, 3, 4);
// Slice t = border.
37 index = ipt_ext_gpu_loc(border, ix, iy, iz);
39 write_gpu<double>(0, &c, C, index, 0, 4);
40 write_gpu<double>(0, &c, C, index, 1, 4);
41 write_gpu<double>(0, &c, C, index, 2, 4);
42 write_gpu<double>(0, &c, C, index, 3, 4);
// Slice t = border+1.
45 index = ipt_ext_gpu_loc(border + 1, ix, iy, iz);
47 write_gpu<double>(0, &c, C, index, 0, 4);
48 write_gpu<double>(0, &c, C, index, 1, 4);
49 write_gpu<double>(0, &c, C, index, 2, 4);
50 write_gpu<double>(0, &c, C, index, 3, 4);
// Sets the clover-term field C on the two time slices at the LOWER open
// boundary (t = border-1 and t = border), all four direction components,
// over a 3D grid-stride loop of the spatial volume.
// NOTE(review): extraction-garbled — declarations of `index`/`c` and
// closing braces are missing from this view.
57__global__
void apply_cl_open_BCs1(suNfc *C,
int border,
int *ipt_gpu_d,
int xmax,
int ymax,
int zmax) {
58 for (
int ix = blockDim.x * blockIdx.x + threadIdx.x; ix < xmax; ix += gridDim.x * blockDim.x) {
59 for (
int iy = blockDim.y * blockIdx.y + threadIdx.y; iy < ymax; iy += gridDim.y * blockDim.y) {
60 for (
int iz = blockDim.z * blockIdx.z + threadIdx.z; iz < zmax; iz += gridDim.z * blockDim.z) {
// Slice t = border-1.
65 index = ipt_ext_gpu_loc(border - 1, ix, iy, iz);
67 write_gpu<double>(0, &c, C, index, 0, 4);
68 write_gpu<double>(0, &c, C, index, 1, 4);
69 write_gpu<double>(0, &c, C, index, 2, 4);
70 write_gpu<double>(0, &c, C, index, 3, 4);
// Slice t = border.
74 index = ipt_ext_gpu_loc(border, ix, iy, iz);
76 write_gpu<double>(0, &c, C, index, 0, 4);
77 write_gpu<double>(0, &c, C, index, 1, 4);
78 write_gpu<double>(0, &c, C, index, 2, 4);
79 write_gpu<double>(0, &c, C, index, 3, 4);
// Sets the clover-term field C on the two time slices at the UPPER open
// boundary (t = T_GPU+border and t = T_GPU+border-1), all four direction
// components, over a 3D grid-stride loop of the spatial volume.
// NOTE(review): extraction-garbled — declarations of `index`/`c` and
// closing braces are missing from this view.
86__global__
void apply_cl_open_BCs2(suNfc *C,
int border,
int *ipt_gpu_d,
int xmax,
int ymax,
int zmax) {
87 for (
int ix = blockDim.x * blockIdx.x + threadIdx.x; ix < xmax; ix += gridDim.x * blockDim.x) {
88 for (
int iy = blockDim.y * blockIdx.y + threadIdx.y; iy < ymax; iy += gridDim.y * blockDim.y) {
89 for (
int iz = blockDim.z * blockIdx.z + threadIdx.z; iz < zmax; iz += gridDim.z * blockDim.z) {
// Slice t = T_GPU + border.
94 index = ipt_ext_gpu_loc(T_GPU + border, ix, iy, iz);
96 write_gpu<double>(0, &c, C, index, 0, 4);
97 write_gpu<double>(0, &c, C, index, 1, 4);
98 write_gpu<double>(0, &c, C, index, 2, 4);
99 write_gpu<double>(0, &c, C, index, 3, 4);
// Slice t = T_GPU + border - 1.
103 index = ipt_ext_gpu_loc(T_GPU + border - 1, ix, iy, iz);
105 write_gpu<double>(0, &c, C, index, 0, 4);
106 write_gpu<double>(0, &c, C, index, 1, 4);
107 write_gpu<double>(0, &c, C, index, 2, 4);
108 write_gpu<double>(0, &c, C, index, 3, 4);
// Read-modify-write of the temporal (direction 0) link of gauge field g on
// the time slice t = 2*border, over a 3D grid-stride loop of the spatial
// volume. NOTE(review): the statement between the read and the write
// (presumably applying a boundary factor to `u`) and the declaration of `u`
// are missing from this garbled view — confirm against the full file.
119__global__
void apply_boundary_conditions_T(suNf *g,
int border,
int *ipt_gpu_d,
int xmax,
int ymax,
int zmax) {
120 for (
int ix = blockDim.x * blockIdx.x + threadIdx.x; ix < xmax; ix += gridDim.x * blockDim.x) {
121 for (
int iy = blockDim.y * blockIdx.y + threadIdx.y; iy < ymax; iy += gridDim.y * blockDim.y) {
122 for (
int iz = blockDim.z * blockIdx.z + threadIdx.z; iz < zmax; iz += gridDim.z * blockDim.z) {
123 int index = ipt_ext_gpu_loc(2 * border, ix, iy, iz);
126 read_gpu<double>(0, &u, g, index, 0, 4);
128 write_gpu<double>(0, &u, g, index, 0, 4);
// Read-modify-write of the direction-1 (X) link of gauge field g on the
// plane x = 2*border, looping grid-stride over (tmax, ymax, zmax).
// NOTE(review): the modification of `u` between read and write, and the
// declaration of `u`, are missing from this garbled view.
135__global__
void apply_boundary_conditions_X(suNf *g,
int border,
int *ipt_gpu_d,
int tmax,
int ymax,
int zmax) {
136 for (
int it = blockDim.x * blockIdx.x + threadIdx.x; it < tmax; it += gridDim.x * blockDim.x) {
137 for (
int iy = blockDim.y * blockIdx.y + threadIdx.y; iy < ymax; iy += gridDim.y * blockDim.y) {
138 for (
int iz = blockDim.z * blockIdx.z + threadIdx.z; iz < zmax; iz += gridDim.z * blockDim.z) {
139 int index = ipt_ext_gpu_loc(it, 2 * border, iy, iz);
142 read_gpu<double>(0, &u, g, index, 1, 4);
144 write_gpu<double>(0, &u, g, index, 1, 4);
// Read-modify-write of the direction-2 (Y) link of gauge field g on the
// plane y = 2*border, looping grid-stride over (tmax, xmax, zmax).
// NOTE(review): the modification of `u` between read and write, and the
// declaration of `u`, are missing from this garbled view.
151__global__
void apply_boundary_conditions_Y(suNf *g,
int border,
int *ipt_gpu_d,
int tmax,
int xmax,
int zmax) {
152 for (
int it = blockDim.x * blockIdx.x + threadIdx.x; it < tmax; it += gridDim.x * blockDim.x) {
153 for (
int ix = blockDim.y * blockIdx.y + threadIdx.y; ix < xmax; ix += gridDim.y * blockDim.y) {
154 for (
int iz = blockDim.z * blockIdx.z + threadIdx.z; iz < zmax; iz += gridDim.z * blockDim.z) {
155 int index = ipt_ext_gpu_loc(it, ix, 2 * border, iz);
158 read_gpu<double>(0, &u, g, index, 2, 4);
160 write_gpu<double>(0, &u, g, index, 2, 4);
// Read-modify-write of the direction-3 (Z) link of gauge field g on the
// plane z = 2*border, looping grid-stride over (tmax, xmax, ymax).
// NOTE(review): the modification of `u` between read and write, and the
// declaration of `u`, are missing from this garbled view.
167__global__
void apply_boundary_conditions_Z(suNf *g,
int border,
int *ipt_gpu_d,
int tmax,
int xmax,
int ymax) {
168 for (
int it = blockDim.x * blockIdx.x + threadIdx.x; it < tmax; it += gridDim.x * blockDim.x) {
169 for (
int ix = blockDim.y * blockIdx.y + threadIdx.y; ix < xmax; ix += gridDim.y * blockDim.y) {
170 for (
int iy = blockDim.z * blockIdx.z + threadIdx.z; iy < ymax; iy += gridDim.z * blockDim.z) {
171 int index = ipt_ext_gpu_loc(it, ix, iy, 2 * border);
174 read_gpu<double>(0, &u, g, index, 3, 4);
176 write_gpu<double>(0, &u, g, index, 3, 4);
// Read-modify-write of the three spatial links (directions 1, 2, 3) of
// gauge field g on the time slice t = border, 3D grid-stride over the
// spatial volume. The `ds` parameter is presumably the scaling applied to
// each link in the missing statements between each read/write pair —
// TODO confirm against the full file (those lines and the declaration of
// `u` are not visible in this garbled view).
183__global__
void apply_chiSF_ds_BT(
double ds, suNf *g,
int border,
int *ipt_gpu_d,
int xmax,
int ymax,
int zmax) {
184 for (
int ix = blockDim.x * blockIdx.x + threadIdx.x; ix < xmax; ix += gridDim.x * blockDim.x) {
185 for (
int iy = blockDim.y * blockIdx.y + threadIdx.y; iy < ymax; iy += gridDim.y * blockDim.y) {
186 for (
int iz = blockDim.z * blockIdx.z + threadIdx.z; iz < zmax; iz += gridDim.z * blockDim.z) {
187 int index = ipt_ext_gpu_loc(border, ix, iy, iz);
// Direction 1.
190 read_gpu<double>(0, &u, g, index, 1, 4);
192 write_gpu<double>(0, &u, g, index, 1, 4);
// Direction 2.
194 read_gpu<double>(0, &u, g, index, 2, 4);
196 write_gpu<double>(0, &u, g, index, 2, 4);
// Direction 3.
198 read_gpu<double>(0, &u, g, index, 3, 4);
200 write_gpu<double>(0, &u, g, index, 3, 4);
// Applies the Schrodinger-functional gauge-field boundary at the LOWER time
// edge: writes a value `u` (initialization not visible here) into all four
// directions on slices t = border-1 and t = border, then writes the fixed
// boundary matrix `dn` into the spatial directions (1, 2, 3) on
// t = border+1. 3D grid-stride loop over the spatial volume.
// NOTE(review): extraction-garbled — the declaration/assignments of `u`,
// the lines between writes, and the closing braces are missing; `up` is
// unused in the visible lines (presumably used in the missing ones).
211__global__
void apply_gf_SF_BCs_1(suNg *g, suNg *up, suNg *dn,
int border,
int *ipt_gpu_d,
int xmax,
int ymax,
int zmax) {
212 for (
int ix = blockDim.x * blockIdx.x + threadIdx.x; ix < xmax; ix += gridDim.x * blockDim.x) {
213 for (
int iy = blockDim.y * blockIdx.y + threadIdx.y; iy < ymax; iy += gridDim.y * blockDim.y) {
214 for (
int iz = blockDim.z * blockIdx.z + threadIdx.z; iz < zmax; iz += gridDim.z * blockDim.z) {
215 int index = ipt_ext_gpu_loc(border - 1, ix, iy, iz);
220 write_gpu<double>(0, &u, g, index, 0, 4);
222 write_gpu<double>(0, &u, g, index, 1, 4);
224 write_gpu<double>(0, &u, g, index, 2, 4);
226 write_gpu<double>(0, &u, g, index, 3, 4);
230 index = ipt_ext_gpu_loc(border, ix, iy, iz);
234 write_gpu<double>(0, &u, g, index, 0, 4);
236 write_gpu<double>(0, &u, g, index, 1, 4);
238 write_gpu<double>(0, &u, g, index, 2, 4);
240 write_gpu<double>(0, &u, g, index, 3, 4);
// Spatial links on t = border+1 get the lower boundary matrix `dn`.
243 index = ipt_ext_gpu_loc(border + 1, ix, iy, iz);
245 write_gpu<double>(0, dn, g, index, 1, 4);
246 write_gpu<double>(0, dn, g, index, 2, 4);
247 write_gpu<double>(0, dn, g, index, 3, 4);
// Applies the Schrodinger-functional gauge-field boundary at the UPPER time
// edge: on t = T_GPU+border-1 writes `u` into direction 0 and the boundary
// matrix `up` into the spatial directions; on t = T_GPU+border writes `u`
// into all four directions. 3D grid-stride loop over the spatial volume.
// NOTE(review): extraction-garbled — `u`'s declaration/assignments and the
// closing braces are missing; `dn` is unused in the visible lines. Also,
// `int index` at original line 269 re-declares the `index` from line 258 in
// what appears to be the same scope — verify in the full file.
254__global__
void apply_gf_SF_BCs_2(suNg *g, suNg *up, suNg *dn,
int border,
int *ipt_gpu_d,
int xmax,
int ymax,
int zmax) {
255 for (
int ix = blockDim.x * blockIdx.x + threadIdx.x; ix < xmax; ix += gridDim.x * blockDim.x) {
256 for (
int iy = blockDim.y * blockIdx.y + threadIdx.y; iy < ymax; iy += gridDim.y * blockDim.y) {
257 for (
int iz = blockDim.z * blockIdx.z + threadIdx.z; iz < zmax; iz += gridDim.z * blockDim.z) {
258 int index = ipt_ext_gpu_loc(T_GPU + border - 1, ix, iy, iz);
262 write_gpu<double>(0, &u, g, index, 0, 4);
263 write_gpu<double>(0, up, g, index, 1, 4);
264 write_gpu<double>(0, up, g, index, 2, 4);
265 write_gpu<double>(0, up, g, index, 3, 4);
269 int index = ipt_ext_gpu_loc(T_GPU + border, ix, iy, iz);
273 write_gpu<double>(0, &u, g, index, 0, 4);
275 write_gpu<double>(0, &u, g, index, 1, 4);
277 write_gpu<double>(0, &u, g, index, 2, 4);
279 write_gpu<double>(0, &u, g, index, 3, 4);
// Writes the SF "classical solution" configuration on time slice `it`:
// value `u` into direction 0 and the matrix `U` into the spatial directions
// (1, 2, 3) at every spatial site.
// NOTE(review): the signature is truncated in this garbled chunk — the
// trailing `int zmax) {` (and the declaration/assignment of `u`, plus
// closing braces) are missing from this view; `zmax` is nonetheless used by
// the innermost loop. Confirm against the full file.
287__global__
void apply_SF_classical_solution(suNg *g, suNg *U,
int it,
int border,
int *ipt_gpu_d,
int xmax,
int ymax,
289 for (
int ix = blockDim.x * blockIdx.x + threadIdx.x; ix < xmax; ix += gridDim.x * blockDim.x) {
290 for (
int iy = blockDim.y * blockIdx.y + threadIdx.y; iy < ymax; iy += gridDim.y * blockDim.y) {
291 for (
int iz = blockDim.z * blockIdx.z + threadIdx.z; iz < zmax; iz += gridDim.z * blockDim.z) {
292 int index = ipt_ext_gpu_loc(it, ix, iy, iz);
296 write_gpu<double>(0, &u, g, index, 0, 4);
298 write_gpu<double>(0, U, g, index, 1, 4);
299 write_gpu<double>(0, U, g, index, 2, 4);
300 write_gpu<double>(0, U, g, index, 3, 4);
// Sets all four directions of gauge field g to a value `u` on the time
// slice t = border (open boundary), 3D grid-stride over the spatial volume.
// NOTE(review): `u`'s declaration/assignment and the closing braces are
// missing from this garbled view — presumably a unit or zero matrix;
// confirm against the full file.
307__global__
void apply_gf_open_BCs(suNg *g,
int border,
int *ipt_gpu_d,
int xmax,
int ymax,
int zmax) {
308 for (
int ix = blockDim.x * blockIdx.x + threadIdx.x; ix < xmax; ix += gridDim.x * blockDim.x) {
309 for (
int iy = blockDim.y * blockIdx.y + threadIdx.y; iy < ymax; iy += gridDim.y * blockDim.y) {
310 for (
int iz = blockDim.z * blockIdx.z + threadIdx.z; iz < zmax; iz += gridDim.z * blockDim.z) {
311 int index = ipt_ext_gpu_loc(border, ix, iy, iz);
315 write_gpu<double>(0, &u, g, index, 0, 4);
316 write_gpu<double>(0, &u, g, index, 1, 4);
317 write_gpu<double>(0, &u, g, index, 2, 4);
318 write_gpu<double>(0, &u, g, index, 3, 4);
// Sets ONLY the temporal (direction 0) link of gauge field g to a value `u`
// on the time slice t = border, 3D grid-stride over the spatial volume.
// NOTE(review): `u`'s declaration/assignment and the closing braces are
// missing from this garbled view.
325__global__
void apply_gf_open_BCs2(suNg *g,
int border,
int *ipt_gpu_d,
int xmax,
int ymax,
int zmax) {
326 for (
int ix = blockDim.x * blockIdx.x + threadIdx.x; ix < xmax; ix += gridDim.x * blockDim.x) {
327 for (
int iy = blockDim.y * blockIdx.y + threadIdx.y; iy < ymax; iy += gridDim.y * blockDim.y) {
328 for (
int iz = blockDim.z * blockIdx.z + threadIdx.z; iz < zmax; iz += gridDim.z * blockDim.z) {
329 int index = ipt_ext_gpu_loc(border, ix, iy, iz);
333 write_gpu<double>(0, &u, g, index, 0, 4);
// Dirichlet boundary for the momentum field: zeroes (via
// _algebra_vector_zero_g) all four direction components of V on the time
// slice t = border, one 3D grid-stride loop over the spatial volume.
// NOTE(review): closing braces are missing from this garbled view; leading
// digits are original line numbers.
344__global__
void apply_mf_Dirichlet_BCs(suNg_algebra_vector *V,
int border,
int *ipt_gpu_d,
int xmax,
int ymax,
int zmax) {
345 for (
int ix = blockDim.x * blockIdx.x + threadIdx.x; ix < xmax; ix += gridDim.x * blockDim.x) {
346 for (
int iy = blockDim.y * blockIdx.y + threadIdx.y; iy < ymax; iy += gridDim.y * blockDim.y) {
347 for (
int iz = blockDim.z * blockIdx.z + threadIdx.z; iz < zmax; iz += gridDim.z * blockDim.z) {
348 int index = ipt_ext_gpu_loc(border, ix, iy, iz);
350 suNg_algebra_vector v;
351 _algebra_vector_zero_g(v);
352 write_gpu<double>(0, &v, V, index, 0, 4);
353 write_gpu<double>(0, &v, V, index, 1, 4);
354 write_gpu<double>(0, &v, V, index, 2, 4);
355 write_gpu<double>(0, &v, V, index, 3, 4);
// Dirichlet boundary for the momentum field, SPATIAL directions only:
// zeroes components 1, 2, 3 of V on the time slice t = border.
// NOTE(review): the signature is truncated in this garbled chunk — the
// trailing `int zmax) {` is missing from this view although `zmax` is used
// by the innermost loop; closing braces are also missing. Confirm against
// the full file.
362__global__
void apply_mf_Dirichlet_BCs_spatial(suNg_algebra_vector *V,
int border,
int *ipt_gpu_d,
int xmax,
int ymax,
364 for (
int ix = blockDim.x * blockIdx.x + threadIdx.x; ix < xmax; ix += gridDim.x * blockDim.x) {
365 for (
int iy = blockDim.y * blockIdx.y + threadIdx.y; iy < ymax; iy += gridDim.y * blockDim.y) {
366 for (
int iz = blockDim.z * blockIdx.z + threadIdx.z; iz < zmax; iz += gridDim.z * blockDim.z) {
367 int index = ipt_ext_gpu_loc(border, ix, iy, iz);
369 suNg_algebra_vector v;
370 _algebra_vector_zero_g(v);
371 write_gpu<double>(0, &v, V, index, 1, 4);
372 write_gpu<double>(0, &v, V, index, 2, 4);
373 write_gpu<double>(0, &v, V, index, 3, 4);
// Dirichlet boundary for the momentum field, TEMPORAL direction only:
// zeroes component 0 of V on the time slice t = border.
// NOTE(review): the signature is truncated in this garbled chunk — the
// trailing `int zmax) {` is missing from this view although `zmax` is used
// by the innermost loop; closing braces are also missing. Confirm against
// the full file.
380__global__
void apply_mf_Dirichlet_BCs_temporal(suNg_algebra_vector *V,
int border,
int *ipt_gpu_d,
int xmax,
int ymax,
382 for (
int ix = blockDim.x * blockIdx.x + threadIdx.x; ix < xmax; ix += gridDim.x * blockDim.x) {
383 for (
int iy = blockDim.y * blockIdx.y + threadIdx.y; iy < ymax; iy += gridDim.y * blockDim.y) {
384 for (
int iz = blockDim.z * blockIdx.z + threadIdx.z; iz < zmax; iz += gridDim.z * blockDim.z) {
385 int index = ipt_ext_gpu_loc(border, ix, iy, iz);
387 suNg_algebra_vector v;
388 _algebra_vector_zero_g(v);
389 write_gpu<double>(0, &v, V, index, 0, 4);
// Dirichlet boundary for a spinor field: writes a value `s` (declaration
// not visible here — presumably a zero spinor) at t = border and
// t = border+1. The site index is shifted by `master_shift` and guarded to
// lie inside [0, gsize) so only sites owned by this field piece are
// touched. NOTE(review): `index != -1` is redundant given `0 <= index` in
// the same guard; closing braces are missing from this garbled view.
400__global__
void apply_sf_Dirichlet_BCs1(suNf_spinor *sp,
int master_shift,
int gsize,
int border,
int *ipt_gpu_d,
int xmax,
401 int ymax,
int zmax) {
402 for (
int ix = blockDim.x * blockIdx.x + threadIdx.x; ix < xmax; ix += gridDim.x * blockDim.x) {
403 for (
int iy = blockDim.y * blockIdx.y + threadIdx.y; iy < ymax; iy += gridDim.y * blockDim.y) {
404 for (
int iz = blockDim.z * blockIdx.z + threadIdx.z; iz < zmax; iz += gridDim.z * blockDim.z) {
408 int index = ipt_ext_gpu_loc(border, ix, iy, iz) - master_shift;
409 if (index != -1 && 0 <= index && gsize > index) { write_gpu<double>(0, &s, sp, index, 0, 1); }
411 index = ipt_ext_gpu_loc(border + 1, ix, iy, iz) - master_shift;
412 if (index != -1 && 0 <= index && gsize > index) { write_gpu<double>(0, &s, sp, index, 0, 1); }
// Dirichlet boundary for a spinor field on the single slice t = border;
// same shift/guard scheme as apply_sf_Dirichlet_BCs1 (index shifted by
// master_shift, clamped to [0, gsize)).
// NOTE(review): `s`'s declaration and the closing braces are missing from
// this garbled view; `index != -1` is redundant given `0 <= index`.
418__global__
void apply_sf_Dirichlet_BCs2(suNf_spinor *sp,
int master_shift,
int gsize,
int border,
int *ipt_gpu_d,
int xmax,
419 int ymax,
int zmax) {
420 for (
int ix = blockDim.x * blockIdx.x + threadIdx.x; ix < xmax; ix += gridDim.x * blockDim.x) {
421 for (
int iy = blockDim.y * blockIdx.y + threadIdx.y; iy < ymax; iy += gridDim.y * blockDim.y) {
422 for (
int iz = blockDim.z * blockIdx.z + threadIdx.z; iz < zmax; iz += gridDim.z * blockDim.z) {
426 int index = ipt_ext_gpu_loc(border, ix, iy, iz) - master_shift;
427 if (index != -1 && 0 <= index && gsize > index) { write_gpu<double>(0, &s, sp, index, 0, 1); }
// Dirichlet boundary for a spinor field on the single slice t = border-1;
// same shift/guard scheme as apply_sf_Dirichlet_BCs1.
// NOTE(review): `s`'s declaration and the closing braces are missing from
// this garbled view; `index != -1` is redundant given `0 <= index`.
433__global__
void apply_sf_Dirichlet_BCs3(suNf_spinor *sp,
int master_shift,
int gsize,
int border,
int *ipt_gpu_d,
int xmax,
434 int ymax,
int zmax) {
435 for (
int ix = blockDim.x * blockIdx.x + threadIdx.x; ix < xmax; ix += gridDim.x * blockDim.x) {
436 for (
int iy = blockDim.y * blockIdx.y + threadIdx.y; iy < ymax; iy += gridDim.y * blockDim.y) {
437 for (
int iz = blockDim.z * blockIdx.z + threadIdx.z; iz < zmax; iz += gridDim.z * blockDim.z) {
440 int index = ipt_ext_gpu_loc(border - 1, ix, iy, iz) - master_shift;
441 if (index != -1 && 0 <= index && gsize > index) { write_gpu<double>(0, &s, sp, index, 0, 1); }
// Single-precision variant of apply_sf_Dirichlet_BCs1: writes `s` into the
// float spinor field at t = border and t = border+1, with the same
// master_shift / [0, gsize) ownership guard; uses write_gpu<float> to match
// the suNf_spinor_flt element type.
// NOTE(review): `s`'s declaration and the closing braces are missing from
// this garbled view; `index != -1` is redundant given `0 <= index`.
447__global__
void apply_sf_Dirichlet_BCs1_flt(suNf_spinor_flt *sp,
int master_shift,
int gsize,
int border,
int *ipt_gpu_d,
448 int xmax,
int ymax,
int zmax) {
449 for (
int ix = blockDim.x * blockIdx.x + threadIdx.x; ix < xmax; ix += gridDim.x * blockDim.x) {
450 for (
int iy = blockDim.y * blockIdx.y + threadIdx.y; iy < ymax; iy += gridDim.y * blockDim.y) {
451 for (
int iz = blockDim.z * blockIdx.z + threadIdx.z; iz < zmax; iz += gridDim.z * blockDim.z) {
455 int index = ipt_ext_gpu_loc(border, ix, iy, iz) - master_shift;
456 if (index != -1 && 0 <= index && gsize > index) { write_gpu<float>(0, &s, sp, index, 0, 1); }
458 index = ipt_ext_gpu_loc(border + 1, ix, iy, iz) - master_shift;
459 if (index != -1 && 0 <= index && gsize > index) { write_gpu<float>(0, &s, sp, index, 0, 1); }
// Single-precision variant of apply_sf_Dirichlet_BCs2: writes `s` into the
// float spinor field on the single slice t = border, same shift/guard
// scheme; uses write_gpu<float> to match suNf_spinor_flt.
// NOTE(review): `s`'s declaration and the closing braces are missing from
// this garbled view; `index != -1` is redundant given `0 <= index`.
465__global__
void apply_sf_Dirichlet_BCs2_flt(suNf_spinor_flt *sp,
int master_shift,
int gsize,
int border,
int *ipt_gpu_d,
466 int xmax,
int ymax,
int zmax) {
467 for (
int ix = blockDim.x * blockIdx.x + threadIdx.x; ix < xmax; ix += gridDim.x * blockDim.x) {
468 for (
int iy = blockDim.y * blockIdx.y + threadIdx.y; iy < ymax; iy += gridDim.y * blockDim.y) {
469 for (
int iz = blockDim.z * blockIdx.z + threadIdx.z; iz < zmax; iz += gridDim.z * blockDim.z) {
473 int index = ipt_ext_gpu_loc(border, ix, iy, iz) - master_shift;
474 if (index != -1 && 0 <= index && gsize > index) { write_gpu<float>(0, &s, sp, index, 0, 1); }
// Single-precision variant of apply_sf_Dirichlet_BCs3: writes `s` into the
// float spinor field on the single slice t = border-1, same shift/guard
// scheme; uses write_gpu<float> to match suNf_spinor_flt.
// NOTE(review): `s`'s declaration and the closing braces are missing from
// this garbled view; `index != -1` is redundant given `0 <= index`.
480__global__
void apply_sf_Dirichlet_BCs3_flt(suNf_spinor_flt *sp,
int master_shift,
int gsize,
int border,
int *ipt_gpu_d,
481 int xmax,
int ymax,
int zmax) {
482 for (
int ix = blockDim.x * blockIdx.x + threadIdx.x; ix < xmax; ix += gridDim.x * blockDim.x) {
483 for (
int iy = blockDim.y * blockIdx.y + threadIdx.y; iy < ymax; iy += gridDim.y * blockDim.y) {
484 for (
int iz = blockDim.z * blockIdx.z + threadIdx.z; iz < zmax; iz += gridDim.z * blockDim.z) {
487 int index = ipt_ext_gpu_loc(border - 1, ix, iy, iz) - master_shift;
488 if (index != -1 && 0 <= index && gsize > index) { write_gpu<float>(0, &s, sp, index, 0, 1); }
// Open boundary for a spinor field: writes `s` (declaration not visible —
// presumably a zero spinor) on the slice t = border, with the
// master_shift / [0, gsize) ownership guard.
// NOTE(review): the signature is truncated in this garbled chunk — the
// trailing `int zmax) {` is missing from this view although `zmax` is used
// by the innermost loop; closing braces are also missing. `index != -1` is
// redundant given `0 <= index`.
494__global__
void apply_sf_open_BCs(suNf_spinor *sp,
int master_shift,
int gsize,
int border,
int *ipt_gpu_d,
int xmax,
int ymax,
496 for (
int ix = blockDim.x * blockIdx.x + threadIdx.x; ix < xmax; ix += gridDim.x * blockDim.x) {
497 for (
int iy = blockDim.y * blockIdx.y + threadIdx.y; iy < ymax; iy += gridDim.y * blockDim.y) {
498 for (
int iz = blockDim.z * blockIdx.z + threadIdx.z; iz < zmax; iz += gridDim.z * blockDim.z) {
502 int index = ipt_ext_gpu_loc(border, ix, iy, iz) - master_shift;
503 if (index != -1 && 0 <= index && gsize > index) { write_gpu<double>(0, &s, sp, index, 0, 1); }
// Single-precision variant of apply_sf_open_BCs: writes `s` into the float
// spinor field on the slice t = border, with the master_shift / [0, gsize)
// ownership guard.
// NOTE(review): this kernel calls write_gpu<double> on a suNf_spinor_flt
// field, whereas every other *_flt kernel in this file uses
// write_gpu<float> — likely a copy-paste bug; confirm against upstream.
// `s`'s declaration and the closing braces are missing from this garbled
// view; `index != -1` is redundant given `0 <= index`.
509__global__
void apply_sf_open_BCs_flt(suNf_spinor_flt *sp,
int master_shift,
int gsize,
int border,
int *ipt_gpu_d,
int xmax,
510 int ymax,
int zmax) {
511 for (
int ix = blockDim.x * blockIdx.x + threadIdx.x; ix < xmax; ix += gridDim.x * blockDim.x) {
512 for (
int iy = blockDim.y * blockIdx.y + threadIdx.y; iy < ymax; iy += gridDim.y * blockDim.y) {
513 for (
int iz = blockDim.z * blockIdx.z + threadIdx.z; iz < zmax; iz += gridDim.z * blockDim.z) {
517 int index = ipt_ext_gpu_loc(border, ix, iy, iz) - master_shift;
518 if (index != -1 && 0 <= index && gsize > index) { write_gpu<double>(0, &s, sp, index, 0, 1); }
This file contains information on the geometry of the local lattice and of the block-decomposed geometry.