HiRep 0.1
boundary_conditions_gpu_kernels.hpp
/***************************************************************************\
* Copyright (c) 2024, Sofie Martins                                          *
* All rights reserved.                                                       *
\***************************************************************************/

// Ported from boundary_conditions_core.c by Agostino Patella and
// Claudio Pica

#ifdef WITH_GPU

#include "libhr_core.h"
#include "geometry.h"

#define ipt_ext_gpu_loc(t, x, y, z) ipt_gpu_d[_lexi(T_EXT_GPU, X_EXT_GPU, Y_EXT_GPU, Z_EXT_GPU, t, x, y, z)]
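// Looks up the linear field index of the extended-lattice site (t, x, y, z),
// where the extended local lattice (extents T_EXT_GPU x X_EXT_GPU x
// Y_EXT_GPU x Z_EXT_GPU) is the local lattice plus its halo borders. Sites
// not allocated on this rank map to -1, so the kernels below check the
// looked-up index before writing.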

/***************************************************************************/
/* BOUNDARY CONDITIONS TO BE APPLIED ON THE CLOVER TERM                     */
/***************************************************************************/

// SF (Schrödinger functional) BCs: zero all four clover components on the
// boundary timeslices t = border-1, border and border+1.
__global__ void apply_cl_SF_BCs(suNfc *C, int border, int *ipt_gpu_d, int xmax, int ymax, int zmax) {
    for (int ix = blockDim.x * blockIdx.x + threadIdx.x; ix < xmax; ix += gridDim.x * blockDim.x) {
        for (int iy = blockDim.y * blockIdx.y + threadIdx.y; iy < ymax; iy += gridDim.y * blockDim.y) {
            for (int iz = blockDim.z * blockIdx.z + threadIdx.z; iz < zmax; iz += gridDim.z * blockDim.z) {
                int index = 0;
                suNfc c;
                _suNfc_zero(c);
                if (border > 0) {
                    index = ipt_ext_gpu_loc(border - 1, ix, iy, iz);
                    if (index != -1) {
                        write_gpu<double>(0, &c, C, index, 0, 4);
                        write_gpu<double>(0, &c, C, index, 1, 4);
                        write_gpu<double>(0, &c, C, index, 2, 4);
                        write_gpu<double>(0, &c, C, index, 3, 4);
                    }
                }

                index = ipt_ext_gpu_loc(border, ix, iy, iz);
                if (index != -1) {
                    write_gpu<double>(0, &c, C, index, 0, 4);
                    write_gpu<double>(0, &c, C, index, 1, 4);
                    write_gpu<double>(0, &c, C, index, 2, 4);
                    write_gpu<double>(0, &c, C, index, 3, 4);
                }

                index = ipt_ext_gpu_loc(border + 1, ix, iy, iz);
                if (index != -1) {
                    write_gpu<double>(0, &c, C, index, 0, 4);
                    write_gpu<double>(0, &c, C, index, 1, 4);
                    write_gpu<double>(0, &c, C, index, 2, 4);
                    write_gpu<double>(0, &c, C, index, 3, 4);
                }
            }
        }
    }
}

// Open BCs, lower wall: zero the clover term on timeslices t = border-1 and
// t = border.
__global__ void apply_cl_open_BCs1(suNfc *C, int border, int *ipt_gpu_d, int xmax, int ymax, int zmax) {
    for (int ix = blockDim.x * blockIdx.x + threadIdx.x; ix < xmax; ix += gridDim.x * blockDim.x) {
        for (int iy = blockDim.y * blockIdx.y + threadIdx.y; iy < ymax; iy += gridDim.y * blockDim.y) {
            for (int iz = blockDim.z * blockIdx.z + threadIdx.z; iz < zmax; iz += gridDim.z * blockDim.z) {
                int index = 0;
                suNfc c;
                _suNfc_zero(c);
                if (border > 0) {
                    index = ipt_ext_gpu_loc(border - 1, ix, iy, iz);
                    if (index != -1) {
                        write_gpu<double>(0, &c, C, index, 0, 4);
                        write_gpu<double>(0, &c, C, index, 1, 4);
                        write_gpu<double>(0, &c, C, index, 2, 4);
                        write_gpu<double>(0, &c, C, index, 3, 4);
                    }
                }

                index = ipt_ext_gpu_loc(border, ix, iy, iz);
                if (index != -1) {
                    write_gpu<double>(0, &c, C, index, 0, 4);
                    write_gpu<double>(0, &c, C, index, 1, 4);
                    write_gpu<double>(0, &c, C, index, 2, 4);
                    write_gpu<double>(0, &c, C, index, 3, 4);
                }
            }
        }
    }
}

// Open BCs, upper wall: zero the clover term on timeslices t = T_GPU+border
// and t = T_GPU+border-1.
__global__ void apply_cl_open_BCs2(suNfc *C, int border, int *ipt_gpu_d, int xmax, int ymax, int zmax) {
    for (int ix = blockDim.x * blockIdx.x + threadIdx.x; ix < xmax; ix += gridDim.x * blockDim.x) {
        for (int iy = blockDim.y * blockIdx.y + threadIdx.y; iy < ymax; iy += gridDim.y * blockDim.y) {
            for (int iz = blockDim.z * blockIdx.z + threadIdx.z; iz < zmax; iz += gridDim.z * blockDim.z) {
                int index = 0;
                suNfc c;
                _suNfc_zero(c);
                if (border > 0) {
                    index = ipt_ext_gpu_loc(T_GPU + border, ix, iy, iz);
                    if (index != -1) {
                        write_gpu<double>(0, &c, C, index, 0, 4);
                        write_gpu<double>(0, &c, C, index, 1, 4);
                        write_gpu<double>(0, &c, C, index, 2, 4);
                        write_gpu<double>(0, &c, C, index, 3, 4);
                    }
                }

                index = ipt_ext_gpu_loc(T_GPU + border - 1, ix, iy, iz);
                if (index != -1) {
                    write_gpu<double>(0, &c, C, index, 0, 4);
                    write_gpu<double>(0, &c, C, index, 1, 4);
                    write_gpu<double>(0, &c, C, index, 2, 4);
                    write_gpu<double>(0, &c, C, index, 3, 4);
                }
            }
        }
    }
}
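
// Usage sketch (hedged; not part of the original file, and compiled out by
// default since HR_BC_KERNEL_EXAMPLES is a hypothetical flag). The kernels
// above use grid-stride loops in all three dimensions, so any 3D launch
// geometry covers the local X*Y*Z volume; X, Y, Z and ipt_gpu are assumed to
// be the usual HiRep host-side lattice extents and device index table.
#ifdef HR_BC_KERNEL_EXAMPLES
static void example_apply_cl_SF_BCs(suNfc *cl_term_gpu, int border) {
    dim3 block(4, 4, 4); // 64 threads per block over (x, y, z)
    dim3 grid((X + 3) / 4, (Y + 3) / 4, (Z + 3) / 4); // cover the spatial volume
    apply_cl_SF_BCs<<<grid, block>>>(cl_term_gpu, border, ipt_gpu, X, Y, Z);
    cudaDeviceSynchronize(); // surface launch errors in this example
}
#endif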

/***************************************************************************/
/* BOUNDARY CONDITIONS TO BE APPLIED ON THE REPRESENTED GAUGE FIELD         */
/***************************************************************************/

// Antiperiodic BCs in time: flip the sign of the temporal link (direction 0)
// on the boundary timeslice t = 2*border of the extended lattice.
__global__ void apply_boundary_conditions_T(suNf *g, int border, int *ipt_gpu_d, int xmax, int ymax, int zmax) {
    for (int ix = blockDim.x * blockIdx.x + threadIdx.x; ix < xmax; ix += gridDim.x * blockDim.x) {
        for (int iy = blockDim.y * blockIdx.y + threadIdx.y; iy < ymax; iy += gridDim.y * blockDim.y) {
            for (int iz = blockDim.z * blockIdx.z + threadIdx.z; iz < zmax; iz += gridDim.z * blockDim.z) {
                int index = ipt_ext_gpu_loc(2 * border, ix, iy, iz);
                if (index != -1) {
                    suNf u;
                    read_gpu<double>(0, &u, g, index, 0, 4);
                    _suNf_minus(u, u);
                    write_gpu<double>(0, &u, g, index, 0, 4);
                }
            }
        }
    }
}

// Antiperiodic BCs in X: same sign flip on the X-direction link (component 1)
// on the slice x = 2*border; the grid-stride loops here run over (t, y, z).
__global__ void apply_boundary_conditions_X(suNf *g, int border, int *ipt_gpu_d, int tmax, int ymax, int zmax) {
    for (int it = blockDim.x * blockIdx.x + threadIdx.x; it < tmax; it += gridDim.x * blockDim.x) {
        for (int iy = blockDim.y * blockIdx.y + threadIdx.y; iy < ymax; iy += gridDim.y * blockDim.y) {
            for (int iz = blockDim.z * blockIdx.z + threadIdx.z; iz < zmax; iz += gridDim.z * blockDim.z) {
                int index = ipt_ext_gpu_loc(it, 2 * border, iy, iz);
                if (index != -1) {
                    suNf u;
                    read_gpu<double>(0, &u, g, index, 1, 4);
                    _suNf_minus(u, u);
                    write_gpu<double>(0, &u, g, index, 1, 4);
                }
            }
        }
    }
}

// Antiperiodic BCs in Y: component 2, slice y = 2*border, loops over (t, x, z).
__global__ void apply_boundary_conditions_Y(suNf *g, int border, int *ipt_gpu_d, int tmax, int xmax, int zmax) {
    for (int it = blockDim.x * blockIdx.x + threadIdx.x; it < tmax; it += gridDim.x * blockDim.x) {
        for (int ix = blockDim.y * blockIdx.y + threadIdx.y; ix < xmax; ix += gridDim.y * blockDim.y) {
            for (int iz = blockDim.z * blockIdx.z + threadIdx.z; iz < zmax; iz += gridDim.z * blockDim.z) {
                int index = ipt_ext_gpu_loc(it, ix, 2 * border, iz);
                if (index != -1) {
                    suNf u;
                    read_gpu<double>(0, &u, g, index, 2, 4);
                    _suNf_minus(u, u);
                    write_gpu<double>(0, &u, g, index, 2, 4);
                }
            }
        }
    }
}

// Antiperiodic BCs in Z: component 3, slice z = 2*border, loops over (t, x, y).
__global__ void apply_boundary_conditions_Z(suNf *g, int border, int *ipt_gpu_d, int tmax, int xmax, int ymax) {
    for (int it = blockDim.x * blockIdx.x + threadIdx.x; it < tmax; it += gridDim.x * blockDim.x) {
        for (int ix = blockDim.y * blockIdx.y + threadIdx.y; ix < xmax; ix += gridDim.y * blockDim.y) {
            for (int iy = blockDim.z * blockIdx.z + threadIdx.z; iy < ymax; iy += gridDim.z * blockDim.z) {
                int index = ipt_ext_gpu_loc(it, ix, iy, 2 * border);
                if (index != -1) {
                    suNf u;
                    read_gpu<double>(0, &u, g, index, 3, 4);
                    _suNf_minus(u, u);
                    write_gpu<double>(0, &u, g, index, 3, 4);
                }
            }
        }
    }
}
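
// Usage sketch (hedged, compiled out): the sign flip on the links implements
// antiperiodic fermion BCs in the chosen direction. Note that the loop
// extents follow the direction, so the X/Y/Z variants must be launched over
// the three orthogonal extents (e.g. T, Y, Z for the X kernel).
#ifdef HR_BC_KERNEL_EXAMPLES
static void example_antiperiodic_T(suNf *u_gauge_f_gpu, int border) {
    dim3 block(4, 4, 4);
    dim3 grid((X + 3) / 4, (Y + 3) / 4, (Z + 3) / 4);
    apply_boundary_conditions_T<<<grid, block>>>(u_gauge_f_gpu, border, ipt_gpu, X, Y, Z);
}
#endif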

// Chirally rotated SF: scale the three spatial links on timeslice t = border
// by the boundary coefficient ds.
__global__ void apply_chiSF_ds_BT(double ds, suNf *g, int border, int *ipt_gpu_d, int xmax, int ymax, int zmax) {
    for (int ix = blockDim.x * blockIdx.x + threadIdx.x; ix < xmax; ix += gridDim.x * blockDim.x) {
        for (int iy = blockDim.y * blockIdx.y + threadIdx.y; iy < ymax; iy += gridDim.y * blockDim.y) {
            for (int iz = blockDim.z * blockIdx.z + threadIdx.z; iz < zmax; iz += gridDim.z * blockDim.z) {
                int index = ipt_ext_gpu_loc(border, ix, iy, iz);
                if (index != -1) {
                    suNf u;
                    read_gpu<double>(0, &u, g, index, 1, 4);
                    _suNf_mul(u, ds, u);
                    write_gpu<double>(0, &u, g, index, 1, 4);

                    read_gpu<double>(0, &u, g, index, 2, 4);
                    _suNf_mul(u, ds, u);
                    write_gpu<double>(0, &u, g, index, 2, 4);

                    read_gpu<double>(0, &u, g, index, 3, 4);
                    _suNf_mul(u, ds, u);
                    write_gpu<double>(0, &u, g, index, 3, 4);
                }
            }
        }
    }
}
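
// Usage sketch (hedged, compiled out): rescale the boundary spatial links,
// with ds assumed to be the chirally rotated SF boundary counterterm
// coefficient supplied by the caller.
#ifdef HR_BC_KERNEL_EXAMPLES
static void example_chiSF_ds_BT(suNf *u_gauge_f_gpu, double ds, int border) {
    dim3 block(4, 4, 4);
    dim3 grid((X + 3) / 4, (Y + 3) / 4, (Z + 3) / 4);
    apply_chiSF_ds_BT<<<grid, block>>>(ds, u_gauge_f_gpu, border, ipt_gpu, X, Y, Z);
}
#endif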

/***************************************************************************/
/* BOUNDARY CONDITIONS TO BE APPLIED ON THE FUNDAMENTAL GAUGE FIELD         */
/***************************************************************************/

// SF BCs, lower wall: set all links on t = border-1 and t = border to the
// identity, and the spatial links on t = border+1 to the fixed boundary
// matrix dn (up is unused here).
__global__ void apply_gf_SF_BCs_1(suNg *g, suNg *up, suNg *dn, int border, int *ipt_gpu_d, int xmax, int ymax, int zmax) {
    for (int ix = blockDim.x * blockIdx.x + threadIdx.x; ix < xmax; ix += gridDim.x * blockDim.x) {
        for (int iy = blockDim.y * blockIdx.y + threadIdx.y; iy < ymax; iy += gridDim.y * blockDim.y) {
            for (int iz = blockDim.z * blockIdx.z + threadIdx.z; iz < zmax; iz += gridDim.z * blockDim.z) {
                int index;
                if (border > 0) {
                    // Guard before the lookup: for border == 0 the slice
                    // t = border-1 lies outside the extended lattice.
                    index = ipt_ext_gpu_loc(border - 1, ix, iy, iz);
                    if (index != -1) {
                        suNg u;
                        _suNg_unit(u);
                        write_gpu<double>(0, &u, g, index, 0, 4);
                        write_gpu<double>(0, &u, g, index, 1, 4);
                        write_gpu<double>(0, &u, g, index, 2, 4);
                        write_gpu<double>(0, &u, g, index, 3, 4);
                    }
                }

                index = ipt_ext_gpu_loc(border, ix, iy, iz);
                if (index != -1) {
                    suNg u;
                    _suNg_unit(u);
                    write_gpu<double>(0, &u, g, index, 0, 4);
                    write_gpu<double>(0, &u, g, index, 1, 4);
                    write_gpu<double>(0, &u, g, index, 2, 4);
                    write_gpu<double>(0, &u, g, index, 3, 4);
                }

                index = ipt_ext_gpu_loc(border + 1, ix, iy, iz);
                if (index != -1) {
                    write_gpu<double>(0, dn, g, index, 1, 4);
                    write_gpu<double>(0, dn, g, index, 2, 4);
                    write_gpu<double>(0, dn, g, index, 3, 4);
                }
            }
        }
    }
}

// SF BCs, upper wall: on t = T_GPU+border-1 set the temporal link to the
// identity and the spatial links to the fixed boundary matrix up; with a
// border, also set all links on t = T_GPU+border to the identity (dn is
// unused here).
__global__ void apply_gf_SF_BCs_2(suNg *g, suNg *up, suNg *dn, int border, int *ipt_gpu_d, int xmax, int ymax, int zmax) {
    for (int ix = blockDim.x * blockIdx.x + threadIdx.x; ix < xmax; ix += gridDim.x * blockDim.x) {
        for (int iy = blockDim.y * blockIdx.y + threadIdx.y; iy < ymax; iy += gridDim.y * blockDim.y) {
            for (int iz = blockDim.z * blockIdx.z + threadIdx.z; iz < zmax; iz += gridDim.z * blockDim.z) {
                int index = ipt_ext_gpu_loc(T_GPU + border - 1, ix, iy, iz);
                if (index != -1) {
                    suNg u;
                    _suNg_unit(u);
                    write_gpu<double>(0, &u, g, index, 0, 4);
                    write_gpu<double>(0, up, g, index, 1, 4);
                    write_gpu<double>(0, up, g, index, 2, 4);
                    write_gpu<double>(0, up, g, index, 3, 4);
                }

                if (border > 0) {
                    index = ipt_ext_gpu_loc(T_GPU + border, ix, iy, iz);
                    if (index != -1) {
                        suNg u;
                        _suNg_unit(u);
                        write_gpu<double>(0, &u, g, index, 0, 4);
                        write_gpu<double>(0, &u, g, index, 1, 4);
                        write_gpu<double>(0, &u, g, index, 2, 4);
                        write_gpu<double>(0, &u, g, index, 3, 4);
                    }
                }
            }
        }
    }
}

// SF classical (background) solution: on timeslice it, set the temporal link
// to the identity and copy the matrix U into the three spatial links.
__global__ void apply_SF_classical_solution(suNg *g, suNg *U, int it, int border, int *ipt_gpu_d, int xmax, int ymax,
                                            int zmax) {
    for (int ix = blockDim.x * blockIdx.x + threadIdx.x; ix < xmax; ix += gridDim.x * blockDim.x) {
        for (int iy = blockDim.y * blockIdx.y + threadIdx.y; iy < ymax; iy += gridDim.y * blockDim.y) {
            for (int iz = blockDim.z * blockIdx.z + threadIdx.z; iz < zmax; iz += gridDim.z * blockDim.z) {
                int index = ipt_ext_gpu_loc(it, ix, iy, iz);
                if (index != -1) {
                    suNg u;
                    _suNg_unit(u);
                    write_gpu<double>(0, &u, g, index, 0, 4);

                    write_gpu<double>(0, U, g, index, 1, 4);
                    write_gpu<double>(0, U, g, index, 2, 4);
                    write_gpu<double>(0, U, g, index, 3, 4);
                }
            }
        }
    }
}

// Open BCs: zero all four links on timeslice t = border.
__global__ void apply_gf_open_BCs(suNg *g, int border, int *ipt_gpu_d, int xmax, int ymax, int zmax) {
    for (int ix = blockDim.x * blockIdx.x + threadIdx.x; ix < xmax; ix += gridDim.x * blockDim.x) {
        for (int iy = blockDim.y * blockIdx.y + threadIdx.y; iy < ymax; iy += gridDim.y * blockDim.y) {
            for (int iz = blockDim.z * blockIdx.z + threadIdx.z; iz < zmax; iz += gridDim.z * blockDim.z) {
                int index = ipt_ext_gpu_loc(border, ix, iy, iz);
                if (index != -1) {
                    suNg u;
                    _suNg_zero(u);
                    write_gpu<double>(0, &u, g, index, 0, 4);
                    write_gpu<double>(0, &u, g, index, 1, 4);
                    write_gpu<double>(0, &u, g, index, 2, 4);
                    write_gpu<double>(0, &u, g, index, 3, 4);
                }
            }
        }
    }
}

// Open BCs, variant that zeroes only the temporal link on t = border.
__global__ void apply_gf_open_BCs2(suNg *g, int border, int *ipt_gpu_d, int xmax, int ymax, int zmax) {
    for (int ix = blockDim.x * blockIdx.x + threadIdx.x; ix < xmax; ix += gridDim.x * blockDim.x) {
        for (int iy = blockDim.y * blockIdx.y + threadIdx.y; iy < ymax; iy += gridDim.y * blockDim.y) {
            for (int iz = blockDim.z * blockIdx.z + threadIdx.z; iz < zmax; iz += gridDim.z * blockDim.z) {
                int index = ipt_ext_gpu_loc(border, ix, iy, iz);
                if (index != -1) {
                    suNg u;
                    _suNg_zero(u);
                    write_gpu<double>(0, &u, g, index, 0, 4);
                }
            }
        }
    }
}
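
// Usage sketch (hedged, compiled out): apply_gf_SF_BCs_1/2 expect device
// pointers to single suNg matrices holding the fixed lower (dn) and upper
// (up) SF boundary links. One plausible way to stage them from host values;
// all names here are illustrative, not the actual HiRep wrapper.
#ifdef HR_BC_KERNEL_EXAMPLES
static void example_gf_SF_BCs(suNg *u_gauge_gpu, const suNg *up_host, const suNg *dn_host, int border) {
    suNg *up_d, *dn_d;
    cudaMalloc((void **)&up_d, sizeof(suNg));
    cudaMalloc((void **)&dn_d, sizeof(suNg));
    cudaMemcpy(up_d, up_host, sizeof(suNg), cudaMemcpyHostToDevice);
    cudaMemcpy(dn_d, dn_host, sizeof(suNg), cudaMemcpyHostToDevice);
    dim3 block(4, 4, 4);
    dim3 grid((X + 3) / 4, (Y + 3) / 4, (Z + 3) / 4);
    apply_gf_SF_BCs_1<<<grid, block>>>(u_gauge_gpu, up_d, dn_d, border, ipt_gpu, X, Y, Z);
    apply_gf_SF_BCs_2<<<grid, block>>>(u_gauge_gpu, up_d, dn_d, border, ipt_gpu, X, Y, Z);
    cudaFree(up_d);
    cudaFree(dn_d);
}
#endif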

/***************************************************************************/
/* BOUNDARY CONDITIONS TO BE APPLIED ON THE MOMENTUM FIELDS                 */
/***************************************************************************/

// Dirichlet BCs: zero all four momentum components on timeslice t = border.
__global__ void apply_mf_Dirichlet_BCs(suNg_algebra_vector *V, int border, int *ipt_gpu_d, int xmax, int ymax, int zmax) {
    for (int ix = blockDim.x * blockIdx.x + threadIdx.x; ix < xmax; ix += gridDim.x * blockDim.x) {
        for (int iy = blockDim.y * blockIdx.y + threadIdx.y; iy < ymax; iy += gridDim.y * blockDim.y) {
            for (int iz = blockDim.z * blockIdx.z + threadIdx.z; iz < zmax; iz += gridDim.z * blockDim.z) {
                int index = ipt_ext_gpu_loc(border, ix, iy, iz);
                if (index != -1) {
                    suNg_algebra_vector v;
                    _algebra_vector_zero_g(v);
                    write_gpu<double>(0, &v, V, index, 0, 4);
                    write_gpu<double>(0, &v, V, index, 1, 4);
                    write_gpu<double>(0, &v, V, index, 2, 4);
                    write_gpu<double>(0, &v, V, index, 3, 4);
                }
            }
        }
    }
}

// As above, but zero only the spatial components (1, 2, 3).
__global__ void apply_mf_Dirichlet_BCs_spatial(suNg_algebra_vector *V, int border, int *ipt_gpu_d, int xmax, int ymax,
                                               int zmax) {
    for (int ix = blockDim.x * blockIdx.x + threadIdx.x; ix < xmax; ix += gridDim.x * blockDim.x) {
        for (int iy = blockDim.y * blockIdx.y + threadIdx.y; iy < ymax; iy += gridDim.y * blockDim.y) {
            for (int iz = blockDim.z * blockIdx.z + threadIdx.z; iz < zmax; iz += gridDim.z * blockDim.z) {
                int index = ipt_ext_gpu_loc(border, ix, iy, iz);
                if (index != -1) {
                    suNg_algebra_vector v;
                    _algebra_vector_zero_g(v);
                    write_gpu<double>(0, &v, V, index, 1, 4);
                    write_gpu<double>(0, &v, V, index, 2, 4);
                    write_gpu<double>(0, &v, V, index, 3, 4);
                }
            }
        }
    }
}

// As above, but zero only the temporal component (0).
__global__ void apply_mf_Dirichlet_BCs_temporal(suNg_algebra_vector *V, int border, int *ipt_gpu_d, int xmax, int ymax,
                                                int zmax) {
    for (int ix = blockDim.x * blockIdx.x + threadIdx.x; ix < xmax; ix += gridDim.x * blockDim.x) {
        for (int iy = blockDim.y * blockIdx.y + threadIdx.y; iy < ymax; iy += gridDim.y * blockDim.y) {
            for (int iz = blockDim.z * blockIdx.z + threadIdx.z; iz < zmax; iz += gridDim.z * blockDim.z) {
                int index = ipt_ext_gpu_loc(border, ix, iy, iz);
                if (index != -1) {
                    suNg_algebra_vector v;
                    _algebra_vector_zero_g(v);
                    write_gpu<double>(0, &v, V, index, 0, 4);
                }
            }
        }
    }
}
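
// Usage sketch (hedged, compiled out): with Dirichlet/SF boundary conditions
// the momenta on the fixed timeslices must vanish so that the boundary links
// are never evolved during the molecular dynamics; the spatial and temporal
// variants zero only the corresponding subset of components.
#ifdef HR_BC_KERNEL_EXAMPLES
static void example_mf_Dirichlet(suNg_algebra_vector *mom_gpu, int border) {
    dim3 block(4, 4, 4);
    dim3 grid((X + 3) / 4, (Y + 3) / 4, (Z + 3) / 4);
    apply_mf_Dirichlet_BCs<<<grid, block>>>(mom_gpu, border, ipt_gpu, X, Y, Z);
}
#endif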

/***************************************************************************/
/* BOUNDARY CONDITIONS TO BE APPLIED ON THE SPINOR FIELDS                   */
/***************************************************************************/

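// In addition to the boundary timeslice, these kernels take the field's
// master_shift (the offset of its first site in the full-lattice indexing,
// e.g. for fields living on one parity of an even-odd decomposition) and its
// local size gsize; indices are shifted and bounds-checked before writing.
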
// SF/Dirichlet BCs: zero the spinor on timeslices t = border and t = border+1.
// Off-rank sites (lookup -1) fail the lower bound after the shift.
__global__ void apply_sf_Dirichlet_BCs1(suNf_spinor *sp, int master_shift, int gsize, int border, int *ipt_gpu_d, int xmax,
                                        int ymax, int zmax) {
    for (int ix = blockDim.x * blockIdx.x + threadIdx.x; ix < xmax; ix += gridDim.x * blockDim.x) {
        for (int iy = blockDim.y * blockIdx.y + threadIdx.y; iy < ymax; iy += gridDim.y * blockDim.y) {
            for (int iz = blockDim.z * blockIdx.z + threadIdx.z; iz < zmax; iz += gridDim.z * blockDim.z) {
                suNf_spinor s;
                _spinor_zero_f(s);

                int index = ipt_ext_gpu_loc(border, ix, iy, iz) - master_shift;
                if (0 <= index && index < gsize) { write_gpu<double>(0, &s, sp, index, 0, 1); }

                index = ipt_ext_gpu_loc(border + 1, ix, iy, iz) - master_shift;
                if (0 <= index && index < gsize) { write_gpu<double>(0, &s, sp, index, 0, 1); }
            }
        }
    }
}

// As above, but zero only timeslice t = border.
__global__ void apply_sf_Dirichlet_BCs2(suNf_spinor *sp, int master_shift, int gsize, int border, int *ipt_gpu_d, int xmax,
                                        int ymax, int zmax) {
    for (int ix = blockDim.x * blockIdx.x + threadIdx.x; ix < xmax; ix += gridDim.x * blockDim.x) {
        for (int iy = blockDim.y * blockIdx.y + threadIdx.y; iy < ymax; iy += gridDim.y * blockDim.y) {
            for (int iz = blockDim.z * blockIdx.z + threadIdx.z; iz < zmax; iz += gridDim.z * blockDim.z) {
                suNf_spinor s;
                _spinor_zero_f(s);

                int index = ipt_ext_gpu_loc(border, ix, iy, iz) - master_shift;
                if (0 <= index && index < gsize) { write_gpu<double>(0, &s, sp, index, 0, 1); }
            }
        }
    }
}

// As above, but zero only timeslice t = border-1.
__global__ void apply_sf_Dirichlet_BCs3(suNf_spinor *sp, int master_shift, int gsize, int border, int *ipt_gpu_d, int xmax,
                                        int ymax, int zmax) {
    for (int ix = blockDim.x * blockIdx.x + threadIdx.x; ix < xmax; ix += gridDim.x * blockDim.x) {
        for (int iy = blockDim.y * blockIdx.y + threadIdx.y; iy < ymax; iy += gridDim.y * blockDim.y) {
            for (int iz = blockDim.z * blockIdx.z + threadIdx.z; iz < zmax; iz += gridDim.z * blockDim.z) {
                suNf_spinor s;
                _spinor_zero_f(s);
                int index = ipt_ext_gpu_loc(border - 1, ix, iy, iz) - master_shift;
                if (0 <= index && index < gsize) { write_gpu<double>(0, &s, sp, index, 0, 1); }
            }
        }
    }
}

// Single-precision variants of the three Dirichlet kernels above.
__global__ void apply_sf_Dirichlet_BCs1_flt(suNf_spinor_flt *sp, int master_shift, int gsize, int border, int *ipt_gpu_d,
                                            int xmax, int ymax, int zmax) {
    for (int ix = blockDim.x * blockIdx.x + threadIdx.x; ix < xmax; ix += gridDim.x * blockDim.x) {
        for (int iy = blockDim.y * blockIdx.y + threadIdx.y; iy < ymax; iy += gridDim.y * blockDim.y) {
            for (int iz = blockDim.z * blockIdx.z + threadIdx.z; iz < zmax; iz += gridDim.z * blockDim.z) {
                suNf_spinor_flt s;
                _spinor_zero_f(s);

                int index = ipt_ext_gpu_loc(border, ix, iy, iz) - master_shift;
                if (0 <= index && index < gsize) { write_gpu<float>(0, &s, sp, index, 0, 1); }

                index = ipt_ext_gpu_loc(border + 1, ix, iy, iz) - master_shift;
                if (0 <= index && index < gsize) { write_gpu<float>(0, &s, sp, index, 0, 1); }
            }
        }
    }
}

__global__ void apply_sf_Dirichlet_BCs2_flt(suNf_spinor_flt *sp, int master_shift, int gsize, int border, int *ipt_gpu_d,
                                            int xmax, int ymax, int zmax) {
    for (int ix = blockDim.x * blockIdx.x + threadIdx.x; ix < xmax; ix += gridDim.x * blockDim.x) {
        for (int iy = blockDim.y * blockIdx.y + threadIdx.y; iy < ymax; iy += gridDim.y * blockDim.y) {
            for (int iz = blockDim.z * blockIdx.z + threadIdx.z; iz < zmax; iz += gridDim.z * blockDim.z) {
                suNf_spinor_flt s;
                _spinor_zero_f(s);

                int index = ipt_ext_gpu_loc(border, ix, iy, iz) - master_shift;
                if (0 <= index && index < gsize) { write_gpu<float>(0, &s, sp, index, 0, 1); }
            }
        }
    }
}

__global__ void apply_sf_Dirichlet_BCs3_flt(suNf_spinor_flt *sp, int master_shift, int gsize, int border, int *ipt_gpu_d,
                                            int xmax, int ymax, int zmax) {
    for (int ix = blockDim.x * blockIdx.x + threadIdx.x; ix < xmax; ix += gridDim.x * blockDim.x) {
        for (int iy = blockDim.y * blockIdx.y + threadIdx.y; iy < ymax; iy += gridDim.y * blockDim.y) {
            for (int iz = blockDim.z * blockIdx.z + threadIdx.z; iz < zmax; iz += gridDim.z * blockDim.z) {
                suNf_spinor_flt s;
                _spinor_zero_f(s);
                int index = ipt_ext_gpu_loc(border - 1, ix, iy, iz) - master_shift;
                if (0 <= index && index < gsize) { write_gpu<float>(0, &s, sp, index, 0, 1); }
            }
        }
    }
}

// Open BCs: zero the spinor on timeslice t = border.
__global__ void apply_sf_open_BCs(suNf_spinor *sp, int master_shift, int gsize, int border, int *ipt_gpu_d, int xmax, int ymax,
                                  int zmax) {
    for (int ix = blockDim.x * blockIdx.x + threadIdx.x; ix < xmax; ix += gridDim.x * blockDim.x) {
        for (int iy = blockDim.y * blockIdx.y + threadIdx.y; iy < ymax; iy += gridDim.y * blockDim.y) {
            for (int iz = blockDim.z * blockIdx.z + threadIdx.z; iz < zmax; iz += gridDim.z * blockDim.z) {
                suNf_spinor s;
                _spinor_zero_f(s);

                int index = ipt_ext_gpu_loc(border, ix, iy, iz) - master_shift;
                if (0 <= index && index < gsize) { write_gpu<double>(0, &s, sp, index, 0, 1); }
            }
        }
    }
}

// Single-precision variant of apply_sf_open_BCs.
__global__ void apply_sf_open_BCs_flt(suNf_spinor_flt *sp, int master_shift, int gsize, int border, int *ipt_gpu_d, int xmax,
                                      int ymax, int zmax) {
    for (int ix = blockDim.x * blockIdx.x + threadIdx.x; ix < xmax; ix += gridDim.x * blockDim.x) {
        for (int iy = blockDim.y * blockIdx.y + threadIdx.y; iy < ymax; iy += gridDim.y * blockDim.y) {
            for (int iz = blockDim.z * blockIdx.z + threadIdx.z; iz < zmax; iz += gridDim.z * blockDim.z) {
                suNf_spinor_flt s;
                _spinor_zero_f(s);

                int index = ipt_ext_gpu_loc(border, ix, iy, iz) - master_shift;
                if (0 <= index && index < gsize) { write_gpu<float>(0, &s, sp, index, 0, 1); }
            }
        }
    }
}
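
// Usage sketch (hedged, compiled out): zero a spinor field on the Dirichlet
// boundary slices; master_shift and gsize would come from the field's
// geometry descriptor (names illustrative).
#ifdef HR_BC_KERNEL_EXAMPLES
static void example_sf_Dirichlet(suNf_spinor *sp_gpu, int master_shift, int gsize, int border) {
    dim3 block(4, 4, 4);
    dim3 grid((X + 3) / 4, (Y + 3) / 4, (Z + 3) / 4);
    apply_sf_Dirichlet_BCs1<<<grid, block>>>(sp_gpu, master_shift, gsize, border, ipt_gpu, X, Y, Z);
}
#endif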

#endif