HiRep 0.1
Loading...
Searching...
No Matches
gauge_observables_gpu_kernels.hpp
1/***************************************************************************\
2* Copyright (c) 2023, Sofie Martins *
3* All rights reserved. *
4\***************************************************************************/
5
6#include "libhr_core.h"
7#include "geometry.h"
8
9__device__ static double plaq_dev(int ix, int mu, int nu, suNg *gauge, int *iup_gpu, double *plaq_weight) {
10 int iy, iz;
11 double p;
12 suNg v1, v2, v3, v4, w1, w2, w3;
13
14 iy = iup_gpu[4 * ix + mu];
15 iz = iup_gpu[4 * ix + nu];
16
17 read_gpu<double>(0, &v1, gauge, ix, mu, 4);
18 read_gpu<double>(0, &v2, gauge, iy, nu, 4);
19 read_gpu<double>(0, &v3, gauge, iz, mu, 4);
20 read_gpu<double>(0, &v4, gauge, ix, nu, 4);
21
22 _suNg_times_suNg(w1, v1, v2);
23 _suNg_times_suNg(w2, v4, v3);
24 _suNg_times_suNg_dagger(w3, w1, w2);
25
26 _suNg_trace_re(p, w3);
27
28#ifdef PLAQ_WEIGHTS
29 if (plaq_weight == NULL) { return p; }
30 return plaq_weight[ix * 16 + mu * 4 + nu] * p;
31#else
32 return p;
33#endif
34}
35
36__device__ static void cplaq_dev(hr_complex *res, int ix, int mu, int nu, suNg *gauge, int *iup_gpu, double *plaq_weight) {
37 int iy, iz;
38 suNg v1, v2, v3, v4, w1, w2, w3;
39 double tmpre = 0.;
40 double tmpim = 0.;
41
42 iy = iup_gpu[4 * ix + mu];
43 iz = iup_gpu[4 * ix + nu];
44
45 read_gpu<double>(0, &v1, gauge, ix, mu, 4);
46 read_gpu<double>(0, &v2, gauge, iy, nu, 4);
47 read_gpu<double>(0, &v3, gauge, iz, mu, 4);
48 read_gpu<double>(0, &v4, gauge, ix, nu, 4);
49
50 _suNg_times_suNg(w1, v1, v2);
51 _suNg_times_suNg(w2, v4, v3);
52 _suNg_times_suNg_dagger(w3, w1, w2);
53
54 _suNg_trace_re(tmpre, w3);
55
56#ifndef GAUGE_SON
57 _suNg_trace_im(tmpim, w3);
58#endif
59
60 double *t = (double *)res;
61 t[0] = tmpre;
62 t[1] = tmpim;
63
64#ifdef PLAQ_WEIGHTS
65 if (plaq_weight != NULL) {
66 t[0] *= plaq_weight[ix * 16 + mu * 4 + nu];
67 t[1] *= plaq_weight[ix * 16 + mu * 4 + nu];
68 }
69#endif
70}
71
72__global__ void _avr_plaquette(suNg *u, double *resField, int *iup_gpu, int N, int block_start, double *plaq_weight) {
73 for (int id = blockIdx.x * blockDim.x + threadIdx.x; id < N; id += blockDim.x * gridDim.x) {
74 const int ix = id + block_start;
75 resField[id] = plaq_dev(ix, 1, 0, u, iup_gpu, plaq_weight);
76 resField[id] += plaq_dev(ix, 2, 0, u, iup_gpu, plaq_weight);
77 resField[id] += plaq_dev(ix, 2, 1, u, iup_gpu, plaq_weight);
78 resField[id] += plaq_dev(ix, 3, 0, u, iup_gpu, plaq_weight);
79 resField[id] += plaq_dev(ix, 3, 1, u, iup_gpu, plaq_weight);
80 resField[id] += plaq_dev(ix, 3, 2, u, iup_gpu, plaq_weight);
81 }
82}
83
84__global__ void _avr_plaquette_time(suNg *g, double *resPiece, int zero, int global_T, int *iup_gpu, int *timeslices, int N,
85 int T, int block_start, double *plaq_weight) {
86 for (int id = blockIdx.x * blockDim.x + threadIdx.x; id < N; id += blockDim.x * gridDim.x) {
87 const int ix = id + block_start;
88 int nt = timeslices[ix];
89 const int tc = (zero + nt + global_T) % global_T;
90
91 resPiece[id + N * tc] += plaq_dev(ix, 1, 0, g, iup_gpu, plaq_weight);
92 resPiece[id + N * tc] += plaq_dev(ix, 2, 0, g, iup_gpu, plaq_weight);
93 resPiece[id + N * tc] += plaq_dev(ix, 3, 0, g, iup_gpu, plaq_weight);
94
95 resPiece[id + N * tc + N * global_T] += plaq_dev(ix, 2, 1, g, iup_gpu, plaq_weight);
96 resPiece[id + N * tc + N * global_T] += plaq_dev(ix, 3, 1, g, iup_gpu, plaq_weight);
97 resPiece[id + N * tc + N * global_T] += plaq_dev(ix, 3, 2, g, iup_gpu, plaq_weight);
98 }
99}
100
101__global__ void _full_plaquette(suNg *u, hr_complex *resPiece, int *iup_gpu, int N, int block_start, double *plaq_weight) {
102 for (int id = blockIdx.x * blockDim.x + threadIdx.x; id < N; id += blockDim.x * gridDim.x) {
103 const int ix = id + block_start;
104 hr_complex tmp;
105
106 cplaq_dev(&tmp, ix, 1, 0, u, iup_gpu, plaq_weight);
107 resPiece[id] = tmp;
108
109 cplaq_dev(&tmp, ix, 2, 0, u, iup_gpu, plaq_weight);
110 resPiece[id + N] = tmp;
111
112 cplaq_dev(&tmp, ix, 2, 1, u, iup_gpu, plaq_weight);
113 resPiece[id + 2 * N] = tmp;
114
115 cplaq_dev(&tmp, ix, 3, 0, u, iup_gpu, plaq_weight);
116 resPiece[id + 3 * N] = tmp;
117
118 cplaq_dev(&tmp, ix, 3, 1, u, iup_gpu, plaq_weight);
119 resPiece[id + 4 * N] = tmp;
120
121 cplaq_dev(&tmp, ix, 3, 2, u, iup_gpu, plaq_weight);
122 resPiece[id + 5 * N] = tmp;
123 }
124}
125
126__device__ static void _clover_F_gpu(suNg_algebra_vector *F, suNg *V, int ix, int mu, int nu, int *iup_gpu, int *idn_gpu) {
127 int iy, iz, iw;
128 suNg v1, v2, v3, v4, w1, w2, w3;
129
130 _suNg_unit(w3);
131 _suNg_mul(w3, -4., w3);
132
133 iy = iup_gpu[4 * ix + mu];
134 iz = iup_gpu[4 * ix + nu];
135
136 read_gpu<double>(0, &v1, V, ix, mu, 4);
137 read_gpu<double>(0, &v2, V, iy, nu, 4);
138 read_gpu<double>(0, &v3, V, iz, mu, 4);
139 read_gpu<double>(0, &v4, V, ix, nu, 4);
140
141 _suNg_times_suNg(w1, v1, v2);
142 _suNg_times_suNg_dagger(w2, w1, v3);
143 _suNg_times_suNg_dagger(w1, w2, v4);
144 _suNg_add_assign(w3, w1);
145
146 iy = idn_gpu[4 * ix + mu];
147 iz = iup_gpu[4 * iy + nu];
148
149 read_gpu<double>(0, &v1, V, ix, nu, 4);
150 read_gpu<double>(0, &v2, V, iz, mu, 4);
151 read_gpu<double>(0, &v3, V, iy, nu, 4);
152 read_gpu<double>(0, &v4, V, iy, mu, 4);
153
154 _suNg_times_suNg_dagger(w1, v1, v2);
155 _suNg_times_suNg_dagger(w2, w1, v3);
156 _suNg_times_suNg(w1, w2, v4);
157 _suNg_add_assign(w3, w1);
158
159 iy = idn_gpu[4 * ix + mu];
160 iz = idn_gpu[4 * iy + nu];
161 iw = idn_gpu[4 * ix + nu];
162
163 read_gpu<double>(0, &v1, V, iy, mu, 4);
164 read_gpu<double>(0, &v2, V, iz, nu, 4);
165 read_gpu<double>(0, &v3, V, iz, mu, 4);
166 read_gpu<double>(0, &v4, V, iw, nu, 4);
167
168 _suNg_times_suNg(w1, v2, v1);
169 _suNg_dagger_times_suNg(w2, w1, v3);
170 _suNg_times_suNg(w1, w2, v4);
171 _suNg_add_assign(w3, w1);
172
173 iy = idn_gpu[4 * ix + nu];
174 iz = iup_gpu[4 * iy + mu];
175
176 read_gpu<double>(0, &v1, V, iy, nu, 4);
177 read_gpu<double>(0, &v2, V, iy, mu, 4);
178 read_gpu<double>(0, &v3, V, iz, nu, 4);
179 read_gpu<double>(0, &v4, V, ix, mu, 4);
180
181 _suNg_dagger_times_suNg(w1, v1, v2);
182 _suNg_times_suNg(w2, w1, v3);
183 _suNg_times_suNg_dagger(w1, w2, v4);
184 _suNg_add_assign(w3, w1);
185
186 _fund_algebra_project(*F, w3);
187 _algebra_vector_mul_g(*F, 1 / 4., *F);
188}
189
190__global__ void _E_gpu(suNg *v, double *resField, int *iup_gpu, int N, int block_start, double *plaq_weight) {
191 for (int id = blockIdx.x * blockDim.x + threadIdx.x; id < N; id += blockDim.x * gridDim.x) {
192 const int ix = id + block_start;
193 double p = 0.0;
194 resField[ix] = 0.0;
195 for (int mu = 0; mu < 4; mu++) {
196 for (int nu = mu + 1; nu < 4; nu++) {
197 p = plaq_dev(ix, mu, nu, v, iup_gpu, plaq_weight);
198 resField[ix] += NG - p;
199 }
200 }
201 }
202}
203
204__global__ void _E_T_gpu(suNg *v, double *resField, int *iup_gpu, int *timeslices, int zero, int global_T, int N,
205 int block_start, double *plaq_weight) {
206 for (int id = blockIdx.x * blockDim.x + threadIdx.x; id < N; id += blockDim.x * gridDim.x) {
207 const int ix = id + block_start;
208 int nt = timeslices[ix];
209 const int tc = (zero + nt + global_T) % global_T;
210 double p = 0.0;
211 int mu = 0;
212 for (int nu = 1; nu < 4; nu++) {
213 p = plaq_dev(ix, mu, nu, v, iup_gpu, plaq_weight);
214 resField[id + N * tc] += NG - p;
215 }
216 for (mu = 1; mu < 3; mu++) {
217 for (int nu = mu + 1; nu < 4; nu++) {
218 p = plaq_dev(ix, mu, nu, v, iup_gpu, plaq_weight);
219 resField[id + 2 * N * tc] += NG - p;
220 }
221 }
222 }
223}
224
225__global__ void _Esym_gpu(suNg *V, double *resField, int *iup_gpu, int *idn_gpu, int N, int block_start) {
226 for (int id = blockIdx.x * blockDim.x + threadIdx.x; id < N; id += blockDim.x * gridDim.x) {
227 const int ix = id + block_start;
228 suNg_algebra_vector clover;
229 double p;
230 resField[ix] = 0.0;
231 for (int mu = 0; mu < 4; mu++) {
232 for (int nu = mu + 1; nu < 4; nu++) {
233 _clover_F_gpu(&clover, V, ix, mu, nu, iup_gpu, idn_gpu);
234 _algebra_vector_sqnorm_g(p, clover);
235 resField[ix] += p;
236 }
237 }
238 }
239}
240
241__global__ void _Esym_T_gpu(suNg *v, double *resField, int *iup_gpu, int *idn_gpu, int *timeslices, int zero, int global_T,
242 int N, int block_start) {
243 for (int id = blockIdx.x * blockDim.x + threadIdx.x; id < N; id += blockDim.x * gridDim.x) {
244 const int ix = id + block_start;
245 int nt = timeslices[ix];
246 const int tc = (zero + nt + global_T) % global_T;
247 suNg_algebra_vector clover;
248 double p = 0.0;
249 int mu = 0;
250 for (int nu = 1; nu < 4; nu++) {
251 _clover_F_gpu(&clover, v, ix, mu, nu, iup_gpu, idn_gpu);
252 _algebra_vector_sqnorm_g(p, clover);
253 resField[id + N * tc] += p;
254 }
255 for (mu = 1; mu < 3; mu++) {
256 for (int nu = mu + 1; nu < 4; nu++) {
257 _clover_F_gpu(&clover, v, ix, mu, nu, iup_gpu, idn_gpu);
258 _algebra_vector_sqnorm_g(p, clover);
259 resField[id + 2 * N * tc] += p;
260 }
261 }
262 }
263}
264
265__global__ void _topo_gpu(double *resField, suNg *v, int *iup_gpu, int *idn_gpu, int N, int block_start) {
266 for (int id = blockIdx.x * blockDim.x + threadIdx.x; id < N; id += blockDim.x * gridDim.x) {
267 const int ix = id + block_start;
268 resField[ix] = 0.0;
269 suNg_algebra_vector F1, F2;
270 _clover_F_gpu(&F1, v, ix, 1, 2, iup_gpu, idn_gpu);
271 _clover_F_gpu(&F2, v, ix, 0, 3, iup_gpu, idn_gpu);
272 for (int i = 0; i < NG * NG - 1; i++) {
273 resField[ix] += F1.c[i] * F2.c[i];
274 }
275
276 _clover_F_gpu(&F1, v, ix, 1, 3, iup_gpu, idn_gpu);
277 _clover_F_gpu(&F2, v, ix, 0, 2, iup_gpu, idn_gpu);
278 for (int i = 0; i < NG * NG - 1; i++) {
279 resField[ix] -= F1.c[i] * F2.c[i];
280 }
281
282 _clover_F_gpu(&F1, v, ix, 0, 1, iup_gpu, idn_gpu);
283 _clover_F_gpu(&F2, v, ix, 2, 3, iup_gpu, idn_gpu);
284 for (int i = 0; i < NG * NG - 1; i++) {
285 resField[ix] += F1.c[i] * F2.c[i];
286 }
287 }
288}
This file contains information on the geometry of the local lattice, block decomposed geometry,...