9__device__
static double plaq_dev(
int ix,
int mu,
int nu, suNg *gauge,
int *iup_gpu,
double *plaq_weight) {
12 suNg v1, v2, v3, v4, w1, w2, w3;
14 iy = iup_gpu[4 * ix + mu];
15 iz = iup_gpu[4 * ix + nu];
17 read_gpu<double>(0, &v1, gauge, ix, mu, 4);
18 read_gpu<double>(0, &v2, gauge, iy, nu, 4);
19 read_gpu<double>(0, &v3, gauge, iz, mu, 4);
20 read_gpu<double>(0, &v4, gauge, ix, nu, 4);
22 _suNg_times_suNg(w1, v1, v2);
23 _suNg_times_suNg(w2, v4, v3);
24 _suNg_times_suNg_dagger(w3, w1, w2);
26 _suNg_trace_re(p, w3);
29 if (plaq_weight == NULL) {
return p; }
30 return plaq_weight[ix * 16 + mu * 4 + nu] * p;
36__device__
static void cplaq_dev(hr_complex *res,
int ix,
int mu,
int nu, suNg *gauge,
int *iup_gpu,
double *plaq_weight) {
38 suNg v1, v2, v3, v4, w1, w2, w3;
42 iy = iup_gpu[4 * ix + mu];
43 iz = iup_gpu[4 * ix + nu];
45 read_gpu<double>(0, &v1, gauge, ix, mu, 4);
46 read_gpu<double>(0, &v2, gauge, iy, nu, 4);
47 read_gpu<double>(0, &v3, gauge, iz, mu, 4);
48 read_gpu<double>(0, &v4, gauge, ix, nu, 4);
50 _suNg_times_suNg(w1, v1, v2);
51 _suNg_times_suNg(w2, v4, v3);
52 _suNg_times_suNg_dagger(w3, w1, w2);
54 _suNg_trace_re(tmpre, w3);
57 _suNg_trace_im(tmpim, w3);
60 double *t = (
double *)res;
65 if (plaq_weight != NULL) {
66 t[0] *= plaq_weight[ix * 16 + mu * 4 + nu];
67 t[1] *= plaq_weight[ix * 16 + mu * 4 + nu];
72__global__
void _avr_plaquette(suNg *u,
double *resField,
int *iup_gpu,
int N,
int block_start,
double *plaq_weight) {
73 for (
int id = blockIdx.x * blockDim.x + threadIdx.x;
id < N;
id += blockDim.x * gridDim.x) {
74 const int ix =
id + block_start;
75 resField[id] = plaq_dev(ix, 1, 0, u, iup_gpu, plaq_weight);
76 resField[id] += plaq_dev(ix, 2, 0, u, iup_gpu, plaq_weight);
77 resField[id] += plaq_dev(ix, 2, 1, u, iup_gpu, plaq_weight);
78 resField[id] += plaq_dev(ix, 3, 0, u, iup_gpu, plaq_weight);
79 resField[id] += plaq_dev(ix, 3, 1, u, iup_gpu, plaq_weight);
80 resField[id] += plaq_dev(ix, 3, 2, u, iup_gpu, plaq_weight);
84__global__
void _avr_plaquette_time(suNg *g,
double *resPiece,
int zero,
int global_T,
int *iup_gpu,
int *timeslices,
int N,
85 int T,
int block_start,
double *plaq_weight) {
86 for (
int id = blockIdx.x * blockDim.x + threadIdx.x;
id < N;
id += blockDim.x * gridDim.x) {
87 const int ix =
id + block_start;
88 int nt = timeslices[ix];
89 const int tc = (zero + nt + global_T) % global_T;
91 resPiece[
id + N * tc] += plaq_dev(ix, 1, 0, g, iup_gpu, plaq_weight);
92 resPiece[
id + N * tc] += plaq_dev(ix, 2, 0, g, iup_gpu, plaq_weight);
93 resPiece[
id + N * tc] += plaq_dev(ix, 3, 0, g, iup_gpu, plaq_weight);
95 resPiece[
id + N * tc + N * global_T] += plaq_dev(ix, 2, 1, g, iup_gpu, plaq_weight);
96 resPiece[
id + N * tc + N * global_T] += plaq_dev(ix, 3, 1, g, iup_gpu, plaq_weight);
97 resPiece[
id + N * tc + N * global_T] += plaq_dev(ix, 3, 2, g, iup_gpu, plaq_weight);
101__global__
void _full_plaquette(suNg *u, hr_complex *resPiece,
int *iup_gpu,
int N,
int block_start,
double *plaq_weight) {
102 for (
int id = blockIdx.x * blockDim.x + threadIdx.x;
id < N;
id += blockDim.x * gridDim.x) {
103 const int ix =
id + block_start;
106 cplaq_dev(&tmp, ix, 1, 0, u, iup_gpu, plaq_weight);
109 cplaq_dev(&tmp, ix, 2, 0, u, iup_gpu, plaq_weight);
110 resPiece[
id + N] = tmp;
112 cplaq_dev(&tmp, ix, 2, 1, u, iup_gpu, plaq_weight);
113 resPiece[
id + 2 * N] = tmp;
115 cplaq_dev(&tmp, ix, 3, 0, u, iup_gpu, plaq_weight);
116 resPiece[
id + 3 * N] = tmp;
118 cplaq_dev(&tmp, ix, 3, 1, u, iup_gpu, plaq_weight);
119 resPiece[
id + 4 * N] = tmp;
121 cplaq_dev(&tmp, ix, 3, 2, u, iup_gpu, plaq_weight);
122 resPiece[
id + 5 * N] = tmp;
126__device__
static void _clover_F_gpu(suNg_algebra_vector *F, suNg *V,
int ix,
int mu,
int nu,
int *iup_gpu,
int *idn_gpu) {
128 suNg v1, v2, v3, v4, w1, w2, w3;
131 _suNg_mul(w3, -4., w3);
133 iy = iup_gpu[4 * ix + mu];
134 iz = iup_gpu[4 * ix + nu];
136 read_gpu<double>(0, &v1, V, ix, mu, 4);
137 read_gpu<double>(0, &v2, V, iy, nu, 4);
138 read_gpu<double>(0, &v3, V, iz, mu, 4);
139 read_gpu<double>(0, &v4, V, ix, nu, 4);
141 _suNg_times_suNg(w1, v1, v2);
142 _suNg_times_suNg_dagger(w2, w1, v3);
143 _suNg_times_suNg_dagger(w1, w2, v4);
144 _suNg_add_assign(w3, w1);
146 iy = idn_gpu[4 * ix + mu];
147 iz = iup_gpu[4 * iy + nu];
149 read_gpu<double>(0, &v1, V, ix, nu, 4);
150 read_gpu<double>(0, &v2, V, iz, mu, 4);
151 read_gpu<double>(0, &v3, V, iy, nu, 4);
152 read_gpu<double>(0, &v4, V, iy, mu, 4);
154 _suNg_times_suNg_dagger(w1, v1, v2);
155 _suNg_times_suNg_dagger(w2, w1, v3);
156 _suNg_times_suNg(w1, w2, v4);
157 _suNg_add_assign(w3, w1);
159 iy = idn_gpu[4 * ix + mu];
160 iz = idn_gpu[4 * iy + nu];
161 iw = idn_gpu[4 * ix + nu];
163 read_gpu<double>(0, &v1, V, iy, mu, 4);
164 read_gpu<double>(0, &v2, V, iz, nu, 4);
165 read_gpu<double>(0, &v3, V, iz, mu, 4);
166 read_gpu<double>(0, &v4, V, iw, nu, 4);
168 _suNg_times_suNg(w1, v2, v1);
169 _suNg_dagger_times_suNg(w2, w1, v3);
170 _suNg_times_suNg(w1, w2, v4);
171 _suNg_add_assign(w3, w1);
173 iy = idn_gpu[4 * ix + nu];
174 iz = iup_gpu[4 * iy + mu];
176 read_gpu<double>(0, &v1, V, iy, nu, 4);
177 read_gpu<double>(0, &v2, V, iy, mu, 4);
178 read_gpu<double>(0, &v3, V, iz, nu, 4);
179 read_gpu<double>(0, &v4, V, ix, mu, 4);
181 _suNg_dagger_times_suNg(w1, v1, v2);
182 _suNg_times_suNg(w2, w1, v3);
183 _suNg_times_suNg_dagger(w1, w2, v4);
184 _suNg_add_assign(w3, w1);
186 _fund_algebra_project(*F, w3);
187 _algebra_vector_mul_g(*F, 1 / 4., *F);
190__global__
void _E_gpu(suNg *v,
double *resField,
int *iup_gpu,
int N,
int block_start,
double *plaq_weight) {
191 for (
int id = blockIdx.x * blockDim.x + threadIdx.x;
id < N;
id += blockDim.x * gridDim.x) {
192 const int ix =
id + block_start;
195 for (
int mu = 0; mu < 4; mu++) {
196 for (
int nu = mu + 1; nu < 4; nu++) {
197 p = plaq_dev(ix, mu, nu, v, iup_gpu, plaq_weight);
198 resField[ix] += NG - p;
204__global__
void _E_T_gpu(suNg *v,
double *resField,
int *iup_gpu,
int *timeslices,
int zero,
int global_T,
int N,
205 int block_start,
double *plaq_weight) {
206 for (
int id = blockIdx.x * blockDim.x + threadIdx.x;
id < N;
id += blockDim.x * gridDim.x) {
207 const int ix =
id + block_start;
208 int nt = timeslices[ix];
209 const int tc = (zero + nt + global_T) % global_T;
212 for (
int nu = 1; nu < 4; nu++) {
213 p = plaq_dev(ix, mu, nu, v, iup_gpu, plaq_weight);
214 resField[
id + N * tc] += NG - p;
216 for (mu = 1; mu < 3; mu++) {
217 for (
int nu = mu + 1; nu < 4; nu++) {
218 p = plaq_dev(ix, mu, nu, v, iup_gpu, plaq_weight);
219 resField[
id + 2 * N * tc] += NG - p;
225__global__
void _Esym_gpu(suNg *V,
double *resField,
int *iup_gpu,
int *idn_gpu,
int N,
int block_start) {
226 for (
int id = blockIdx.x * blockDim.x + threadIdx.x;
id < N;
id += blockDim.x * gridDim.x) {
227 const int ix =
id + block_start;
228 suNg_algebra_vector clover;
231 for (
int mu = 0; mu < 4; mu++) {
232 for (
int nu = mu + 1; nu < 4; nu++) {
233 _clover_F_gpu(&clover, V, ix, mu, nu, iup_gpu, idn_gpu);
234 _algebra_vector_sqnorm_g(p, clover);
241__global__
void _Esym_T_gpu(suNg *v,
double *resField,
int *iup_gpu,
int *idn_gpu,
int *timeslices,
int zero,
int global_T,
242 int N,
int block_start) {
243 for (
int id = blockIdx.x * blockDim.x + threadIdx.x;
id < N;
id += blockDim.x * gridDim.x) {
244 const int ix =
id + block_start;
245 int nt = timeslices[ix];
246 const int tc = (zero + nt + global_T) % global_T;
247 suNg_algebra_vector clover;
250 for (
int nu = 1; nu < 4; nu++) {
251 _clover_F_gpu(&clover, v, ix, mu, nu, iup_gpu, idn_gpu);
252 _algebra_vector_sqnorm_g(p, clover);
253 resField[
id + N * tc] += p;
255 for (mu = 1; mu < 3; mu++) {
256 for (
int nu = mu + 1; nu < 4; nu++) {
257 _clover_F_gpu(&clover, v, ix, mu, nu, iup_gpu, idn_gpu);
258 _algebra_vector_sqnorm_g(p, clover);
259 resField[
id + 2 * N * tc] += p;
265__global__
void _topo_gpu(
double *resField, suNg *v,
int *iup_gpu,
int *idn_gpu,
int N,
int block_start) {
266 for (
int id = blockIdx.x * blockDim.x + threadIdx.x;
id < N;
id += blockDim.x * gridDim.x) {
267 const int ix =
id + block_start;
269 suNg_algebra_vector F1, F2;
270 _clover_F_gpu(&F1, v, ix, 1, 2, iup_gpu, idn_gpu);
271 _clover_F_gpu(&F2, v, ix, 0, 3, iup_gpu, idn_gpu);
272 for (
int i = 0; i < NG * NG - 1; i++) {
273 resField[ix] += F1.c[i] * F2.c[i];
276 _clover_F_gpu(&F1, v, ix, 1, 3, iup_gpu, idn_gpu);
277 _clover_F_gpu(&F2, v, ix, 0, 2, iup_gpu, idn_gpu);
278 for (
int i = 0; i < NG * NG - 1; i++) {
279 resField[ix] -= F1.c[i] * F2.c[i];
282 _clover_F_gpu(&F1, v, ix, 0, 1, iup_gpu, idn_gpu);
283 _clover_F_gpu(&F2, v, ix, 2, 3, iup_gpu, idn_gpu);
284 for (
int i = 0; i < NG * NG - 1; i++) {
285 resField[ix] += F1.c[i] * F2.c[i];
This file contains information on the geometry of the local lattice, block decomposed geometry,...