11#include "libhr_core.h"
13__device__ __forceinline__
void staples_dev(
int ix,
int mu, suNg *v, suNg *gauge,
int *iup_gpu,
int *idn_gpu,
14 double *plaq_weight) {
15 suNg staple, tr1, tr2;
17 int nu, i, ixpmu, ixpnu, ixmnu, ixpmumnu;
19 ixpmu = iup_gpu[4 * ix + mu];
22 for (i = 1; i < 4; i++) {
24 ixpnu = iup_gpu[4 * ix + nu];
25 ixmnu = idn_gpu[4 * ix + nu];
26 ixpmumnu = idn_gpu[4 * ixpmu + nu];
29 read_gpu<double>(0, &p1, gauge, ix, nu, 4);
30 read_gpu<double>(0, &p2, gauge, ixpnu, mu, 4);
31 read_gpu<double>(0, &p3, gauge, ixpmu, nu, 4);
33 _suNg_times_suNg(tr2, p1, p2);
35 _suNg_dagger(tr1, p3);
36 _suNg_times_suNg(staple, tr2, tr1);
39 if (plaq_weight != NULL) { _suNg_mul(staple, plaq_weight[ix * 16 + nu * 4 + mu], staple); }
41 _suNg_add_assign(*v, staple);
44 read_gpu<double>(0, &p1, gauge, ixmnu, mu, 4);
45 read_gpu<double>(0, &p2, gauge, ixpmumnu, nu, 4);
46 read_gpu<double>(0, &p3, gauge, ixmnu, nu, 4);
48 _suNg_times_suNg(tr2, p1, p2);
49 _suNg_dagger(tr1, p3);
50 _suNg_times_suNg(staple, tr1, tr2);
53 if (plaq_weight != NULL) { _suNg_mul(staple, plaq_weight[ixmnu * 16 + mu * 4 + nu], staple); }
55 _suNg_add_assign(*v, staple);