HiRep 0.1
linear_algebra_generic_gpu.hpp
#ifndef LINEAR_ALGEBRA_GENERIC_GPU_HPP
#define LINEAR_ALGEBRA_GENERIC_GPU_HPP

#ifdef __cplusplus

#include "inverters.h"
#include "libhr_core.h"

#define _FUNC_GENERIC(_type, _name, _args) _type _name _args
#define _DECLARE_LINA_HEADER(a, b, c) a b c;
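// Each block below sets the template parameters _FIELD_TYPE, _REAL and
// _COMPLEX and then includes the TMPL/*.tmpl headers, which expand to the
// linear-algebra declarations for that field type (an X-macro pattern);
// _DECLARE_LINA_HEADER shapes each expansion into a plain declaration.
// The parameter macros are presumably #undef'd again inside the templates,
// since every block redefines them without an explicit #undef here.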

// double precision
#define _FIELD_TYPE spinor_field
#define _REAL double
#define _COMPLEX hr_complex
#include "TMPL/linear_algebra_reduction.h.tmpl"
#include "TMPL/linear_algebra_base_operations.h.tmpl"
#include "TMPL/linear_algebra_lc.h.tmpl"
#include "TMPL/linear_algebra_gamma.h.tmpl"
#include "TMPL/linear_algebra_base.h.tmpl" // This has to come last

// single precision
#define _FIELD_TYPE spinor_field_flt
#define _REAL float
#define _COMPLEX hr_complex_flt
#include "TMPL/linear_algebra_reduction.h.tmpl"
#include "TMPL/linear_algebra_base_operations.h.tmpl"
#include "TMPL/linear_algebra_lc.h.tmpl"
#include "TMPL/linear_algebra_gamma.h.tmpl"
#include "TMPL/linear_algebra_base.h.tmpl"

// double precision
#define _FIELD_TYPE scalar_field
#define _REAL double
#define _COMPLEX hr_complex
#include "TMPL/linear_algebra_reduction.h.tmpl"
#include "TMPL/linear_algebra_base_operations.h.tmpl"
#include "TMPL/linear_algebra_base.h.tmpl"

#define _FIELD_TYPE suNg_field
#define _REAL double
#define _COMPLEX hr_complex
#include "TMPL/linear_algebra_reduction.h.tmpl"
#include "TMPL/linear_algebra_base_operations.h.tmpl"
#include "TMPL/linear_algebra_base.h.tmpl"

#define _FIELD_TYPE suNf_field
#define _REAL double
#define _COMPLEX hr_complex
#include "TMPL/linear_algebra_reduction.h.tmpl"
#include "TMPL/linear_algebra_base_operations.h.tmpl"
#include "TMPL/linear_algebra_base.h.tmpl"

#define _FIELD_TYPE suNfc_field
#define _REAL double
#define _COMPLEX hr_complex
#include "TMPL/linear_algebra_reduction.h.tmpl"
#include "TMPL/linear_algebra_base_operations.h.tmpl"
#include "TMPL/linear_algebra_base.h.tmpl"

#define _FIELD_TYPE suNg_field_flt
#define _REAL float
#define _COMPLEX hr_complex_flt
#include "TMPL/linear_algebra_reduction.h.tmpl"
#include "TMPL/linear_algebra_base_operations.h.tmpl"
#include "TMPL/linear_algebra_base.h.tmpl"

#define _FIELD_TYPE suNf_field_flt
#define _REAL float
#define _COMPLEX hr_complex_flt
#include "TMPL/linear_algebra_reduction.h.tmpl"
#include "TMPL/linear_algebra_base_operations.h.tmpl"
#include "TMPL/linear_algebra_base.h.tmpl"

#define _FIELD_TYPE suNg_scalar_field
#define _REAL double
#define _COMPLEX hr_complex
#include "TMPL/linear_algebra_reduction.h.tmpl"
#include "TMPL/linear_algebra_base_operations.h.tmpl"
#include "TMPL/linear_algebra_base.h.tmpl"

#define _FIELD_TYPE suNg_av_field
#define _REAL double
#define _COMPLEX hr_complex
#include "TMPL/linear_algebra_reduction.h.tmpl"
#include "TMPL/linear_algebra_base_operations.h.tmpl"
#include "TMPL/linear_algebra_base.h.tmpl"

#define _FIELD_TYPE gtransf
#define _REAL double
#define _COMPLEX hr_complex
#include "TMPL/linear_algebra_reduction.h.tmpl"
#include "TMPL/linear_algebra_base_operations.h.tmpl"
#include "TMPL/linear_algebra_base.h.tmpl"

#define _FIELD_TYPE ldl_field
#define _REAL double
#define _COMPLEX hr_complex
#include "TMPL/linear_algebra_base.h.tmpl"

#define _FIELD_TYPE clover_term
#define _REAL double
#define _COMPLEX hr_complex
#include "TMPL/linear_algebra_reduction.h.tmpl"
#include "TMPL/linear_algebra_base_operations.h.tmpl"
#include "TMPL/linear_algebra_base.h.tmpl"

#define _FIELD_TYPE clover_force
#define _REAL double
#define _COMPLEX hr_complex
#include "TMPL/linear_algebra_reduction.h.tmpl"
#include "TMPL/linear_algebra_base_operations.h.tmpl"
#include "TMPL/linear_algebra_base.h.tmpl"

#define _FIELD_TYPE staple_field
#define _REAL double
#define _COMPLEX hr_complex
#include "TMPL/linear_algebra_reduction.h.tmpl"
#include "TMPL/linear_algebra_base_operations.h.tmpl"
#include "TMPL/linear_algebra_base.h.tmpl"

// Cannot use error() here because it is undefined in device code
//#define template_error error(1, 1, __func__, "Complex multiplication of real-valued field \n");
#define template_error

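// What follows is the site-level overload set used by the generic GPU linear
// algebra: one overload per site type (spinors, matrices, vectors, algebra
// vectors, plain reals). A generic kernel can then be written once against
// these names and instantiated for every field type, e.g. this illustrative
// sketch (not part of this header):
//
//   template <typename SITE_TYPE>
//   __global__ void mul_add_assign_kernel(SITE_TYPE *s1, double rho, SITE_TYPE *s2, int N) {
//       for (int i = blockIdx.x * blockDim.x + threadIdx.x; i < N; i += gridDim.x * blockDim.x) {
//           mul_add_assign(&s1[i], rho, &s2[i]); // overload resolution picks the site type
//       }
//   }

// mul_add_assign: s1 += rho * s2, with real scalar rho.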
visible __forceinline__ void mul_add_assign(suNf_spinor *s1, double rho, suNf_spinor *s2) {
    _spinor_mul_add_assign_f(*s1, rho, *s2);
}

visible __forceinline__ void mul_add_assign(suNf_spinor_flt *s1, double rho, suNf_spinor_flt *s2) {
    _spinor_mul_add_assign_f(*s1, rho, *s2);
}

visible __forceinline__ void mul_add_assign(suNf *s1, double rho, suNf *s2) {
    _suNf_mul_add(*s1, 1.0, *s1, rho, *s2);
}

#ifdef REPR_IS_REAL
visible __forceinline__ void mul_add_assign(suNfc *s1, double rho, suNfc *s2) {
    _suNfc_mul_add(*s1, 1.0, *s1, rho, *s2);
}
#endif

visible __forceinline__ void mul_add_assign(suNg *s1, double rho, suNg *s2) {
    _suNg_mul_add(*s1, 1.0, *s1, rho, *s2);
}

visible __forceinline__ void mul_add_assign(suNf_flt *s1, double rho, suNf_flt *s2) {
    _suNf_mul_add(*s1, 1.0, *s1, rho, *s2);
}

visible __forceinline__ void mul_add_assign(suNg_flt *s1, double rho, suNg_flt *s2) {
    _suNg_mul_add(*s1, 1.0, *s1, rho, *s2);
}

visible __forceinline__ void mul_add_assign(suNf_vector *s1, double rho, suNf_vector *s2) {
    _vector_mul_add_assign_f(*s1, rho, *s2);
}

visible __forceinline__ void mul_add_assign(suNg_vector *s1, double rho, suNg_vector *s2) {
    _vector_mul_add_assign_g(*s1, rho, *s2);
}

visible __forceinline__ void mul_add_assign(suNg_algebra_vector *s1, double rho, suNg_algebra_vector *s2) {
    _algebra_vector_mul_add_assign_g(*s1, rho, *s2);
}

visible __forceinline__ void mul_add_assign(double *s1, double rho, double *s2) {
    (*s1) += rho * (*s2);
}

visible __forceinline__ void mul_add_assign(float *s1, float rho, float *s2) {
    (*s1) += rho * (*s2);
}

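// mulc_add_assign: s1 += rho * s2, with complex scalar rho. For real-valued
// representations this operation is ill-defined, hence template_error.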
visible __forceinline__ void mulc_add_assign(suNf_spinor *s1, hr_complex rho, suNf_spinor *s2) {
    _spinor_mulc_add_assign_f(*s1, rho, *s2);
}

visible __forceinline__ void mulc_add_assign(suNf_spinor_flt *s1, hr_complex_flt rho, suNf_spinor_flt *s2) {
    _spinor_mulc_add_assign_f(*s1, rho, *s2);
}

visible __forceinline__ void mulc_add_assign(suNf *s1, hr_complex rho, suNf *s2) {
#ifdef REPR_IS_REAL
    template_error;
#else
    suNf tmp;
    _suNf_mulc(tmp, rho, *s2);
    _suNf_add_assign(*s1, tmp);
#endif
}

#ifdef REPR_IS_REAL
visible __forceinline__ void mulc_add_assign(suNfc *s1, hr_complex rho, suNfc *s2) {
    suNfc tmp;
    _suNfc_mulc(tmp, rho, *s2);
    _suNfc_add_assign(*s1, tmp);
}
#endif

visible __forceinline__ void mulc_add_assign(suNg *s1, hr_complex rho, suNg *s2) {
    suNg tmp;
    _suNg_mulc(tmp, rho, *s2);
    _suNg_add_assign(*s1, tmp);
}

visible __forceinline__ void mulc_add_assign(suNf_flt *s1, hr_complex_flt rho, suNf_flt *s2) {
#ifdef REPR_IS_REAL
    template_error;
#else
    suNf_flt tmp;
    _suNf_mulc(tmp, rho, *s2);
    _suNf_add_assign(*s1, tmp);
#endif
}

visible __forceinline__ void mulc_add_assign(suNg_flt *s1, hr_complex_flt rho, suNg_flt *s2) {
    suNg_flt tmp;
    _suNg_mulc(tmp, rho, *s2);
    _suNg_add_assign(*s1, tmp);
}

visible __forceinline__ void mulc_add_assign(suNf_vector *s1, hr_complex rho, suNf_vector *s2) {
    _vector_mulc_add_assign_f(*s1, rho, *s2);
}

visible __forceinline__ void mulc_add_assign(suNg_vector *s1, hr_complex rho, suNg_vector *s2) {
    _vector_mulc_add_assign_g(*s1, rho, *s2);
}

visible __forceinline__ void mulc_add_assign(suNg_algebra_vector *s1, hr_complex rho, suNg_algebra_vector *s2) {
    // algebra vectors are real-valued: only the real part of rho contributes
    _algebra_vector_mul_add_assign_g(*s1, creal(rho), *s2);
}

visible __forceinline__ void mulc_add_assign(double *s1, hr_complex rho, double *s2) {
    // TODO: this needs to throw an error instead.
    (*s1) += creal(rho) * (*s2);
}

visible __forceinline__ void mulc_add_assign(float *s1, hr_complex_flt rho, float *s2) {
    // TODO: this needs to throw an error instead.
    (*s1) += creal(rho) * (*s2);
}

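// mul: s1 = rho * s2, with real scalar rho.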
visible __forceinline__ void mul(suNf_spinor *s1, double rho, suNf_spinor *s2) {
    _spinor_mul_f(*s1, rho, *s2);
}

visible __forceinline__ void mul(suNf_spinor_flt *s1, double rho, suNf_spinor_flt *s2) {
    _spinor_mul_f(*s1, rho, *s2);
}

visible __forceinline__ void mul(suNf *s1, double rho, suNf *s2) {
    _suNf_mul(*s1, rho, *s2);
}

#ifdef REPR_IS_REAL
visible __forceinline__ void mul(suNfc *s1, double rho, suNfc *s2) {
    _suNfc_mul(*s1, rho, *s2);
}
#endif

visible __forceinline__ void mul(suNg *s1, double rho, suNg *s2) {
    _suNg_mul(*s1, rho, *s2);
}

visible __forceinline__ void mul(suNf_flt *s1, double rho, suNf_flt *s2) {
    _suNf_mul(*s1, rho, *s2);
}

visible __forceinline__ void mul(suNg_flt *s1, double rho, suNg_flt *s2) {
    _suNg_mul(*s1, rho, *s2);
}

visible __forceinline__ void mul(suNf_vector *s1, double rho, suNf_vector *s2) {
    _vector_mul_f(*s1, rho, *s2);
}

visible __forceinline__ void mul(suNg_vector *s1, double rho, suNg_vector *s2) {
    _vector_mul_g(*s1, rho, *s2);
}

visible __forceinline__ void mul(suNg_algebra_vector *s1, double rho, suNg_algebra_vector *s2) {
    _algebra_vector_mul_g(*s1, rho, *s2);
}

visible __forceinline__ void mul(double *s1, double rho, double *s2) {
    (*s1) = rho * (*s2);
}

visible __forceinline__ void mul(float *s1, float rho, float *s2) {
    (*s1) = rho * (*s2);
}

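// mulc: s1 = rho * s2, with complex scalar rho (ill-defined for real-valued types).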
visible __forceinline__ void mulc(suNf_spinor *s1, hr_complex rho, suNf_spinor *s2) {
    _spinor_mulc_f(*s1, rho, *s2);
}

visible __forceinline__ void mulc(suNf_spinor_flt *s1, hr_complex_flt rho, suNf_spinor_flt *s2) {
    _spinor_mulc_f(*s1, rho, *s2);
}

visible __forceinline__ void mulc(suNf *s1, hr_complex rho, suNf *s2) {
#ifdef REPR_IS_REAL
    template_error;
#else
    _suNf_mulc(*s1, rho, *s2);
#endif
}

#ifdef REPR_IS_REAL
visible __forceinline__ void mulc(suNfc *s1, hr_complex rho, suNfc *s2) {
    _suNfc_mulc(*s1, rho, *s2);
}
#endif

visible __forceinline__ void mulc(suNg *s1, hr_complex rho, suNg *s2) {
    _suNg_mulc(*s1, rho, *s2);
}

visible __forceinline__ void mulc(suNf_flt *s1, hr_complex_flt rho, suNf_flt *s2) {
#ifdef REPR_IS_REAL
    template_error;
#else
    _suNf_mulc(*s1, rho, *s2);
#endif
}

visible __forceinline__ void mulc(suNg_flt *s1, hr_complex_flt rho, suNg_flt *s2) {
    _suNg_mulc(*s1, rho, *s2);
}

visible __forceinline__ void mulc(suNf_vector *s1, hr_complex rho, suNf_vector *s2) {
    _vector_mulc_f(*s1, rho, *s2);
}

visible __forceinline__ void mulc(suNg_vector *s1, hr_complex rho, suNg_vector *s2) {
    _vector_mulc_g(*s1, rho, *s2);
}

visible __forceinline__ void mulc(suNg_algebra_vector *s1, hr_complex rho, suNg_algebra_vector *s2) {
    // algebra vectors are real-valued: only the real part of rho contributes
    _algebra_vector_mul_g(*s1, creal(rho), *s2);
}

visible __forceinline__ void mulc(double *s1, hr_complex rho, double *s2) {
    // TODO: this needs to throw an error instead.
}

visible __forceinline__ void mulc(float *s1, hr_complex rho, float *s2) {
    // TODO: this needs to throw an error instead.
}

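// add: r = s1 + s2.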
visible __forceinline__ void add(suNf_spinor *r, suNf_spinor *s1, suNf_spinor *s2) {
    _spinor_add_f(*r, *s1, *s2);
}

visible __forceinline__ void add(suNf_spinor_flt *r, suNf_spinor_flt *s1, suNf_spinor_flt *s2) {
    _spinor_add_f(*r, *s1, *s2);
}

visible __forceinline__ void add(suNf *r, suNf *s1, suNf *s2) {
    _suNf_mul_add(*r, 1.0, *s1, 1.0, *s2);
}

#ifdef REPR_IS_REAL
visible __forceinline__ void add(suNfc *r, suNfc *s1, suNfc *s2) {
    _suNfc_mul_add(*r, 1.0, *s1, 1.0, *s2);
}
#endif

visible __forceinline__ void add(suNg *r, suNg *s1, suNg *s2) {
    _suNg_mul_add(*r, 1.0, *s1, 1.0, *s2);
}

visible __forceinline__ void add(suNf_flt *r, suNf_flt *s1, suNf_flt *s2) {
    _suNf_mul_add(*r, 1.0, *s1, 1.0, *s2);
}

visible __forceinline__ void add(suNg_flt *r, suNg_flt *s1, suNg_flt *s2) {
    _suNg_mul_add(*r, 1.0, *s1, 1.0, *s2);
}

visible __forceinline__ void add(suNf_vector *r, suNf_vector *s1, suNf_vector *s2) {
    _vector_add_f(*r, *s1, *s2);
}

visible __forceinline__ void add(suNg_vector *r, suNg_vector *s1, suNg_vector *s2) {
    _vector_add_g(*r, *s1, *s2);
}

visible __forceinline__ void add(suNg_algebra_vector *r, suNg_algebra_vector *s1, suNg_algebra_vector *s2) {
    _algebra_vector_zero_g(*r);
    _algebra_vector_add_assign_g(*r, *s1);
    _algebra_vector_add_assign_g(*r, *s2);
}

visible __forceinline__ void add(double *r, double *s1, double *s2) {
    (*r) = (*s1) + (*s2);
}

visible __forceinline__ void add(float *r, float *s1, float *s2) {
    (*r) = (*s1) + (*s2);
}

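// sub: r = s1 - s2.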
visible __forceinline__ void sub(suNf_spinor *r, suNf_spinor *s1, suNf_spinor *s2) {
    _spinor_sub_f(*r, *s1, *s2);
}

visible __forceinline__ void sub(suNf_spinor_flt *r, suNf_spinor_flt *s1, suNf_spinor_flt *s2) {
    _spinor_sub_f(*r, *s1, *s2);
}

visible __forceinline__ void sub(suNf *r, suNf *s1, suNf *s2) {
    _suNf_mul_add(*r, 1.0, *s1, -1.0, *s2);
}

#ifdef REPR_IS_REAL
visible __forceinline__ void sub(suNfc *r, suNfc *s1, suNfc *s2) {
    _suNfc_mul_add(*r, 1.0, *s1, -1.0, *s2);
}
#endif

visible __forceinline__ void sub(suNg *r, suNg *s1, suNg *s2) {
    _suNg_mul_add(*r, 1.0, *s1, -1.0, *s2);
}

visible __forceinline__ void sub(suNf_flt *r, suNf_flt *s1, suNf_flt *s2) {
    _suNf_mul_add(*r, 1.0, *s1, -1.0, *s2);
}

visible __forceinline__ void sub(suNg_flt *r, suNg_flt *s1, suNg_flt *s2) {
    _suNg_mul_add(*r, 1.0, *s1, -1.0, *s2);
}

visible __forceinline__ void sub(suNf_vector *r, suNf_vector *s1, suNf_vector *s2) {
    _vector_sub_f(*r, *s1, *s2);
}

visible __forceinline__ void sub(suNg_vector *r, suNg_vector *s1, suNg_vector *s2) {
    _vector_sub_g(*r, *s1, *s2);
}

visible __forceinline__ void sub(suNg_algebra_vector *r, suNg_algebra_vector *s1, suNg_algebra_vector *s2) {
    _algebra_vector_zero_g(*r);
    _algebra_vector_add_assign_g(*r, *s1);
    _algebra_vector_sub_assign_g(*r, *s2);
}

visible __forceinline__ void sub(double *r, double *s1, double *s2) {
    (*r) = (*s1) - (*s2);
}

visible __forceinline__ void sub(float *r, float *s1, float *s2) {
    (*r) = (*s1) - (*s2);
}

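// sub_assign: s1 -= s2.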
visible __forceinline__ void sub_assign(suNf_spinor *s1, suNf_spinor *s2) {
    _spinor_sub_assign_f(*s1, *s2);
}

visible __forceinline__ void sub_assign(suNf_spinor_flt *s1, suNf_spinor_flt *s2) {
    _spinor_sub_assign_f(*s1, *s2);
}

visible __forceinline__ void sub_assign(suNf *s1, suNf *s2) {
    _suNf_sub_assign(*s1, *s2);
}

#ifdef REPR_IS_REAL
visible __forceinline__ void sub_assign(suNfc *s1, suNfc *s2) {
    _suNfc_sub_assign(*s1, *s2);
}
#endif

visible __forceinline__ void sub_assign(suNg *s1, suNg *s2) {
    _suNg_sub_assign(*s1, *s2);
}

visible __forceinline__ void sub_assign(suNf_flt *s1, suNf_flt *s2) {
    _suNf_sub_assign(*s1, *s2);
}

visible __forceinline__ void sub_assign(suNg_flt *s1, suNg_flt *s2) {
    _suNg_sub_assign(*s1, *s2);
}

visible __forceinline__ void sub_assign(suNf_vector *s1, suNf_vector *s2) {
    _vector_sub_assign_f(*s1, *s2);
}

visible __forceinline__ void sub_assign(suNg_vector *s1, suNg_vector *s2) {
    _vector_sub_assign_g(*s1, *s2);
}

visible __forceinline__ void sub_assign(suNg_algebra_vector *s1, suNg_algebra_vector *s2) {
    _algebra_vector_sub_assign_g(*s1, *s2);
}

visible __forceinline__ void sub_assign(double *s1, double *s2) {
    (*s1) -= (*s2);
}

visible __forceinline__ void sub_assign(float *s1, float *s2) {
    (*s1) -= (*s2);
}

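// minus: s1 = -s2.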
visible __forceinline__ void minus(suNf_spinor *s1, suNf_spinor *s2) {
    _spinor_minus_f(*s1, *s2);
}

visible __forceinline__ void minus(suNf_spinor_flt *s1, suNf_spinor_flt *s2) {
    _spinor_minus_f(*s1, *s2);
}

visible __forceinline__ void minus(suNf *s1, suNf *s2) {
    _suNf_minus(*s1, *s2);
}

#ifdef REPR_IS_REAL
visible __forceinline__ void minus(suNfc *s1, suNfc *s2) {
    _suNfc_minus(*s1, *s2);
}
#endif

visible __forceinline__ void minus(suNg *s1, suNg *s2) {
    _suNg_minus(*s1, *s2);
}

visible __forceinline__ void minus(suNf_flt *s1, suNf_flt *s2) {
    _suNf_minus(*s1, *s2);
}

visible __forceinline__ void minus(suNg_flt *s1, suNg_flt *s2) {
    _suNg_minus(*s1, *s2);
}

visible __forceinline__ void minus(suNf_vector *s1, suNf_vector *s2) {
    _vector_minus_f(*s1, *s2);
}

visible __forceinline__ void minus(suNg_vector *s1, suNg_vector *s2) {
    _vector_minus_g(*s1, *s2);
}

visible __forceinline__ void minus(suNg_algebra_vector *s1, suNg_algebra_vector *s2) {
    _algebra_vector_mul_g(*s1, -1.0, *s2);
}

visible __forceinline__ void minus(double *s1, double *s2) {
    (*s1) = -(*s2);
}

visible __forceinline__ void minus(float *s1, float *s2) {
    (*s1) = -(*s2);
}

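// add_assign: s1 += s2.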
visible __forceinline__ void add_assign(suNf_spinor *s1, suNf_spinor *s2) {
    _spinor_add_assign_f(*s1, *s2);
}

visible __forceinline__ void add_assign(suNf_spinor_flt *s1, suNf_spinor_flt *s2) {
    _spinor_add_assign_f(*s1, *s2);
}

visible __forceinline__ void add_assign(suNf *s1, suNf *s2) {
    _suNf_add_assign(*s1, *s2);
}

#ifdef REPR_IS_REAL
visible __forceinline__ void add_assign(suNfc *s1, suNfc *s2) {
    _suNfc_add_assign(*s1, *s2);
}
#endif

visible __forceinline__ void add_assign(suNg *s1, suNg *s2) {
    _suNg_add_assign(*s1, *s2);
}

visible __forceinline__ void add_assign(suNf_flt *s1, suNf_flt *s2) {
    _suNf_add_assign(*s1, *s2);
}

visible __forceinline__ void add_assign(suNg_flt *s1, suNg_flt *s2) {
    _suNg_add_assign(*s1, *s2);
}

visible __forceinline__ void add_assign(suNf_vector *s1, suNf_vector *s2) {
    _vector_add_assign_f(*s1, *s2);
}

visible __forceinline__ void add_assign(suNg_vector *s1, suNg_vector *s2) {
    _vector_add_assign_g(*s1, *s2);
}

visible __forceinline__ void add_assign(suNg_algebra_vector *s1, suNg_algebra_vector *s2) {
    _algebra_vector_add_assign_g(*s1, *s2);
}

visible __forceinline__ void add_assign(double *s1, double *s2) {
    (*s1) += (*s2);
}

visible __forceinline__ void add_assign(float *s1, float *s2) {
    (*s1) += (*s2);
}

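// prod_re: real part of the site-local inner product (s1, s2); for matrix
// types this is Re tr(s1^dagger s2).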
visible __forceinline__ double prod_re(suNf_spinor *s1, suNf_spinor *s2) {
    double k = 0;
    _spinor_prod_re_f(k, *s1, *s2);
    return k;
}

visible __forceinline__ double prod_re(suNf_spinor_flt *s1, suNf_spinor_flt *s2) {
    double k = 0;
    _spinor_prod_re_f(k, *s1, *s2);
    return k;
}

visible __forceinline__ double prod_re(suNf *s1, suNf *s2) {
    suNf tmp;
    double res = 0.0;
    _suNf_dagger_times_suNf(tmp, *s1, *s2);
    _suNf_trace_re(res, tmp);
    return res;
}

#ifdef REPR_IS_REAL
visible __forceinline__ double prod_re(suNfc *s1, suNfc *s2) {
    suNfc tmp;
    hr_complex res = 0.0;
    _suNfc_dagger_times_suNfc(tmp, *s1, *s2);
    _suNfc_trace(res, tmp);
    return creal(res);
}
#endif

visible __forceinline__ double prod_re(suNg *s1, suNg *s2) {
    suNg tmp;
    double res = 0.0;
    _suNg_dagger_times_suNg(tmp, *s1, *s2);
    _suNg_trace_re(res, tmp);
    return res;
}

visible __forceinline__ double prod_re(suNf_flt *s1, suNf_flt *s2) {
    suNf_flt tmp;
    double res = 0.0;
    _suNf_dagger_times_suNf(tmp, *s1, *s2);
    _suNf_trace_re(res, tmp);
    return res;
}

visible __forceinline__ double prod_re(suNg_flt *s1, suNg_flt *s2) {
    suNg_flt tmp;
    double res = 0.0;
    _suNg_dagger_times_suNg(tmp, *s1, *s2);
    _suNg_trace_re(res, tmp);
    return res;
}

visible __forceinline__ double prod_re(suNf_vector *s1, suNf_vector *s2) {
    double prod;
    _vector_prod_re_f(prod, *s1, *s2);
    return prod;
}

visible __forceinline__ double prod_re(suNg_vector *s1, suNg_vector *s2) {
    double prod;
    _vector_prod_re_g(prod, *s1, *s2);
    return prod;
}

visible __forceinline__ double prod_re(suNg_algebra_vector *s1, suNg_algebra_vector *s2) {
    double prod;
    _algebra_vector_prod_g(prod, *s1, *s2);
    return prod;
}

visible __forceinline__ double prod_re(double *s1, double *s2) {
    return (*s1) * (*s2);
}

visible __forceinline__ double prod_re(float *s1, float *s2) {
    return (*s1) * (*s2);
}

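// prod_im: imaginary part of the site-local inner product; identically zero
// for real-valued types.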
visible __forceinline__ double prod_im(suNf_spinor *s1, suNf_spinor *s2) {
    double k = 0;
    _spinor_prod_im_f(k, *s1, *s2);
    return k;
}

visible __forceinline__ double prod_im(suNf_spinor_flt *s1, suNf_spinor_flt *s2) {
    double k = 0;
    _spinor_prod_im_f(k, *s1, *s2);
    return k;
}

visible __forceinline__ double prod_im(suNf *s1, suNf *s2) {
#ifdef REPR_IS_REAL
    return 0.0;
#else
    suNf tmp;
    double res = 0.0;
    _suNf_dagger_times_suNf(tmp, *s1, *s2);
    _suNf_trace_im(res, tmp);
    return res;
#endif
}

#ifdef REPR_IS_REAL
visible __forceinline__ double prod_im(suNfc *s1, suNfc *s2) {
    return 0.0;
    /*suNfc tmp;
    double res = 0.0;
    _suNfc_dagger_times_suNfc(tmp, *s1, *s2);
    _suNfc_trace_im(res, tmp);
    return res;*/
}
#endif

visible __forceinline__ double prod_im(suNg *s1, suNg *s2) {
    suNg tmp;
    double res = 0.0;
    _suNg_dagger_times_suNg(tmp, *s1, *s2);
    _suNg_trace_im(res, tmp);
    return res;
}

visible __forceinline__ double prod_im(suNf_flt *s1, suNf_flt *s2) {
#ifdef REPR_IS_REAL
    return 0.0;
#else
    suNf_flt tmp;
    double res = 0.0;
    _suNf_dagger_times_suNf(tmp, *s1, *s2);
    _suNf_trace_im(res, tmp);
    return res;
#endif
}

visible __forceinline__ double prod_im(suNg_flt *s1, suNg_flt *s2) {
    suNg_flt tmp;
    double res = 0.0;
    _suNg_dagger_times_suNg(tmp, *s1, *s2);
    _suNg_trace_im(res, tmp);
    return res;
}

visible __forceinline__ double prod_im(suNf_vector *s1, suNf_vector *s2) {
    double prod;
    _vector_prod_im_f(prod, *s1, *s2);
    return prod;
}

visible __forceinline__ double prod_im(suNg_vector *s1, suNg_vector *s2) {
    double prod;
    _vector_prod_im_g(prod, *s1, *s2);
    return prod;
}

visible __forceinline__ double prod_im(suNg_algebra_vector *s1, suNg_algebra_vector *s2) {
    // algebra vectors are real-valued
    return 0;
}

visible __forceinline__ double prod_im(double *s1, double *s2) {
    return 0.0;
}

visible __forceinline__ double prod_im(float *s1, float *s2) {
    return 0.0;
}

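// prod: full complex site-local inner product (s1, s2).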
visible __forceinline__ hr_complex prod(suNf_spinor *s1, suNf_spinor *s2) {
    hr_complex z = 0;
    _spinor_prod_f(z, *s1, *s2);
    return z;
}

visible __forceinline__ hr_complex prod(suNf_spinor_flt *s1, suNf_spinor_flt *s2) {
    hr_complex_flt k = 0;
    _spinor_prod_f(k, *s1, *s2);
    hr_complex k1 = (double)creal(k) + I * (double)cimag(k);
    return k1;
}

visible __forceinline__ hr_complex prod(suNf *s1, suNf *s2) {
    suNf tmp;
    hr_complex res;
    _suNf_dagger_times_suNf(tmp, *s1, *s2);
    _suNf_trace(res, tmp);
    return res;
}

#ifdef REPR_IS_REAL
visible __forceinline__ hr_complex prod(suNfc *s1, suNfc *s2) {
    suNfc tmp;
    hr_complex res;
    _suNfc_dagger_times_suNfc(tmp, *s1, *s2);
    _suNfc_trace(res, tmp);
    return res;
}
#endif

visible __forceinline__ hr_complex prod(suNg *s1, suNg *s2) {
    suNg tmp;
    hr_complex res;
    _suNg_dagger_times_suNg(tmp, *s1, *s2);
    _suNg_trace(res, tmp);
    return res;
}

visible __forceinline__ hr_complex prod(suNf_flt *s1, suNf_flt *s2) {
    suNf_flt tmp;
    hr_complex res;
    _suNf_dagger_times_suNf(tmp, *s1, *s2);
    _suNf_trace(res, tmp);
    return res;
}

visible __forceinline__ hr_complex prod(suNg_flt *s1, suNg_flt *s2) {
    suNg_flt tmp;
    hr_complex res;
    _suNg_dagger_times_suNg(tmp, *s1, *s2);
    _suNg_trace(res, tmp);
    return res;
}

visible __forceinline__ hr_complex prod(suNf_vector *s1, suNf_vector *s2) {
    hr_complex prod;
    _vector_prod_f(prod, *s1, *s2);
    return prod;
}

visible __forceinline__ hr_complex prod(suNg_vector *s1, suNg_vector *s2) {
    hr_complex prod;
    _vector_prod_g(prod, *s1, *s2);
    return prod;
}

visible __forceinline__ hr_complex prod(suNg_algebra_vector *s1, suNg_algebra_vector *s2) {
    hr_complex prod;
    _algebra_vector_prod_g(prod, *s1, *s2);
    return prod;
}

visible __forceinline__ hr_complex prod(double *s1, double *s2) {
    hr_complex prod = (*s1) * (*s2);
    return prod;
}

visible __forceinline__ hr_complex prod(float *s1, float *s2) {
    hr_complex prod = ((double)((*s1) * (*s2)));
    return prod;
}

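// sqnorm: squared norm (r, r) of a single site.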
visible __forceinline__ double sqnorm(suNf_spinor *r) {
    double z = 0;
    _spinor_prod_re_f(z, *r, *r);
    return z;
}

visible __forceinline__ double sqnorm(suNf_spinor_flt *r) {
    double z = 0;
    _spinor_prod_re_f(z, *r, *r);
    return z;
}

visible __forceinline__ double sqnorm(suNf *r) {
    double norm;
    _suNf_sqnorm(norm, *r);
    return norm;
}

#ifdef REPR_IS_REAL
visible __forceinline__ double sqnorm(suNfc *r) {
    double norm;
    _suNfc_sqnorm(norm, *r);
    return norm;
}
#endif

visible __forceinline__ double sqnorm(suNg *r) {
    double norm;
    _suNg_sqnorm(norm, *r);
    return norm;
}

visible __forceinline__ double sqnorm(suNf_flt *r) {
    double norm;
    _suNf_sqnorm(norm, *r);
    return norm;
}

visible __forceinline__ double sqnorm(suNg_flt *r) {
    double norm;
    _suNg_sqnorm(norm, *r);
    return norm;
}

visible __forceinline__ double sqnorm(suNf_vector *r) {
    double prod;
    _vector_prod_re_f(prod, *r, *r);
    return prod;
}

visible __forceinline__ double sqnorm(suNg_vector *r) {
    double prod;
    _vector_prod_re_g(prod, *r, *r);
    return prod;
}

visible __forceinline__ double sqnorm(suNg_algebra_vector *r) {
    double sqnorm;
    _algebra_vector_sqnorm_g(sqnorm, *r);
    return sqnorm;
}

visible __forceinline__ double sqnorm(double *r) {
    return (*r) * (*r);
}

visible __forceinline__ double sqnorm(float *r) {
    return ((double)((*r) * (*r)));
}

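// max: largest absolute value among the components of a site, scanned as a
// flat array of reals (used for infinity norms).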
visible __forceinline__ double max(suNf_spinor *r) {
    double *a = (double *)r;
    double max = 0.;
    for (int i = 0; i < sizeof(suNf_spinor) / sizeof(*a); i++) {
        double v = fabs(a[i]);
        if (max < v) { max = v; }
    }
    return max;
}

visible __forceinline__ double max(suNf_spinor_flt *r) {
    float *a = (float *)r; // the underlying components are floats
    double max = 0.;
    for (int i = 0; i < sizeof(suNf_spinor_flt) / sizeof(*a); i++) {
        double v = fabs(a[i]);
        if (max < v) { max = v; }
    }
    return max;
}

visible __forceinline__ double max(suNf *r) {
    double *a = (double *)r;
    double max = 0.;
    for (int i = 0; i < sizeof(suNf) / sizeof(*a); i++) {
        double v = fabs(a[i]);
        if (max < v) { max = v; }
    }
    return max;
}

#ifdef REPR_IS_REAL
visible __forceinline__ double max(suNfc *r) {
    double *a = (double *)r;
    double max = 0.;
    for (int i = 0; i < sizeof(suNfc) / sizeof(*a); i++) {
        double v = fabs(a[i]);
        if (max < v) { max = v; }
    }
    return max;
}
#endif

visible __forceinline__ double max(suNg *r) {
    double *a = (double *)r;
    double max = 0.;
    for (int i = 0; i < sizeof(suNg) / sizeof(*a); i++) {
        double v = fabs(a[i]);
        if (max < v) { max = v; }
    }
    return max;
}

visible __forceinline__ double max(suNf_flt *r) {
    float *a = (float *)r; // the underlying components are floats
    double max = 0.;
    for (int i = 0; i < sizeof(suNf_flt) / sizeof(*a); i++) {
        double v = fabs(a[i]);
        if (max < v) { max = v; }
    }
    return max;
}

visible __forceinline__ double max(suNg_flt *r) {
    float *a = (float *)r; // the underlying components are floats
    double max = 0.;
    for (int i = 0; i < sizeof(suNg_flt) / sizeof(*a); i++) {
        double v = fabs(a[i]);
        if (max < v) { max = v; }
    }
    return max;
}

visible __forceinline__ double max(suNf_vector *r) {
    double *a = (double *)r;
    double max = 0.;
    for (int i = 0; i < sizeof(suNf_vector) / sizeof(*a); i++) {
        double v = fabs(a[i]);
        if (max < v) { max = v; }
    }
    return max;
}

visible __forceinline__ double max(suNg_vector *r) {
    double *a = (double *)r;
    double max = 0.;
    for (int i = 0; i < sizeof(suNg_vector) / sizeof(*a); i++) {
        double v = fabs(a[i]);
        if (max < v) { max = v; }
    }
    return max;
}

visible __forceinline__ double max(suNg_algebra_vector *r) {
    double *a = (double *)r;
    double max = 0.;
    for (int i = 0; i < sizeof(suNg_algebra_vector) / sizeof(*a); i++) {
        double v = fabs(a[i]);
        if (max < v) { max = v; }
    }
    return max;
}

visible __forceinline__ double max(double *r) {
    return fabs(*r);
}

visible __forceinline__ double max(float *r) {
    return fabs((double)(*r));
}

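// Gamma-matrix operations on spinors: g5 inner products and applications of
// gamma_5, gamma_0, ..., gamma_3.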
visible __forceinline__ double g5_prod_re(suNf_spinor *s1, suNf_spinor *s2) {
    double k = 0;
    _spinor_g5_prod_re_f(k, *s1, *s2);
    return k;
}

visible __forceinline__ double g5_prod_re(suNf_spinor_flt *s1, suNf_spinor_flt *s2) {
    double z = 0;
    _spinor_g5_prod_re_f(z, *s1, *s2);
    return z;
}

visible __forceinline__ double g5_prod_im(suNf_spinor *s1, suNf_spinor *s2) {
    double z = 0;
    _spinor_g5_prod_im_f(z, *s1, *s2);
    return z;
}

visible __forceinline__ double g5_prod_im(suNf_spinor_flt *s1, suNf_spinor_flt *s2) {
    double z = 0;
    _spinor_g5_prod_im_f(z, *s1, *s2);
    return z;
}

visible __forceinline__ void g5(suNf_spinor *s1, suNf_spinor *s2) {
    _spinor_g5_f(*s1, *s2);
}

visible __forceinline__ void g5(suNf_spinor_flt *s1, suNf_spinor_flt *s2) {
    _spinor_g5_f(*s1, *s2);
}

visible __forceinline__ void g5_assign(suNf_spinor *s) {
    _spinor_g5_assign_f(*s);
}

visible __forceinline__ void g5_assign(suNf_spinor_flt *s) {
    _spinor_g5_assign_f(*s);
}

visible __forceinline__ void g0(suNf_spinor *s1, suNf_spinor *s2) {
    _spinor_g0_f(*s1, *s2);
}

visible __forceinline__ void g0(suNf_spinor_flt *s1, suNf_spinor_flt *s2) {
    _spinor_g0_f(*s1, *s2);
}

visible __forceinline__ void g1(suNf_spinor *s1, suNf_spinor *s2) {
    _spinor_g1_f(*s1, *s2);
}

visible __forceinline__ void g1(suNf_spinor_flt *s1, suNf_spinor_flt *s2) {
    _spinor_g1_f(*s1, *s2);
}

visible __forceinline__ void g2(suNf_spinor *s1, suNf_spinor *s2) {
    _spinor_g2_f(*s1, *s2);
}

visible __forceinline__ void g2(suNf_spinor_flt *s1, suNf_spinor_flt *s2) {
    _spinor_g2_f(*s1, *s2);
}

visible __forceinline__ void g3(suNf_spinor *s1, suNf_spinor *s2) {
    _spinor_g3_f(*s1, *s2);
}

visible __forceinline__ void g3(suNf_spinor_flt *s1, suNf_spinor_flt *s2) {
    _spinor_g3_f(*s1, *s2);
}

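// Linear combinations: lc computes r = k1*s1 + k2*s2 with real coefficients,
// clc the same with complex coefficients; the *_add_assign variants
// accumulate into r instead of overwriting it.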
visible __forceinline__ void lc(suNf_spinor *r, double k1, suNf_spinor *s1, double k2, suNf_spinor *s2) {
    _spinor_lc_f(*r, k1, *s1, k2, *s2);
}

visible __forceinline__ void lc(suNf_spinor_flt *r, double k1, suNf_spinor_flt *s1, double k2, suNf_spinor_flt *s2) {
    _spinor_lc_f(*r, k1, *s1, k2, *s2);
}

visible __forceinline__ void lc_add_assign(suNf_spinor *r, double k1, suNf_spinor *s1, double k2, suNf_spinor *s2) {
    _spinor_lc_add_assign_f(*r, k1, *s1, k2, *s2);
}

visible __forceinline__ void lc_add_assign(suNf_spinor_flt *r, double k1, suNf_spinor_flt *s1, double k2, suNf_spinor_flt *s2) {
    _spinor_lc_add_assign_f(*r, k1, *s1, k2, *s2);
}

visible __forceinline__ void clc(suNf_spinor *r, hr_complex k1, suNf_spinor *s1, hr_complex k2, suNf_spinor *s2) {
    _spinor_clc_f(*r, k1, *s1, k2, *s2);
}

visible __forceinline__ void clc(suNf_spinor_flt *r, hr_complex k1, suNf_spinor_flt *s1, hr_complex k2, suNf_spinor_flt *s2) {
    _spinor_clc_f(*r, k1, *s1, k2, *s2);
}

visible __forceinline__ void clc_add_assign(suNf_spinor *r, hr_complex k1, suNf_spinor *s1, hr_complex k2, suNf_spinor *s2) {
    _spinor_clc_add_assign_f(*r, k1, *s1, k2, *s2);
}

visible __forceinline__ void clc_add_assign(suNf_spinor_flt *r, hr_complex k1, suNf_spinor_flt *s1, hr_complex k2,
                                            suNf_spinor_flt *s2) {
    _spinor_clc_add_assign_f(*r, k1, *s1, k2, *s2);
}

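// zero: set a site to zero.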
visible __forceinline__ void zero(suNf_spinor *s) {
    _spinor_zero_f(*s);
}

visible __forceinline__ void zero(suNf_spinor_flt *s) {
    _spinor_zero_f(*s);
}

#ifdef REPR_IS_REAL
visible __forceinline__ void zero(suNfc *u) {
    _suNfc_zero(*u);
}
#endif

visible __forceinline__ void zero(suNg *u) {
    _suNg_zero(*u);
}

visible __forceinline__ void zero(suNf *u) {
    _suNf_zero(*u);
}

visible __forceinline__ void zero(suNf_flt *u) {
    _suNf_zero(*u);
}

visible __forceinline__ void zero(suNg_flt *u) {
    _suNg_zero(*u);
}

visible __forceinline__ void zero(suNf_vector *v) {
    _vector_zero_f(*v);
}

visible __forceinline__ void zero(suNg_vector *v) {
    _vector_zero_g(*v);
}

visible __forceinline__ void zero(suNg_algebra_vector *v) {
    _algebra_vector_zero_g(*v);
}

visible __forceinline__ void zero(double *d) {
    *d = 0.0;
}

visible __forceinline__ void zero(float *f) {
    *f = 0.0f;
}

visible __forceinline__ void zero(ldl_t *t) {
    // TODO: find a better way to do this
    memset(t, 0, sizeof(ldl_t));
}

#undef _DECLARE_LINA_HEADER

#endif // __cplusplus

#endif // LINEAR_ALGEBRA_GENERIC_GPU_HPP