HiRep 0.1
Loading...
Searching...
No Matches
SIMD_vector_hirep.h
1#ifndef SIMD_VECTOR_HIREP_H
2#define SIMD_VECTOR_HIREP_H
3
4#ifdef SIMD_VECTOR_HIREP
5
6#if (NF == 3) || (NG == 3)
7#include "IO/logger.h"
8typedef double suNg_vector_V __attribute__((vector_size(2 * NG * sizeof(double))));
9
10#define _alt_sign_vector ((suNg_vector_V){ -1, +1, -1, +1, -1, +1 })
11
12#define _conj_sign_vector ((suNg_vector_V){ +1, -1, +1, -1, +1, -1 })
13
14typedef struct suNg_V {
15 suNg_vector_V c[NG];
16} suNg_V;
17
// Multiply-add: yields (a) * (b) + (c). Works for any operand types that
// support * and +; each argument is expanded exactly once.
#define _mul_add(a, b, c) (((a) * (b)) + (c))
19
// Declares two const suNg_vector_V variables in the enclosing scope:
//   reV - lanes (0, 0, 2, 2, 4, 4) of inputV
//   imV - lanes (1, 1, 3, 3, 5, 5) of inputV
// Expands to two declarations without a trailing semicolon, so the caller
// supplies the final ';' and must not use it as a single-statement body.
#define _decompose_suNg_vector_V(reV, imV, inputV) \
    const suNg_vector_V(reV) = _re_suNg_vector_V(inputV); \
    const suNg_vector_V(imV) = _im_suNg_vector_V(inputV)
23
// Broadcasts the even lanes of inputV pairwise: result = (0, 0, 2, 2, 4, 4).
// With interleaved (re, im) storage these are the real parts.
#define _re_suNg_vector_V(inputV) \
    __builtin_shufflevector((inputV), (inputV), 0, 0, 2, 2, 4, 4)

// Broadcasts the odd lanes of inputV pairwise: result = (1, 1, 3, 3, 5, 5).
// With interleaved (re, im) storage these are the imaginary parts.
#define _im_suNg_vector_V(inputV) \
    __builtin_shufflevector((inputV), (inputV), 1, 1, 3, 3, 5, 5)
27
// Assigns two already-declared vectors from lanes i and j of inputV:
//   sl  = (i, j, i, j, i, j)
//   Isl = _conj_sign_vector * (j, i, j, i, j, i)
// i and j must be integer constants, as __builtin_shufflevector requires.
// NOTE(review): the second result is always stored in a variable literally
// named `Isl` in the caller's scope; the `isl` parameter is never expanded.
// Switching the body to `isl` requires updating every call site at the same
// time.
#define _select_suNg_vector_V(inputV, i, j, sl, isl) \
    sl = __builtin_shufflevector((inputV), (inputV), i, j, i, j, i, j); \
    Isl = _conj_sign_vector * __builtin_shufflevector((inputV), (inputV), j, i, j, i, j, i)
31
// Declares a const suNg_vector_V named resV holding inputV with each adjacent
// lane pair swapped: (1, 0, 3, 2, 5, 4).
// The expansion deliberately carries no trailing semicolon: the caller writes
// it, as with _decompose_suNg_vector_V, so no stray empty statement is left
// behind at the call site.
#define _shuffle_suNg_vector_V(resV, inputV) \
    const suNg_vector_V(resV) = __builtin_shufflevector((inputV), (inputV), 1, 0, 3, 2, 5, 4)
34
// Lane-wise product U * V with the sign pattern of _alt_sign_vector
// (-1, +1, -1, +1, -1, +1) applied. Arguments are parenthesized so compound
// expressions such as `a + b` keep their intended grouping.
#define _alt_mul(U, V) ((_alt_sign_vector * (U)) * (V))
36
// Transposes three 6-lane vectors pairwise and sums them:
//   res lanes (0,1) = V1[0,1] + V1[2,3] + V1[4,5]
//   res lanes (2,3) = V2[0,1] + V2[2,3] + V2[4,5]
//   res lanes (4,5) = V3[0,1] + V3[2,3] + V3[4,5]
// The shuffles gather the V1 and V2 pairs (indices 6..11 address V2); the -1
// entries are don't-care lanes that are then filled from V3 by hand.
// V1, V2 and V3 are expanded several times, so pass side-effect-free operands.
#define _vector_reduce(res, V1, V2, V3) \
    do { \
        suNg_vector_V part0 = __builtin_shufflevector((V1), (V2), 0, 1, 6, 7, -1, -1); \
        suNg_vector_V part1 = __builtin_shufflevector((V1), (V2), 2, 3, 8, 9, -1, -1); \
        suNg_vector_V part2 = __builtin_shufflevector((V1), (V2), 4, 5, 10, 11, -1, -1); \
        part0[4] = (V3)[0]; \
        part0[5] = (V3)[1]; \
        part1[4] = (V3)[2]; \
        part1[5] = (V3)[3]; \
        part2[4] = (V3)[4]; \
        part2[5] = (V3)[5]; \
        res = part0 + part1 + part2; \
    } while (0)
50
// Declares suNg_vector_V `res` = reU * V + _alt_mul(imU, invV).
// When invV is V with each lane pair swapped, the even lanes hold
// reU*V_even - imU*V_odd and the odd lanes reU*V_odd + imU*V_even.
// All operands are parenthesized before being forwarded, so compound
// argument expressions keep their grouping.
#define _prod_V(res, reU, imU, V, invV) \
    suNg_vector_V(res) = _mul_add((reU), (V), _alt_mul((imU), (invV)))
// Declares suNg_vector_V `res` = re(U) * V + im(U) * IV, where re/im are the
// lane broadcasts produced by _re_suNg_vector_V / _im_suNg_vector_V.
#define _prod_dag_V(res, U, V, IV) \
    suNg_vector_V(res) = _mul_add(_re_suNg_vector_V(U), (V), _im_suNg_vector_V(U) * (IV))
53
// 3x3 matrix times 3-vector using 6-lane SIMD vectors.
//   r - destination, written through a suNg_vector_V pointer
//   u - object with a member array `c`; rows are loaded from c[0], c[3], c[6]
//   s - source vector, read through a suNg_vector_V pointer
// Each row is multiplied lane-wise by s (via _prod_V) and _vector_reduce then
// sums the three lane pairs of each row into one pair of r. Assuming
// interleaved (re, im) storage this is the complex product r = u * s.
// All inputs are loaded before r is written.
// NOTE(review): the pointer casts assume r, s and the rows of u are laid out
// and aligned compatibly with suNg_vector_V - confirm against the callers.
#define _MVM_3x3C_SIMD_VEC(r, u, s) \
    do { \
        const suNg_vector_V mvm_in = *(suNg_vector_V *)(&s); \
        suNg_vector_V *mvm_out = (suNg_vector_V *)(&r); \
        const suNg_vector_V mvm_row0 = *(suNg_vector_V *)&((u).c[0]); \
        const suNg_vector_V mvm_row1 = *(suNg_vector_V *)&((u).c[3]); \
        const suNg_vector_V mvm_row2 = *(suNg_vector_V *)&((u).c[6]); \
        _decompose_suNg_vector_V(mvm_re, mvm_im, mvm_in); \
        _shuffle_suNg_vector_V(mvm_swap0, mvm_row0); \
        _shuffle_suNg_vector_V(mvm_swap1, mvm_row1); \
        _shuffle_suNg_vector_V(mvm_swap2, mvm_row2); \
        _prod_V(mvm_p0, mvm_re, mvm_im, mvm_row0, mvm_swap0); \
        _prod_V(mvm_p1, mvm_re, mvm_im, mvm_row1, mvm_swap1); \
        _prod_V(mvm_p2, mvm_re, mvm_im, mvm_row2, mvm_swap2); \
        _vector_reduce(*mvm_out, mvm_p0, mvm_p1, mvm_p2); \
    } while (0)
70
// Companion of _MVM_3x3C_SIMD_VEC that combines the rows of u weighted by the
// components of s.
//   r - destination, written through a suNg_vector_V pointer
//   u - object with a member array `c`; rows are loaded from c[0], c[3], c[6]
//   s - source vector, read through a suNg_vector_V pointer
// For k = 0, 1, 2 the lane pair (2k, 2k+1) of s is broadcast into sl, and its
// swapped, sign-flipped copy into Isl; row k then contributes
// re(row) * sl + im(row) * Isl. Assuming interleaved (re, im) storage that is
// conj(row_k) * s_k, i.e. presumably r = u^dagger * s - confirm with callers.
// The scratch variable is passed to _select_suNg_vector_V under its real name
// `Isl`, so the call sites stay valid whether or not that macro expands its
// last parameter.
#define _MTVM_3x3C_SIMD_VEC(r, u, s) \
    do { \
        const suNg_vector_V s_V = *(suNg_vector_V *)(&s); \
        suNg_vector_V *r_V = (suNg_vector_V *)(&r); \
        const suNg_vector_V u_c0 = *(suNg_vector_V *)&((u).c[0]); \
        const suNg_vector_V u_c1 = *(suNg_vector_V *)&((u).c[3]); \
        const suNg_vector_V u_c2 = *(suNg_vector_V *)&((u).c[6]); \
        suNg_vector_V sl; \
        suNg_vector_V Isl; \
        _select_suNg_vector_V(s_V, 0, 1, sl, Isl); \
        _prod_dag_V(r1, (u_c0), sl, Isl); \
        _select_suNg_vector_V(s_V, 2, 3, sl, Isl); \
        _prod_dag_V(r2, (u_c1), sl, Isl); \
        _select_suNg_vector_V(s_V, 4, 5, sl, Isl); \
        _prod_dag_V(r3, (u_c2), sl, Isl); \
        *r_V = r1 + r2 + r3; \
    } while (0)
88
// Two matrix-vector products sharing one matrix: r1 from s1 and r2 from s2,
// each computed exactly as in _MVM_3x3C_SIMD_VEC. The rows of u and their
// pair-swapped copies are loaded once and reused for both products.
// All inputs are loaded before either destination is written.
#define _double_MVM_3x3C_SIMD_VEC(r1, r2, u, s1, s2) \
    do { \
        const suNg_vector_V dmvm_in1 = *(suNg_vector_V *)(&s1); \
        const suNg_vector_V dmvm_in2 = *(suNg_vector_V *)(&s2); \
        suNg_vector_V *dmvm_out1 = (suNg_vector_V *)(&r1); \
        suNg_vector_V *dmvm_out2 = (suNg_vector_V *)(&r2); \
        const suNg_vector_V dmvm_row0 = *(suNg_vector_V *)&((u).c[0]); \
        const suNg_vector_V dmvm_row1 = *(suNg_vector_V *)&((u).c[3]); \
        const suNg_vector_V dmvm_row2 = *(suNg_vector_V *)&((u).c[6]); \
        _decompose_suNg_vector_V(dmvm_re1, dmvm_im1, (dmvm_in1)); \
        _decompose_suNg_vector_V(dmvm_re2, dmvm_im2, (dmvm_in2)); \
        _shuffle_suNg_vector_V(dmvm_swap0, (dmvm_row0)); \
        _shuffle_suNg_vector_V(dmvm_swap1, (dmvm_row1)); \
        _shuffle_suNg_vector_V(dmvm_swap2, (dmvm_row2)); \
        _prod_V(dmvm_a0, dmvm_re1, dmvm_im1, (dmvm_row0), dmvm_swap0); \
        _prod_V(dmvm_a1, dmvm_re1, dmvm_im1, (dmvm_row1), dmvm_swap1); \
        _prod_V(dmvm_a2, dmvm_re1, dmvm_im1, (dmvm_row2), dmvm_swap2); \
        _prod_V(dmvm_b0, dmvm_re2, dmvm_im2, (dmvm_row0), dmvm_swap0); \
        _prod_V(dmvm_b1, dmvm_re2, dmvm_im2, (dmvm_row1), dmvm_swap1); \
        _prod_V(dmvm_b2, dmvm_re2, dmvm_im2, (dmvm_row2), dmvm_swap2); \
        _vector_reduce(*dmvm_out1, dmvm_a0, dmvm_a1, dmvm_a2); \
        _vector_reduce(*dmvm_out2, dmvm_b0, dmvm_b1, dmvm_b2); \
    } while (0)
112
// Two products sharing one matrix: r1 from s1 and r2 from s2, each computed
// exactly as in _MTVM_3x3C_SIMD_VEC. The rows of u are loaded once; sl and Isl
// are scratch vectors refilled by _select_suNg_vector_V before every
// _prod_dag_V.
// The scratch variable is passed to _select_suNg_vector_V under its real name
// `Isl`, so the call sites stay valid whether or not that macro expands its
// last parameter.
#define _double_MTVM_3x3C_SIMD_VEC(r1, r2, u, s1, s2) \
    do { \
        const suNg_vector_V s1_V = *(suNg_vector_V *)(&s1); \
        const suNg_vector_V s2_V = *(suNg_vector_V *)(&s2); \
        suNg_vector_V *r1_V = (suNg_vector_V *)(&r1); \
        suNg_vector_V *r2_V = (suNg_vector_V *)(&r2); \
        const suNg_vector_V u_c0 = *(suNg_vector_V *)&((u).c[0]); \
        const suNg_vector_V u_c1 = *(suNg_vector_V *)&((u).c[3]); \
        const suNg_vector_V u_c2 = *(suNg_vector_V *)&((u).c[6]); \
        suNg_vector_V sl; \
        suNg_vector_V Isl; \
        _select_suNg_vector_V(s1_V, 0, 1, sl, Isl); \
        _prod_dag_V(r11, (u_c0), sl, Isl); \
        _select_suNg_vector_V(s2_V, 0, 1, sl, Isl); \
        _prod_dag_V(r21, (u_c0), sl, Isl); \
        _select_suNg_vector_V(s1_V, 2, 3, sl, Isl); \
        _prod_dag_V(r12, (u_c1), sl, Isl); \
        _select_suNg_vector_V(s2_V, 2, 3, sl, Isl); \
        _prod_dag_V(r22, (u_c1), sl, Isl); \
        _select_suNg_vector_V(s1_V, 4, 5, sl, Isl); \
        _prod_dag_V(r13, (u_c2), sl, Isl); \
        _select_suNg_vector_V(s2_V, 4, 5, sl, Isl); \
        _prod_dag_V(r23, (u_c2), sl, Isl); \
        *r1_V = r11 + r12 + r13; \
        *r2_V = r21 + r22 + r23; \
    } while (0)
139
140#endif //(NF == 3) || (NG == 3)
141
142#if (NF == 3) && !defined(REPR_IS_REAL)
143// clang-format off
144
// Replaces any earlier _suNf_multiply. Dispatches on the type of mc:
// suNf_vector uses the SIMD kernel, every other type falls back to
// _suNf_multiply_default.
#undef _suNf_multiply
#define _suNf_multiply(mc, mu, mp) \
    _Generic((mc), \
        suNf_vector: ({ _MVM_3x3C_SIMD_VEC((mc), (mu), (mp)); }), \
        default: ({ _suNf_multiply_default(mc, mu, mp); }))
150
// Replaces any earlier _suNf_inverse_multiply. Dispatches on the type of mc:
// suNf_vector uses the SIMD kernel, every other type falls back to
// _suNf_inverse_multiply_default.
#undef _suNf_inverse_multiply
#define _suNf_inverse_multiply(mc, mu, mp) \
    _Generic((mc), \
        suNf_vector: ({ _MTVM_3x3C_SIMD_VEC((mc), (mu), (mp)); }), \
        default: ({ _suNf_inverse_multiply_default(mc, mu, mp); }))
156
// Replaces any earlier _suNf_double_multiply. Dispatches on the type of mc
// only: suNf_vector uses the two-vector SIMD kernel, every other type falls
// back to _suNf_double_multiply_default.
#undef _suNf_double_multiply
#define _suNf_double_multiply(mc, mc2, mu, mp, mp2) \
    _Generic((mc), \
        suNf_vector: ({ _double_MVM_3x3C_SIMD_VEC((mc), (mc2), (mu), (mp), (mp2)); }), \
        default: ({ _suNf_double_multiply_default(mc, mc2, mu, mp, mp2); }))
162
// Replaces any earlier _suNf_double_inverse_multiply. Dispatches on the type
// of mc only: suNf_vector uses the two-vector SIMD kernel, every other type
// falls back to _suNf_double_inverse_multiply_default.
#undef _suNf_double_inverse_multiply
#define _suNf_double_inverse_multiply(mc, mc2, mu, mp, mp2) \
    _Generic((mc), \
        suNf_vector: ({ _double_MTVM_3x3C_SIMD_VEC((mc), (mc2), (mu), (mp), (mp2)); }), \
        default: ({ _suNf_double_inverse_multiply_default(mc, mc2, mu, mp, mp2); }))
168
169#endif //(NF == 3) && !defined(REPR_IS_REAL)
170
171#if (NG == 3)
// Replaces any earlier _suNg_multiply. Dispatches on the type of mc:
// suNg_vector uses the SIMD kernel, every other type falls back to
// _suNg_multiply_default.
#undef _suNg_multiply
#define _suNg_multiply(mc, mu, mp) \
    _Generic((mc), \
        suNg_vector: ({ _MVM_3x3C_SIMD_VEC((mc), (mu), (mp)); }), \
        default: ({ _suNg_multiply_default(mc, mu, mp); }))
177
// Replaces any earlier _suNg_inverse_multiply. Dispatches on the type of mc:
// suNg_vector uses the SIMD kernel, every other type falls back to
// _suNg_inverse_multiply_default.
#undef _suNg_inverse_multiply
#define _suNg_inverse_multiply(mc, mu, mp) \
    _Generic((mc), \
        suNg_vector: ({ _MTVM_3x3C_SIMD_VEC((mc), (mu), (mp)); }), \
        default: ({ _suNg_inverse_multiply_default(mc, mu, mp); }))
183
// Replaces any earlier _suNg_double_multiply. Dispatches on the type of mc
// only: suNg_vector uses the two-vector SIMD kernel, every other type falls
// back to _suNg_double_multiply_default.
#undef _suNg_double_multiply
#define _suNg_double_multiply(mc, mc2, mu, mp, mp2) \
    _Generic((mc), \
        suNg_vector: ({ _double_MVM_3x3C_SIMD_VEC((mc), (mc2), (mu), (mp), (mp2)); }), \
        default: ({ _suNg_double_multiply_default(mc, mc2, mu, mp, mp2); }))
189
// Replaces any earlier _suNg_double_inverse_multiply. Dispatches on the type
// of mc only: suNg_vector uses the two-vector SIMD kernel, every other type
// falls back to _suNg_double_inverse_multiply_default.
#undef _suNg_double_inverse_multiply
#define _suNg_double_inverse_multiply(mc, mc2, mu, mp, mp2) \
    _Generic((mc), \
        suNg_vector: ({ _double_MTVM_3x3C_SIMD_VEC((mc), (mc2), (mu), (mp), (mp2)); }), \
        default: ({ _suNg_double_inverse_multiply_default(mc, mc2, mu, mp, mp2); }))
195// clang-format on
196
197#endif //(NG == 3)
198
199#endif // SIMD_VECTOR_HIREP
200
201#endif // SIMD_VECTOR_HIREP_H
Simple output logging facility.