HiRep
0.1
Loading...
Searching...
No Matches
SIMD_vector_hirep.h
1
#ifndef SIMD_VECTOR_HIREP_H
2
#define SIMD_VECTOR_HIREP_H
3
4
#ifdef SIMD_VECTOR_HIREP
5
6
#if (NF == 3) || (NG == 3)
7
#include "
IO/logger.h
"
8
/* SIMD vector type holding 2 * NG doubles (compiler vector_size extension).
 * NOTE(review): the sign-vector and shuffle macros in this header spell out
 * exactly six lanes, which matches this type only when NG == 3 - confirm for
 * configurations where only NF == 3. */
typedef double suNg_vector_V __attribute__((vector_size(sizeof(double) * 2 * NG)));
9
10
/* Six-lane sign pattern (-1, +1, -1, +1, -1, +1) as a suNg_vector_V compound literal. */
#define _alt_sign_vector ((suNg_vector_V){ -1.0, 1.0, -1.0, 1.0, -1.0, 1.0 })
11
12
/* Six-lane sign pattern (+1, -1, +1, -1, +1, -1) as a suNg_vector_V compound literal. */
#define _conj_sign_vector ((suNg_vector_V){ 1.0, -1.0, 1.0, -1.0, 1.0, -1.0 })
13
14
typedef
struct
suNg_V {
15
suNg_vector_V c[NG];
16
} suNg_V;
17
18
/* Multiply-add expression: expands to x * y + z with every argument
 * parenthesized; each argument is evaluated exactly once. */
#define _mul_add(x, y, z) ((x) * (y) + (z))
19
20
/* Declares two const suNg_vector_V variables in the expanding scope:
 *   re_name <- lanes (0, 0, 2, 2, 4, 4) of src
 *   im_name <- lanes (1, 1, 3, 3, 5, 5) of src
 * The expansion is two declarations; the terminating ';' of the second one
 * is supplied by the caller.
 * NOTE(review): presumably even lanes hold real parts and odd lanes imaginary
 * parts - confirm against the vector layout. */
#define _decompose_suNg_vector_V(re_name, im_name, src) \
    const suNg_vector_V re_name = __builtin_shufflevector((src), (src), 0, 0, 2, 2, 4, 4); \
    const suNg_vector_V im_name = __builtin_shufflevector((src), (src), 1, 1, 3, 3, 5, 5)
23
24
/* Expression yielding lanes (0, 0, 2, 2, 4, 4) of v, i.e. each even lane duplicated. */
#define _re_suNg_vector_V(v) (__builtin_shufflevector((v), (v), 0, 0, 2, 2, 4, 4))
25
26
/* Expression yielding lanes (1, 1, 3, 3, 5, 5) of v, i.e. each odd lane duplicated. */
#define _im_suNg_vector_V(v) (__builtin_shufflevector((v), (v), 1, 1, 3, 3, 5, 5))
27
28
/* Fills two already-declared vectors from lanes lane_a and lane_b of src:
 *   dst <- (a, b, a, b, a, b)
 *   Isl <- _conj_sign_vector * (b, a, b, a, b, a)
 * NOTE: the fifth macro parameter is never referenced by the expansion. The
 * second assignment targets an identifier spelled exactly `Isl`, which must
 * therefore be declared in the expanding scope.
 * The expansion is two assignment statements; the final ';' is supplied by
 * the caller. */
#define _select_suNg_vector_V(src, lane_a, lane_b, dst, unused_dst) \
    dst = __builtin_shufflevector((src), (src), lane_a, lane_b, lane_a, lane_b, lane_a, lane_b); \
    Isl = _conj_sign_vector * __builtin_shufflevector((src), (src), lane_b, lane_a, lane_b, lane_a, lane_b, lane_a)
31
32
/* Declares a const suNg_vector_V named resV in the expanding scope, holding
 * inputV with each adjacent lane pair swapped: lanes (1, 0, 3, 2, 5, 4).
 * The definition does not end with ';' - the caller writes it, as for the
 * other declaration macros in this header - so a use no longer expands to a
 * stray empty statement. */
#define _shuffle_suNg_vector_V(resV, inputV) \
    const suNg_vector_V(resV) = __builtin_shufflevector((inputV), (inputV), 1, 0, 3, 2, 5, 4)
34
35
/* Expands to (_alt_sign_vector * U) * V.
 * Both arguments are parenthesized so that compound expressions passed as U
 * or V keep their intended grouping. */
#define _alt_mul(U, V) ((_alt_sign_vector * (U)) * (V))
36
37
/* Regroups the lane pairs of three 6-lane vectors and sums them into res:
 *   res[0..1] = V1[0..1] + V1[2..3] + V1[4..5]
 *   res[2..3] = V2[0..1] + V2[2..3] + V2[4..5]
 *   res[4..5] = V3[0..1] + V3[2..3] + V3[4..5]
 * V1 and V2 are expanded three times and V3 six times, so the arguments
 * should be free of side effects. */
#define _vector_reduce(res, V1, V2, V3) \
    do { \
        /* Shuffle indices 6..11 address V2; lanes 4 and 5 are given as -1 and filled from V3. */ \
        suNg_vector_V pair0_ = __builtin_shufflevector((V1), (V2), 0, 1, 6, 7, -1, -1); \
        pair0_[4] = (V3)[0]; \
        pair0_[5] = (V3)[1]; \
        suNg_vector_V pair1_ = __builtin_shufflevector((V1), (V2), 2, 3, 8, 9, -1, -1); \
        pair1_[4] = (V3)[2]; \
        pair1_[5] = (V3)[3]; \
        suNg_vector_V pair2_ = __builtin_shufflevector((V1), (V2), 4, 5, 10, 11, -1, -1); \
        pair2_[4] = (V3)[4]; \
        pair2_[5] = (V3)[5]; \
        res = pair0_ + pair1_ + pair2_; \
    } while (0)
50
51
/* Declares a suNg_vector_V named `name` in the expanding scope, initialized to
 *   re * vec + (_alt_sign_vector * im) * swapped
 * built from _mul_add and _alt_mul. The trailing ';' is supplied by the caller. */
#define _prod_V(name, re, im, vec, swapped) \
    suNg_vector_V name = _mul_add(re, vec, _alt_mul(im, swapped))
52
/* Declares a suNg_vector_V named res in the expanding scope, initialized to
 *   _re_suNg_vector_V(U) * V + _im_suNg_vector_V(U) * IV
 * IV is parenthesized so that a compound expression passed as IV is
 * multiplied as a whole. The trailing ';' is supplied by the caller. */
#define _prod_dag_V(res, U, V, IV) \
    suNg_vector_V(res) = _mul_add(_re_suNg_vector_V(U), V, _im_suNg_vector_V(U) * (IV))
53
54
/* SIMD kernel writing into r a result computed from u and s.
 * Reads s and the three slices of u starting at u.c[0], u.c[3] and u.c[6] as
 * suNg_vector_V, forms one lane-wise product per slice and combines the three
 * products with _vector_reduce into the storage of r.
 * NOTE(review): assuming interleaved (re, im) doubles and u.c[] holding a
 * row-major 3x3 complex matrix, this evaluates r = u * s - confirm.
 * NOTE(review): the pointer casts rely on r, s and u.c[] being laid out as
 * consecutive doubles and suitably aligned for suNg_vector_V - confirm. */
#define _MVM_3x3C_SIMD_VEC(r, u, s) \
    do { \
        const suNg_vector_V vin_ = *(suNg_vector_V *)(&s); \
        suNg_vector_V *vout_ = (suNg_vector_V *)(&r); \
        const suNg_vector_V row0_ = *(suNg_vector_V *)&((u).c[0]); \
        const suNg_vector_V row1_ = *(suNg_vector_V *)&((u).c[3]); \
        const suNg_vector_V row2_ = *(suNg_vector_V *)&((u).c[6]); \
        /* Duplicated even / odd lanes of the input vector. */ \
        _decompose_suNg_vector_V(vin_re_, vin_im_, vin_); \
        /* Pair-swapped copies of each slice of u. */ \
        _shuffle_suNg_vector_V(row0_sw_, row0_); \
        _shuffle_suNg_vector_V(row1_sw_, row1_); \
        _shuffle_suNg_vector_V(row2_sw_, row2_); \
        /* One lane-wise product per slice. */ \
        _prod_V(acc0_, vin_re_, vin_im_, row0_, row0_sw_); \
        _prod_V(acc1_, vin_re_, vin_im_, row1_, row1_sw_); \
        _prod_V(acc2_, vin_re_, vin_im_, row2_, row2_sw_); \
        _vector_reduce(*vout_, acc0_, acc1_, acc2_); \
    } while (0)
70
71
/* SIMD kernel writing into r a result computed from u and s.
 * Reads s and the three slices of u starting at u.c[0], u.c[3] and u.c[6] as
 * suNg_vector_V. For each lane pair (0,1), (2,3), (4,5) of s it builds the
 * broadcast vectors sl / Isl with _select_suNg_vector_V, multiplies them with
 * the matching slice of u via _prod_dag_V, and stores the sum of the three
 * products in the storage of r.
 * The locally declared `Isl` is passed explicitly as the fifth argument of
 * _select_suNg_vector_V, so every identifier handed to that macro is declared
 * in this scope.
 * NOTE(review): assuming interleaved (re, im) doubles and u.c[] holding a
 * row-major 3x3 complex matrix, this evaluates r = u^dagger * s - confirm.
 * NOTE(review): the pointer casts rely on r, s and u.c[] being laid out as
 * consecutive doubles and suitably aligned for suNg_vector_V - confirm. */
#define _MTVM_3x3C_SIMD_VEC(r, u, s) \
    do { \
        const suNg_vector_V s_V = *(suNg_vector_V *)(&s); \
        suNg_vector_V *r_V = (suNg_vector_V *)(&r); \
        const suNg_vector_V u_c0 = *(suNg_vector_V *)&((u).c[0]); \
        const suNg_vector_V u_c1 = *(suNg_vector_V *)&((u).c[3]); \
        const suNg_vector_V u_c2 = *(suNg_vector_V *)&((u).c[6]); \
        /* Scratch vectors, overwritten by every _select_suNg_vector_V below. */ \
        suNg_vector_V(sl); \
        suNg_vector_V(Isl); \
        _select_suNg_vector_V(s_V, 0, 1, sl, Isl); \
        _prod_dag_V(r1, (u_c0), sl, Isl); \
        _select_suNg_vector_V(s_V, 2, 3, sl, Isl); \
        _prod_dag_V(r2, (u_c1), sl, Isl); \
        _select_suNg_vector_V(s_V, 4, 5, sl, Isl); \
        _prod_dag_V(r3, (u_c2), sl, Isl); \
        *r_V = r1 + r2 + r3; \
    } while (0)
88
89
/* Two-input variant of the SIMD kernel: applies the same u to s1 and s2 and
 * writes the results into r1 and r2.
 * The three slices of u (starting at u.c[0], u.c[3], u.c[6]) and their
 * pair-swapped copies are formed once and shared by both inputs; each output
 * is the _vector_reduce of its three per-slice products.
 * NOTE(review): assuming interleaved (re, im) doubles and u.c[] holding a
 * row-major 3x3 complex matrix, this evaluates r1 = u * s1 and r2 = u * s2 -
 * confirm.
 * NOTE(review): the pointer casts rely on the operands being laid out as
 * consecutive doubles and suitably aligned for suNg_vector_V - confirm. */
#define _double_MVM_3x3C_SIMD_VEC(r1, r2, u, s1, s2) \
    do { \
        const suNg_vector_V vin1_ = *(suNg_vector_V *)(&s1); \
        const suNg_vector_V vin2_ = *(suNg_vector_V *)(&s2); \
        suNg_vector_V *vout1_ = (suNg_vector_V *)(&r1); \
        suNg_vector_V *vout2_ = (suNg_vector_V *)(&r2); \
        const suNg_vector_V row0_ = *(suNg_vector_V *)&((u).c[0]); \
        const suNg_vector_V row1_ = *(suNg_vector_V *)&((u).c[3]); \
        const suNg_vector_V row2_ = *(suNg_vector_V *)&((u).c[6]); \
        /* Duplicated even / odd lanes of both input vectors. */ \
        _decompose_suNg_vector_V(vin1_re_, vin1_im_, (vin1_)); \
        _decompose_suNg_vector_V(vin2_re_, vin2_im_, (vin2_)); \
        /* Pair-swapped copies of each slice of u, shared by both inputs. */ \
        _shuffle_suNg_vector_V(row0_sw_, (row0_)); \
        _shuffle_suNg_vector_V(row1_sw_, (row1_)); \
        _shuffle_suNg_vector_V(row2_sw_, (row2_)); \
        /* Per-slice products for the first input. */ \
        _prod_V(acc1_0_, vin1_re_, vin1_im_, (row0_), row0_sw_); \
        _prod_V(acc1_1_, vin1_re_, vin1_im_, (row1_), row1_sw_); \
        _prod_V(acc1_2_, vin1_re_, vin1_im_, (row2_), row2_sw_); \
        /* Per-slice products for the second input. */ \
        _prod_V(acc2_0_, vin2_re_, vin2_im_, (row0_), row0_sw_); \
        _prod_V(acc2_1_, vin2_re_, vin2_im_, (row1_), row1_sw_); \
        _prod_V(acc2_2_, vin2_re_, vin2_im_, (row2_), row2_sw_); \
        _vector_reduce(*vout1_, acc1_0_, acc1_1_, acc1_2_); \
        _vector_reduce(*vout2_, acc2_0_, acc2_1_, acc2_2_); \
    } while (0)
112
113
/* Two-input variant of the SIMD kernel built on _select_suNg_vector_V and
 * _prod_dag_V: applies the same u to s1 and s2 and writes the results into
 * r1 and r2.
 * For each lane pair (0,1), (2,3), (4,5) of an input it builds the broadcast
 * vectors sl / Isl, multiplies them with the matching slice of u (starting at
 * u.c[0], u.c[3], u.c[6]) and sums the three products per output.
 * The locally declared `Isl` is passed explicitly as the fifth argument of
 * _select_suNg_vector_V, so every identifier handed to that macro is declared
 * in this scope.
 * NOTE(review): assuming interleaved (re, im) doubles and u.c[] holding a
 * row-major 3x3 complex matrix, this evaluates r1 = u^dagger * s1 and
 * r2 = u^dagger * s2 - confirm.
 * NOTE(review): the pointer casts rely on the operands being laid out as
 * consecutive doubles and suitably aligned for suNg_vector_V - confirm. */
#define _double_MTVM_3x3C_SIMD_VEC(r1, r2, u, s1, s2) \
    do { \
        const suNg_vector_V s1_V = *(suNg_vector_V *)(&s1); \
        const suNg_vector_V s2_V = *(suNg_vector_V *)(&s2); \
        suNg_vector_V *r1_V = (suNg_vector_V *)(&r1); \
        suNg_vector_V *r2_V = (suNg_vector_V *)(&r2); \
        const suNg_vector_V u_c0 = *(suNg_vector_V *)&((u).c[0]); \
        const suNg_vector_V u_c1 = *(suNg_vector_V *)&((u).c[3]); \
        const suNg_vector_V u_c2 = *(suNg_vector_V *)&((u).c[6]); \
        /* Scratch vectors, overwritten by every _select_suNg_vector_V below. */ \
        suNg_vector_V(sl); \
        suNg_vector_V(Isl); \
        _select_suNg_vector_V(s1_V, 0, 1, sl, Isl); \
        _prod_dag_V(r11, (u_c0), sl, Isl); \
        _select_suNg_vector_V(s2_V, 0, 1, sl, Isl); \
        _prod_dag_V(r21, (u_c0), sl, Isl); \
        _select_suNg_vector_V(s1_V, 2, 3, sl, Isl); \
        _prod_dag_V(r12, (u_c1), sl, Isl); \
        _select_suNg_vector_V(s2_V, 2, 3, sl, Isl); \
        _prod_dag_V(r22, (u_c1), sl, Isl); \
        _select_suNg_vector_V(s1_V, 4, 5, sl, Isl); \
        _prod_dag_V(r13, (u_c2), sl, Isl); \
        _select_suNg_vector_V(s2_V, 4, 5, sl, Isl); \
        _prod_dag_V(r23, (u_c2), sl, Isl); \
        *r1_V = r11 + r12 + r13; \
        *r2_V = r21 + r22 + r23; \
    } while (0)
139
140
#endif
//(NF == 3) || (NG == 3)
141
142
#if (NF == 3) && !defined(REPR_IS_REAL)
143
// clang-format off
144
145
/* Replaces _suNf_multiply with a type-dispatching wrapper: when the first
 * argument has type suNf_vector the SIMD kernel _MVM_3x3C_SIMD_VEC is used,
 * for any other type the call is forwarded to _suNf_multiply_default. */
#undef _suNf_multiply
#define _suNf_multiply(res, mat, vec)                                  \
    _Generic((res),                                                    \
        suNf_vector: ({ _MVM_3x3C_SIMD_VEC((res), (mat), (vec)); }),   \
        default: ({ _suNf_multiply_default(res, mat, vec); }))
150
151
/* Replaces _suNf_inverse_multiply with a type-dispatching wrapper: when the
 * first argument has type suNf_vector the SIMD kernel _MTVM_3x3C_SIMD_VEC is
 * used, for any other type the call is forwarded to
 * _suNf_inverse_multiply_default. */
#undef _suNf_inverse_multiply
#define _suNf_inverse_multiply(res, mat, vec)                          \
    _Generic((res),                                                    \
        suNf_vector: ({ _MTVM_3x3C_SIMD_VEC((res), (mat), (vec)); }),  \
        default: ({ _suNf_inverse_multiply_default(res, mat, vec); }))
156
157
/* Replaces _suNf_double_multiply with a type-dispatching wrapper: when the
 * first argument has type suNf_vector the SIMD kernel
 * _double_MVM_3x3C_SIMD_VEC is used, for any other type the call is forwarded
 * to _suNf_double_multiply_default. */
#undef _suNf_double_multiply
#define _suNf_double_multiply(res1, res2, mat, vec1, vec2)                                       \
    _Generic((res1),                                                                             \
        suNf_vector: ({ _double_MVM_3x3C_SIMD_VEC((res1), (res2), (mat), (vec1), (vec2)); }),    \
        default: ({ _suNf_double_multiply_default(res1, res2, mat, vec1, vec2); }))
162
163
/* Replaces _suNf_double_inverse_multiply with a type-dispatching wrapper: when
 * the first argument has type suNf_vector the SIMD kernel
 * _double_MTVM_3x3C_SIMD_VEC is used, for any other type the call is
 * forwarded to _suNf_double_inverse_multiply_default. */
#undef _suNf_double_inverse_multiply
#define _suNf_double_inverse_multiply(res1, res2, mat, vec1, vec2)                               \
    _Generic((res1),                                                                             \
        suNf_vector: ({ _double_MTVM_3x3C_SIMD_VEC((res1), (res2), (mat), (vec1), (vec2)); }),   \
        default: ({ _suNf_double_inverse_multiply_default(res1, res2, mat, vec1, vec2); }))
168
169
#endif
//(NF == 3) && !defined(REPR_IS_REAL)
170
171
#if (NG == 3)
172
/* Replaces _suNg_multiply with a type-dispatching wrapper: when the first
 * argument has type suNg_vector the SIMD kernel _MVM_3x3C_SIMD_VEC is used,
 * for any other type the call is forwarded to _suNg_multiply_default. */
#undef _suNg_multiply
#define _suNg_multiply(res, mat, vec)                                  \
    _Generic((res),                                                    \
        suNg_vector: ({ _MVM_3x3C_SIMD_VEC((res), (mat), (vec)); }),   \
        default: ({ _suNg_multiply_default(res, mat, vec); }))
177
178
/* Replaces _suNg_inverse_multiply with a type-dispatching wrapper: when the
 * first argument has type suNg_vector the SIMD kernel _MTVM_3x3C_SIMD_VEC is
 * used, for any other type the call is forwarded to
 * _suNg_inverse_multiply_default. */
#undef _suNg_inverse_multiply
#define _suNg_inverse_multiply(res, mat, vec)                          \
    _Generic((res),                                                    \
        suNg_vector: ({ _MTVM_3x3C_SIMD_VEC((res), (mat), (vec)); }),  \
        default: ({ _suNg_inverse_multiply_default(res, mat, vec); }))
183
184
/* Replaces _suNg_double_multiply with a type-dispatching wrapper: when the
 * first argument has type suNg_vector the SIMD kernel
 * _double_MVM_3x3C_SIMD_VEC is used, for any other type the call is forwarded
 * to _suNg_double_multiply_default. */
#undef _suNg_double_multiply
#define _suNg_double_multiply(res1, res2, mat, vec1, vec2)                                       \
    _Generic((res1),                                                                             \
        suNg_vector: ({ _double_MVM_3x3C_SIMD_VEC((res1), (res2), (mat), (vec1), (vec2)); }),    \
        default: ({ _suNg_double_multiply_default(res1, res2, mat, vec1, vec2); }))
189
190
/* Replaces _suNg_double_inverse_multiply with a type-dispatching wrapper: when
 * the first argument has type suNg_vector the SIMD kernel
 * _double_MTVM_3x3C_SIMD_VEC is used, for any other type the call is
 * forwarded to _suNg_double_inverse_multiply_default. */
#undef _suNg_double_inverse_multiply
#define _suNg_double_inverse_multiply(res1, res2, mat, vec1, vec2)                               \
    _Generic((res1),                                                                             \
        suNg_vector: ({ _double_MTVM_3x3C_SIMD_VEC((res1), (res2), (mat), (vec1), (vec2)); }),   \
        default: ({ _suNg_double_inverse_multiply_default(res1, res2, mat, vec1, vec2); }))
195
// clang-format on
196
197
#endif
//(NG == 3)
198
199
#endif
// SIMD_VECTOR_HIREP
200
201
#endif
// SIMD_VECTOR_HIREP_H
logger.h
Simple output logging facility.
Include
Core
SIMD_vector_hirep.h
Generated by
1.12.0