14#include "libhr_core.h"
/*
 * Numerical tolerances shared by the test helpers.
 *   EPSILON_TEST     = 1e-14
 *   EPSILON_FLT_TEST = 1e-4
 * NOTE(review): the names suggest the first is meant for double-precision
 * comparisons and the second for single-precision ones -- confirm at the
 * use sites, which are not visible here.
 */
static double const EPSILON_TEST = 1.e-14;
static double const EPSILON_FLT_TEST = 1.e-4;
/*
 * synchronize: expands to a cudaDeviceSynchronize() call.
 * The terminating semicolon is part of the expansion, so writing
 * `synchronize;` yields an additional empty statement; keep that in mind
 * before using it as the sole body of an unbraced if/else. The semicolon is
 * retained because existing call sites may rely on it.
 * The return value of cudaDeviceSynchronize() is discarded by this expansion.
 */
#define synchronize cudaDeviceSynchronize();
/*
 * Thin aliases used by the test macros:
 *   gpu_copy(_s) -> copy_to_gpu(_s)
 *   cpu_copy(_s) -> copy_from_gpu(_s)
 *   maxnorm(_s)  -> max_cpu(_s)
 *   twonorm(_s)  -> sqnorm_cpu(_s)
 */
31#define gpu_copy(_s) copy_to_gpu(_s)
32#define cpu_copy(_s) copy_from_gpu(_s)
33#define maxnorm(_s) max_cpu(_s)
34#define twonorm(_s) sqnorm_cpu(_s)
/*
 * Second set of definitions: maxnorm(_s) -> max(_s), twonorm(_s) -> sqnorm(_s).
 * NOTE(review): maxnorm and twonorm are already defined just above with
 * different expansions. The embedded listing numbers jump from 34 to 38, so
 * presumably a preprocessor conditional separating the two sets is missing
 * from this view -- confirm against the original header before relying on
 * either expansion.
 */
38#define maxnorm(_s) max(_s)
39#define twonorm(_s) sqnorm(_s)
/*
 * _FIELD_DESC(_s1): associates each field pointer type listed below with a
 * short description string (e.g. spinor_field * -> "spinor field").
 * NOTE(review): the `type *: "text"` list followed by a closing parenthesis
 * has the shape of a C11 _Generic selection, but the line that would open it
 * (listing line 43) is not present here, so the final ')' is unmatched and
 * _s1 is never referenced in the visible text -- confirm against the
 * original header.
 */
42#define _FIELD_DESC(_s1) \
44 spinor_field *: "spinor field", \
45 spinor_field_flt *: "single precision spinor field", \
46 scalar_field *: "scalar field", \
47 suNg_field *: "suNg field", \
48 suNf_field *: "suNf field", \
49 suNfc_field *: "suNfc_field", \
50 suNg_field_flt *: "single precision suNg_field", \
51 suNf_field_flt *: "single precision suNf_field", \
52 suNg_scalar_field *: "suNg_scalar_field", \
53 suNg_av_field *: "avfield", \
54 gtransf *: "gtransf", \
55 clover_term *: "clover term", \
56 clover_force *: "clover force", \
57 staple_field *: "staple field")
/*
 * sanity_check(_n, _s): for _k = 0 .. _n-1, logs sqnorm(&_s[_k]) through
 * lprintf under the "SANITY CHECK" tag so a reader can see that each field
 * is nonzero.
 * NOTE(review): the message labels the value "L2 norm", whereas the
 * compare_* macros in this file print sqrt() of their sqnorm-style value
 * under the same label -- presumably this line prints the squared norm;
 * confirm.
 * NOTE(review): _n and _s are used unparenthesized, so pass simple
 * identifiers only.
 * NOTE(review): the visible text ends on a continuation backslash with the
 * loop brace still open; the closing line(s) are not present in this view.
 */
59#define sanity_check(_n, _s) \
60 for (int _k = 0; _k < _n; ++_k) { \
61 lprintf("SANITY CHECK", 10, "L2 norm of field nr %d: %lf (should be nonzero)\n", _k, sqnorm(&_s[_k])); \
/*
 * compare_cpu_cpu(_errors, _out, _diff, _tag, _precision):
 *   1. calls sub_assign(_diff, _out) (presumably _diff -= _out; confirm);
 *   2. evaluates maxnorm(_diff) into `res` and twonorm(_diff) into `norm2`;
 *   3. if res > _precision, increments _errors and selects "[FAIL]",
 *      otherwise selects "[ OK ]";
 *   4. logs the verdict, res and sqrt(norm2) via lprintf(_tag, 2, ...).
 * _errors must be a modifiable lvalue. The macro declares locals named
 * res, norm2 and msg.
 * NOTE(review): listing lines 65, 67 and the line following 71 are absent
 * from this view (the last visible line ends in a continuation backslash),
 * so the enclosing scope of those locals cannot be seen here.
 */
64#define compare_cpu_cpu(_errors, _out, _diff, _tag, _precision) \
66 sub_assign(_diff, _out); \
68 double res = maxnorm(_diff); \
69 double norm2 = twonorm(_diff); \
70 const char *msg = (res > _precision) ? ++_errors, "[FAIL]" : "[ OK ]"; \
71 lprintf(_tag, 2, "%s MAX norm=%.10e L2 norm=%.10e\n", msg, res, sqrt(norm2)); \
/*
 * compare_cpu_gpu(_errors, _out, _diff, _tag, _precision):
 *   1. calls copy_from_gpu(_diff);
 *   2. calls sub_assign_cpu(_diff, _out) (presumably _diff -= _out on the
 *      host copy; confirm);
 *   3. evaluates maxnorm(_diff) into `res` and sqnorm_cpu(_diff) into `norm2`;
 *   4. if res > _precision, increments _errors and selects "[FAIL]",
 *      otherwise selects "[ OK ]";
 *   5. logs the verdict, res and sqrt(norm2) via lprintf(_tag, 2, ...).
 * _errors must be a modifiable lvalue. The failure counter is bumped through
 * the _errors parameter (matching compare_cpu_cpu) rather than through a
 * caller-scope variable that merely happens to be called `errors`.
 * NOTE(review): listing line 79 is absent from this view, so whether the
 * locals res/norm2/msg live in their own scope cannot be seen here.
 */
78#define compare_cpu_gpu(_errors, _out, _diff, _tag, _precision) \
80 copy_from_gpu(_diff); \
81 sub_assign_cpu(_diff, _out); \
82 double res = maxnorm(_diff); \
83 double norm2 = sqnorm_cpu(_diff); \
84 const char *msg = (res > _precision) ? ++_errors, "[FAIL]" : "[ OK ]"; \
85 lprintf(_tag, 2, "%s MAX norm=%.10e L2 norm=%.10e\n", msg, res, sqrt(norm2));
/*
 * setup_random_fields_m(_n, _s): logs a start message, calls
 * random_field(&_s[_i]) for _i = 0 .. _n-1, runs sanity_check(_n, _s) and
 * logs a completion message (both messages under the "MAIN" tag).
 * The expansion is a sequence of statements, not a single expression.
 * NOTE(review): the brace opened by the for statement is not closed in the
 * visible text (listing lines 92-93 are absent from this view).
 */
88#define setup_random_fields_m(_n, _s) \
89 lprintf("MAIN", 10, "Setup random spinor fields\n"); \
90 for (int _i = 0; _i < _n; _i++) { \
91 random_field(&_s[_i]); \
94 sanity_check(_n, _s); \
95 lprintf("MAIN", 10, "Done setup random spinor fields\n");
/*
 * _TEST_CPU_INV_OP(_errors, _name, _ninputs, _in, _out, _test, _tag, _precision):
 * fills the _ninputs fields of _in with random data (setup_random_fields_m),
 * splices in the caller-supplied code _test verbatim, prints _name with the
 * "%35s: " format, then runs compare_cpu_cpu(_errors, _in, (_out), ...).
 * Note the argument order: _in is passed in compare_cpu_cpu's _out slot and
 * _out in its _diff slot, so _out is the field that gets overwritten.
 * NOTE(review): listing line 98 and the closing line(s) after 101 are absent
 * from this view.
 */
97#define _TEST_CPU_INV_OP(_errors, _name, _ninputs, _in, _out, _test, _tag, _precision) \
99 setup_random_fields_m(_ninputs, _in); \
100 _test lprintf(_tag, 2, "%35s: ", _name); \
101 compare_cpu_cpu(_errors, _in, (_out), _tag, _precision); \
/*
 * _TEST_RED_INV_OP(_errors, _name, _ninputs, _in, _out, _test, _tag, _precision):
 * fills the _ninputs fields of _in with random data, splices in the
 * caller-supplied code _test verbatim, prints _name with the "%35s: " format,
 * then calls compare_diff(_errors, abs1, abs2, _tag, _precision).
 * abs1 and abs2 are not macro parameters: they must be in scope at the
 * expansion site (presumably assigned by _test; confirm).
 * NOTE(review): _out does not appear in the visible expansion.
 * NOTE(review): compare_diff is declared in this file with `int errors`
 * passed by value, so this call cannot update _errors.
 * NOTE(review): listing line 105 and the closing line(s) after 108 are
 * absent from this view.
 */
104#define _TEST_RED_INV_OP(_errors, _name, _ninputs, _in, _out, _test, _tag, _precision) \
106 setup_random_fields_m(_ninputs, _in); \
107 _test lprintf(_tag, 2, "%35s: ", _name); \
108 compare_diff(_errors, abs1, abs2, _tag, _precision); \
/*
 * _TEST_GPU_OP(_errors, _name, _ninputs, _in, _out, _test, _tag, _prec):
 * fills the _ninputs fields of _in with random data, splices in the
 * caller-supplied code _test verbatim, prints _name with the "%15s: " format,
 * then runs compare_cpu_gpu(_errors, _out, (_out + 1), _tag, _prec).
 * Because (_out + 1) is passed as the difference field, _out must point to
 * at least two consecutive fields.
 * NOTE(review): listing line 112 and the closing line(s) after 115 are
 * absent from this view.
 */
111#define _TEST_GPU_OP(_errors, _name, _ninputs, _in, _out, _test, _tag, _prec) \
113 setup_random_fields_m(_ninputs, _in); \
114 _test lprintf(_tag, 2, "%15s: ", _name); \
115 compare_cpu_gpu(_errors, _out, (_out + 1), _tag, _prec); \
/*
 * _TEST_RED_OP(_errors, _name, _ninputs, _in, _test, _tag, _prec):
 * fills the _ninputs fields of _in with random data, splices in the
 * caller-supplied code _test verbatim, prints _name with the "%15s: " format,
 * then calls compare_diff(_errors, abs1, abs2, _tag, _prec).
 * abs1 and abs2 are not macro parameters: they must be in scope at the
 * expansion site (presumably assigned by _test; confirm).
 * NOTE(review): compare_diff is declared in this file with `int errors`
 * passed by value, so this call cannot update _errors.
 * NOTE(review): listing line 119 and the closing line(s) after 122 are
 * absent from this view.
 */
118#define _TEST_RED_OP(_errors, _name, _ninputs, _in, _test, _tag, _prec) \
120 setup_random_fields_m(_ninputs, _in); \
121 _test lprintf(_tag, 2, "%15s: ", _name); \
122 compare_diff(_errors, abs1, abs2, _tag, _prec); \
/*
 * _WARMUP_SPEEDTEST(_clock, _n_warmup, _time_target, _n_reps, _operator):
 * announces the warm-up, loops _n_warmup times, reads the elapsed time from
 * timer_lap(&_clock) scaled by 1e-3 (reported as msec), logs total and
 * per-application time, then estimates the repetition count needed to reach
 * _time_target as _n_warmup * 1.01 * (_time_target / elapsed), stores it in
 * _n_reps and passes it to bcast_int.
 * _clock and _n_reps must be lvalues (their addresses are taken).
 * Every reference goes through the macro parameters; the expansion no longer
 * depends on caller variables that happen to be named clock, n_warmup or
 * n_reps.
 * NOTE(review): listing lines 127-128 and 130-132 (which presumably hold the
 * loop body using _operator and the loop's closing brace) and the closing
 * line(s) after 137 are absent from this view.
 */
125#define _WARMUP_SPEEDTEST(_clock, _n_warmup, _time_target, _n_reps, _operator) \
126 lprintf("LA TEST", 0, "Warmup application %d times.\n", _n_warmup); \
129 for (int i = 0; i < _n_warmup; ++i) { \
133 double elapsed = timer_lap(&_clock) * 1.e-3; \
134 lprintf("LA TEST", 0, "total time: %lf msec\n", elapsed); \
135 lprintf("LA TEST", 0, "time single: %lf usec\n", elapsed / _n_warmup * 1000.); \
136 _n_reps = (int)(_n_warmup * 1.01 * (_time_target / elapsed)); \
137 bcast_int(&_n_reps, 1); \
/*
 * _RUN_SPEEDTEST(_clock, _n_warmup, _time_target, _n_reps, _flopsite, _bytesite, _operator):
 * repeatedly times a loop of _n_reps iterations with timer_lap(&_clock)
 * (scaled by 1e-3 and reported as msec), re-estimates _n_reps from
 * _time_target after each pass and shares it through bcast_int, and keeps
 * going while the measured time is below 95% of _time_target. It then logs
 * the repetition count, total and per-application time, and the GFLOPS and
 * bandwidth figures derived from GLB_VOLUME, _flopsite and _bytesite.
 * _clock and _n_reps must be lvalues. _n_warmup is not referenced in the
 * visible expansion.
 * Every reference goes through the macro parameters; the expansion no longer
 * depends on caller variables that happen to be named clock, n_reps or
 * time_target.
 * NOTE(review): in the visible text _n_reps is re-estimated (listing line
 * 150) after the pass that produced __elapsed, and the statistics below then
 * divide by / multiply with that new value rather than the count that was
 * actually executed -- confirm whether that is intended.
 * NOTE(review): listing lines 141, 143, 146-148, 153 and the closing line(s)
 * after 158 are absent from this view (including the `do` that pairs with
 * the visible `} while (...)` and the loop body using _operator).
 */
140#define _RUN_SPEEDTEST(_clock, _n_warmup, _time_target, _n_reps, _flopsite, _bytesite, _operator) \
142 double __elapsed = 0; \
144 timer_lap(&_clock); \
145 for (int i = 0; i < _n_reps; ++i) { \
149 __elapsed = timer_lap(&_clock) * 1.e-3; \
150 _n_reps = (int)((double)(_n_reps * 1.01 * _time_target) / __elapsed); \
151 bcast_int(&_n_reps, 1); \
152 } while (__elapsed < _time_target * .95); \
154 lprintf("LA TEST", 0, "Number of repetitions: %d\n", _n_reps); \
155 lprintf("LA TEST", 0, "Total time: %lf msec\n", __elapsed); \
156 lprintf("LA TEST", 0, "Time single: %lf usec\n", __elapsed / _n_reps * 1000.); \
157 lprintf("LA TEST", 0, "GFLOPS: %1.6g\n", (((double)_n_reps * GLB_VOLUME) * _flopsite) / __elapsed / 1.e6); \
158 lprintf("LA TEST", 0, "BANDWIDTH: %1.6g GB/s\n\n", (((double)_n_reps * GLB_VOLUME) * _bytesite) / __elapsed / 1.e6); \
161double spinor_max(suNf_spinor *s);
162float spinor_max_flt(suNf_spinor_flt *s);
/*
 * compare_diff: checks whether the two inputs abs1 and abs2 are the same
 * within a given relative precision; tag is the label passed along for
 * logging and prec the precision to use.
 * NOTE(review): errors is received by value, so the caller's counter cannot
 * be modified through this call -- confirm how failures are meant to be
 * reported back.
 */
void compare_diff(
    int errors,
    double abs1,
    double abs2,
    char tag[],
    double prec);

/*
 * compare_diff_flt: single-precision counterpart taking float inputs; it has
 * no tag or precision parameter.
 * NOTE(review): presumably compares against a fixed single-precision
 * tolerance -- the definition is not visible here; confirm.
 */
void compare_diff_flt(
    int errors,
    float abs1,
    float abs2);
168void evaluate_timer_resolution(
Timer clock);
171void spinor_field_sanity_check(
int ninputs,
spinor_field *in);
/*
 * setup_random_gauge_fields: takes no arguments and returns nothing.
 * Declared with (void) so that it is a true prototype in C and calls with
 * stray arguments are rejected by the compiler.
 * NOTE(review): presumably fills the global gauge fields with random
 * values -- the definition is not visible here; confirm.
 */
void setup_random_gauge_fields(void);
/*
 * Scalar checks returning int. Parameter names are not given in the
 * declarations.
 * NOTE(review): presumably each returns a failure indicator/count that the
 * caller accumulates, with check_diff_norm comparing two values,
 * check_diff_norm_zero comparing one value against zero and
 * check_finiteness testing for a finite value -- the definitions are not
 * visible here; confirm the return convention before use.
 */
int check_diff_norm(
    double,
    double);
int check_diff_norm_zero(
    double);
int check_finiteness(
    double);
194void random_gtransf_cpu(
gtransf *);
Clover force.
Definition spinor_field.h:304
Clover term.
Definition spinor_field.h:288
Gauge transformation.
Definition spinor_field.h:336
LDL decomposition field needed for clover improvement.
Definition spinor_field.h:256
Scalar field of double precision real values.
Definition spinor_field.h:240
Spinor field array containing single precision SU(N_f) spinors in chosen fermion representation.
Definition spinor_field.h:207
Spinor field array containing SU(N_f) spinors in chosen fermion representation.
Definition spinor_field.h:189
Staple field for Luescher-Weisz.
Definition spinor_field.h:320
Single precision gauge field in the chosen fermion representation.
Definition spinor_field.h:171
Gauge field in chosen fermion representation.
Definition spinor_field.h:153
Complexified su(N_f) field.
Definition spinor_field.h:272
Field of SU(N_g) algebra vectors.
Definition spinor_field.h:224
Gauge field of single precision SU(N_g) matrices.
Definition spinor_field.h:135
Gauge field of SU(N_g) matrices.
Definition spinor_field.h:98
SU(N_g) scalar field of SU(N_g) vectors.
Definition spinor_field.h:117
void compare_diff(int errors, double abs1, double abs2, char tag[], double prec)
Check if the two inputs are the same within a given relative precision of EPSILON.
Definition test_utils.c:63
void setup_random_fields_flt(int n, spinor_field_flt s[])
Definition test_utils.c:149