1 /* altivec_conf.h, this file is part of the
2  * AltiVec optimized library for MJPEG tools MPEG-1/2 Video Encoder
3  * Copyright (C) 2002  James Klicman <james@klicman.org>
4  *
5  * This library is free software; you can redistribute it and/or modify
6  * it under the terms of the GNU General Public License as published by
7  * the Free Software Foundation; either version 2 of the License, or
8  * (at your option) any later version.
9  *
10  * This program is distributed in the hope that it will be useful,
11  * but WITHOUT ANY WARRANTY; without even the implied warranty of
12  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
13  * GNU General Public License for more details.
14  *
15  * You should have received a copy of the GNU General Public License
16  * along with this program; if not, write to the Free Software
17  * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
18  */
19 
20 /*
21  * development settings: VERIFY and BENCHMARK are mutually exclusive.
22  * Change a mode's #undef below to #define to select it; the #if/#elif chain
23  * that follows tests VERIFY, then BENCHMARK, then AMBER, else PRODUCTION. */
24 #undef ALTIVEC_VERIFY
25 #undef ALTIVEC_BENCHMARK
26 #undef ALTIVEC_AMBER
27 /* the BENCHMARK branch below #undefs ALTIVEC_DST */
28 #define ALTIVEC_DST /* use data stream touch */
29 
30 
31 #if defined(ALTIVEC_VERIFY)
32 /* {{{ */
33 /*
34  * define ALTIVEC_TEST_<function>_WITH for each function to verify, with
35  * its comparable function as the replacement text */
36 #define ALTIVEC_TEST_build_sub44_mests_WITH         build_sub44_mests
37 #define ALTIVEC_TEST_build_sub22_mests_WITH         build_sub22_mests
38 #define ALTIVEC_TEST_find_best_one_pel_WITH         find_best_one_pel
39 #define ALTIVEC_TEST_sub_mean_reduction_WITH        sub_mean_reduction
40 #define ALTIVEC_TEST_sad_00_WITH                    sad_00
41 #define ALTIVEC_TEST_sad_01_WITH                    sad_01
42 #define ALTIVEC_TEST_sad_10_WITH                    sad_10
43 #define ALTIVEC_TEST_sad_11_WITH                    sad_11
44 #define ALTIVEC_TEST_bsad_WITH                      bsad
45 #define ALTIVEC_TEST_sumsq_WITH                     sumsq
46 #define ALTIVEC_TEST_bsumsq_WITH                    bsumsq
47 #define ALTIVEC_TEST_sumsq_sub22_WITH               sumsq_sub22
48 #define ALTIVEC_TEST_bsumsq_sub22_WITH              bsumsq_sub22
49 #define ALTIVEC_TEST_subsample_image_WITH           subsample_image
50 #define ALTIVEC_TEST_variance_WITH                  variance
51 #define ALTIVEC_TEST_quant_non_intra_WITH           quant_non_intra
52 #define ALTIVEC_TEST_quant_weight_coeff_intra_WITH  quant_weight_coeff_intra
53 #define ALTIVEC_TEST_quant_weight_coeff_inter_WITH  quant_weight_coeff_inter
54 #define ALTIVEC_TEST_iquant_non_intra_m1_WITH       iquant_non_intra_m1
55 #define ALTIVEC_TEST_iquant_non_intra_m2_WITH       iquant_non_intra_m2
56 #define ALTIVEC_TEST_iquant_intra_m1_WITH           iquant_intra_m1
57 #define ALTIVEC_TEST_iquant_intra_m2_WITH           iquant_intra_m2
58 #define ALTIVEC_TEST_add_pred_WITH                  add_pred
59 #define ALTIVEC_TEST_sub_pred_WITH                  sub_pred
60 #define ALTIVEC_TEST_pred_comp_WITH                 pred_comp
61 #define ALTIVEC_TEST_field_dct_best_WITH            field_dct_best
62 #define ALTIVEC_TEST_fdct_WITH                      /* defined empty: output range test */
63 #define ALTIVEC_TEST_idct_WITH                      /* defined empty: output range test */
64 /* ALTIVEC_FUNCTION(name,ret,def) declares name##_altivec and name##_altivec_verify */
65 #  include "verify.h"
66 #  define ALTIVEC_TEST_SUFFIX(name) name##_altivec_verify
67 #  define ALTIVEC_FUNCTION(name,ret,def)                                     \
68     ret name##_altivec def;                                                  \
69     ret name##_altivec_verify def;
70 #  define ALTIVEC_TEST ALTIVEC_TEST_VERIFY
71 /* NOTE(review): ALTIVEC_TEST_VERIFY is not defined in this file; presumably verify.h provides it -- confirm */
72 /* }}} */
73 #elif defined(ALTIVEC_BENCHMARK)
74 /* {{{ */
75 /* define each function to benchmark with its comparable function; every entry below is currently #undef */
76 #undef  ALTIVEC_TEST_sad_00_WITH                    sad_00
77 #undef  ALTIVEC_TEST_sad_01_WITH                    sad_01
78 #undef  ALTIVEC_TEST_sad_10_WITH                    sad_10
79 #undef  ALTIVEC_TEST_sad_11_WITH                    sad_11
80 #undef  ALTIVEC_TEST_bsad_WITH                      bsad
81 #undef  ALTIVEC_TEST_sumsq_WITH                     sumsq
82 #undef  ALTIVEC_TEST_bsumsq_WITH                    bsumsq
83 #undef  ALTIVEC_TEST_sumsq_sub22_WITH               sumsq_sub22
84 #undef  ALTIVEC_TEST_bsumsq_sub22_WITH              bsumsq_sub22
85 #undef  ALTIVEC_TEST_quant_non_intra_WITH           quant_non_intra
86 #undef  ALTIVEC_TEST_quant_weight_coeff_intra_WITH  quant_weight_coeff_intra
87 #undef  ALTIVEC_TEST_quant_weight_coeff_inter_WITH  quant_weight_coeff_inter
88 #undef  ALTIVEC_TEST_variance_WITH                  variance
89 #undef  ALTIVEC_TEST_sub_pred_WITH                  sub_pred
90 #undef  ALTIVEC_TEST_add_pred_WITH                  add_pred
91 #undef  ALTIVEC_TEST_pred_comp_WITH                 pred_comp
92 #undef  ALTIVEC_TEST_subsample_image_WITH           subsample_image
93 #undef  ALTIVEC_TEST_field_dct_best_WITH            field_dct_best
94 /* the following functions may call other functions and should be */
95 /* benchmarked separately. */
96 #undef  ALTIVEC_TEST_find_best_one_pel_WITH         find_best_one_pel
97 #undef  ALTIVEC_TEST_build_sub44_mests_WITH         build_sub44_mests
98 #undef  ALTIVEC_TEST_build_sub22_mests_WITH         build_sub22_mests
99 /* can't benchmark the following functions since they modify their input */
100 #undef  ALTIVEC_TEST_sub_mean_reduction_WITH        sub_mean_reduction
101 /* the following functions modify their input but it shouldn't affect timing */
102 #undef  ALTIVEC_TEST_iquant_non_intra_m1_WITH       iquant_non_intra_m1
103 #undef  ALTIVEC_TEST_iquant_non_intra_m2_WITH       iquant_non_intra_m2
104 #undef  ALTIVEC_TEST_iquant_intra_m1_WITH           iquant_intra_m1
105 #undef  ALTIVEC_TEST_iquant_intra_m2_WITH           iquant_intra_m2
106 #undef  ALTIVEC_TEST_fdct_WITH                      fdct
107 #undef  ALTIVEC_TEST_idct_WITH                      idct
108 /* NOTE(review): the name after each #undef above is an extra token; presumably kept so an entry is enabled by changing #undef to #define -- confirm */
109 /* turn off (undef) DST during benchmarking, it only slows the function down
110  * since everything will be cached due to the benchmark loop.
111  */
112 #undef ALTIVEC_DST
113 
114 /* default benchmark settings. these values can be redefined before
115  * calling ALTIVEC_TEST() to customize values on a per function basis.
116  */
117 #define BENCHMARK_FREQUENCY  2000   /* benchmark every (n) calls */
118 #define BENCHMARK_MAX_RUNS   20     /* benchmark only (n) times */
119 #define BENCHMARK_ITERATIONS 100000 /* starting point for calibration */
120 #define BENCHMARK_INCREMENT  1.5    /* multiply iterations by this amount */
121 #define BENCHMARK_PRECISION  0.005  /* calibration goal */
122 #define BENCHMARK_PASSES     2      /* number of times the calibration goal
123                                      * must be met before continuing
124                                      */
125 #define BENCHMARK_TIMELIMIT  4      /* time limit in seconds for benchmark
126                                      * if calibration goal can't be met.
127                                      */
128 #define BENCHMARK_PROLOG            /* code to execute before benchmark */
129 #define BENCHMARK_EPILOG            /* code to execute after benchmark */
130 /* ALTIVEC_FUNCTION(name,ret,def) declares name##_altivec and name##_altivec_benchmark */
131 #  include "benchmark.h"
132 #  define ALTIVEC_TEST_SUFFIX(name) name##_altivec_benchmark
133 #  define ALTIVEC_FUNCTION(name,ret,def)                                     \
134     ret name##_altivec def;                                                  \
135     ret name##_altivec_benchmark def;
136 #  define ALTIVEC_TEST ALTIVEC_TEST_BENCHMARK
137 /* NOTE(review): ALTIVEC_TEST_BENCHMARK is not defined in this file; presumably benchmark.h provides it -- confirm */
138 /* }}} */
139 #elif defined(ALTIVEC_AMBER)
140 /* {{{ */
141 /* define each function to amber */
142 /* since some functions modify their input all are tested individually */
143 #if 1 /* altivec = 1, original C = 0 */
144 /* amber optimized functions */
145 #if 0 /* group1 = 1, group2 = 0 */
146 #define ALTIVEC_TEST_sad_00_WITH                 sad_00_altivec
147 #define ALTIVEC_TEST_sad_01_WITH                 sad_01_altivec
148 #define ALTIVEC_TEST_sad_10_WITH                 sad_10_altivec
149 #define ALTIVEC_TEST_sad_11_WITH                 sad_11_altivec
150 #undef  ALTIVEC_TEST_bsad_WITH                   bsad_altivec
151 #define ALTIVEC_TEST_sumsq_WITH                  sumsq_altivec
152 #define ALTIVEC_TEST_bsumsq_WITH                 bsumsq_altivec
153 #define ALTIVEC_TEST_sumsq_sub22_WITH            sumsq_sub22_altivec
154 #define ALTIVEC_TEST_bsumsq_sub22_WITH           bsumsq_sub22_altivec
155 #define ALTIVEC_TEST_quant_non_intra_WITH        quant_non_intra_altivec
156 #define ALTIVEC_TEST_quant_weight_coeff_intra_WITH quant_weight_coeff_intra_altivec
157 #define ALTIVEC_TEST_quant_weight_coeff_inter_WITH quant_weight_coeff_inter_altivec
158 #define ALTIVEC_TEST_iquant_non_intra_m1_WITH    iquant_non_intra_m1_altivec
159 #define ALTIVEC_TEST_iquant_non_intra_m2_WITH    iquant_non_intra_m2_altivec
160 #define ALTIVEC_TEST_iquant_intra_m1_WITH        iquant_intra_m1_altivec
161 #define ALTIVEC_TEST_iquant_intra_m2_WITH        iquant_intra_m2_altivec
162 #undef  ALTIVEC_TEST_sub_mean_reduction_WITH     sub_mean_reduction_altivec
163 #define ALTIVEC_TEST_variance_WITH               variance_altivec
164 #define ALTIVEC_TEST_sub_pred_WITH               sub_pred_altivec
165 #define ALTIVEC_TEST_add_pred_WITH               add_pred_altivec
166 #define ALTIVEC_TEST_pred_comp_WITH              pred_comp_altivec
167 #define ALTIVEC_TEST_subsample_image_WITH        subsample_image_altivec
168 #define ALTIVEC_TEST_field_dct_best_WITH         field_dct_best_altivec
169 #define ALTIVEC_TEST_fdct_WITH                   fdct_altivec
170 #define ALTIVEC_TEST_idct_WITH                   idct_altivec
171 #else /* the following call other amber functions, must amber separately */
172 #define ALTIVEC_TEST_build_sub44_mests_WITH      build_sub44_mests_altivec
173 #define ALTIVEC_TEST_build_sub22_mests_WITH      build_sub22_mests_altivec
174 #define ALTIVEC_TEST_find_best_one_pel_WITH      find_best_one_pel_altivec
175 #endif
176 #else
177 /* amber original functions */
178 #if 0 /* group1 = 1, group2 = 0 */
179 #define ALTIVEC_TEST_sad_00_WITH                 sad_00
180 #define ALTIVEC_TEST_sad_01_WITH                 sad_01
181 #define ALTIVEC_TEST_sad_10_WITH                 sad_10
182 #define ALTIVEC_TEST_sad_11_WITH                 sad_11
183 #undef  ALTIVEC_TEST_bsad_WITH                   bsad
184 #define ALTIVEC_TEST_sumsq_WITH                  sumsq
185 #define ALTIVEC_TEST_bsumsq_WITH                 bsumsq
186 #define ALTIVEC_TEST_sumsq_sub22_WITH            sumsq_sub22
187 #define ALTIVEC_TEST_bsumsq_sub22_WITH           bsumsq_sub22
188 #define ALTIVEC_TEST_quant_non_intra_WITH        quant_non_intra
189 #define ALTIVEC_TEST_quant_weight_coeff_intra_WITH quant_weight_coeff_intra
190 #define ALTIVEC_TEST_quant_weight_coeff_inter_WITH quant_weight_coeff_inter
191 #define ALTIVEC_TEST_iquant_non_intra_m1_WITH    iquant_non_intra_m1
192 #define ALTIVEC_TEST_iquant_non_intra_m2_WITH    iquant_non_intra_m2
193 #define ALTIVEC_TEST_iquant_intra_m1_WITH        iquant_intra_m1
194 #define ALTIVEC_TEST_iquant_intra_m2_WITH        iquant_intra_m2
195 #undef  ALTIVEC_TEST_sub_mean_reduction_WITH     sub_mean_reduction
196 #define ALTIVEC_TEST_variance_WITH               variance
197 #define ALTIVEC_TEST_sub_pred_WITH               sub_pred
198 #define ALTIVEC_TEST_add_pred_WITH               add_pred
199 #define ALTIVEC_TEST_pred_comp_WITH              pred_comp
200 #define ALTIVEC_TEST_subsample_image_WITH        subsample_image
201 #define ALTIVEC_TEST_field_dct_best_WITH         field_dct_best
202 #define ALTIVEC_TEST_fdct_WITH                   fdct
203 #define ALTIVEC_TEST_idct_WITH                   idct
204 #else /* the following call other amber functions, must amber separately */
205 #define ALTIVEC_TEST_build_sub44_mests_WITH      build_sub44_mests
206 #define ALTIVEC_TEST_build_sub22_mests_WITH      build_sub22_mests
207 #define ALTIVEC_TEST_find_best_one_pel_WITH      find_best_one_pel
208 #endif
209 #endif
210 /* with the switches above (#if 1, then #if 0) only the three group2 entries mapped to _altivec names are defined */
211 #define AMBER_MAX_TRACES 50  /* number of times to trace each function */
212 /* NOTE(review): the disabled block below defines AMBER_MAX_TRACES again (1 vs 50 above); enabling it as-is gives a conflicting redefinition */
213 #if 0 /* old global amber trace activation */
214 #define AMBER_ENABLE
215 #define AMBER_MAX_TRACES 1 /* trace each function once */
216 #define AMBER_MAX_EXIT 0
217 #endif
218 /* NOTE(review): ALTIVEC_TEST_AMBER is not defined in this file and this branch includes no header; confirm where it comes from */
219 #  define ALTIVEC_TEST_SUFFIX(name) name##_altivec_amber
220 #  define ALTIVEC_FUNCTION(name,ret,def)                                     \
221     ret name##_altivec def;                                                  \
222     ret name##_altivec_amber def;
223 #  define ALTIVEC_TEST ALTIVEC_TEST_AMBER
224 /* }}} */
225 #else /* PRODUCTION: ALTIVEC_FUNCTION declares only name##_altivec; this branch defines neither ALTIVEC_TEST nor ALTIVEC_TEST_SUFFIX */
226 #  define ALTIVEC_FUNCTION(name,ret,def)                                     \
227     ret name##_altivec def;
228 #endif /* ALTIVEC_VERIFY, ALTIVEC_BENCHMARK, ALTIVEC_AMBER */
229 
230 /* non-configurable macro definitions {{{ */
231 /* ALTIVEC_SUFFIX(name) pastes _altivec onto name */
232 #define ALTIVEC_SUFFIX(name) name##_altivec
233 
234 /* macros to assist in code generation */
235 #define AVCAT(a,b)   _AVCAT(a,b) /* allow expansion */
236 #define _AVCAT(a,b)  a##b        /* concatenate */
237 #define AVSTR(a)     _AVSTR(a)   /* allow expansion */
238 #define _AVSTR(a)    #a          /* convert to string */
239 /* AVRET* expand differently depending on return type; only int and void variants are defined here */
240 #define AVRETDECL(type,name)  _AVRETDECL(_AVRETDECL_##type,name)
241 #define _AVRETDECL(type,name)  type(name)
242 #define _AVRETDECL_int(name)   int name
243 #define _AVRETDECL_void(name)  /* void name */
244 #define AVRETSET(type,var,call)  _AVRETSET(_AVRETSET_##type,var,call)
245 #define _AVRETSET(type,var,call)  type(var,call)
246 #define _AVRETSET_int(var,call)      var = call
247 #define _AVRETSET_void(var,call)  /* var = */ call
248 #define AVRETURN(type,var)  _AVRETURN(_AVRETURN_##type,var)
249 #define _AVRETURN(type,var)  type(var)
250 #define _AVRETURN_int(var)   return var
251 #define _AVRETURN_void(var)  /* return var */
252 /* printf format codes used by ALTIVEC_VERIFY; only int and float are defined here */
253 #define AVFMT(ret)  _AVFMT(_AVFMT_##ret)
254 #define _AVFMT(ret) ret
255 #define _AVFMT_int    "%d"
256 #define _AVFMT_float  "%f"
257 /* NOTE(review): the helper names above that start with underscore + uppercase fall in the identifier space C reserves for the implementation */
258 /* ALTIVEC_TEST_FUNCTION expands to a defined() test. NOTE(review): a `defined` produced by macro expansion inside #if is undefined behavior in ISO C -- confirm the supported compilers accept it */
259 #define ALTIVEC_TEST_FUNCTION(name) defined(ALTIVEC_TEST_##name##_WITH)
260 /* ALTIVEC_TEST_WITH(name) yields whatever ALTIVEC_TEST_<name>_WITH is defined as; the extra macro level lets the pasted name be expanded */
261 #define ALTIVEC_TEST_WITH(name) \
262         _ALTIVEC_TEST_WITH(ALTIVEC_TEST_##name##_WITH)
263 #define _ALTIVEC_TEST_WITH(name) name /* allow expansion */
264 
265 /* }}} */
266 
267 /* vim:set sw=4 softtabstop=4 foldmethod=marker foldlevel=0: */
268