1#!/usr/bin/perl
2#
3
4
5
# Emit the fixed preamble of the generated C file: the "autogenerated"
# banner, the license text, and the #include lines.
#
# The heredoc is single-quoted on purpose so that Perl prints it verbatim.
# With an interpolating heredoc, "@schleef" inside the e-mail address is
# expanded as an (empty) array and the address is emitted as "<ds.org>".
print <<'EOF';
/* This file is autogenerated.  Do not edit. */
/*
 * LIBOIL - Library of Optimized Inner Loops
 * Copyright (c) 2005 David A. Schleef <ds@schleef.org>
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
 * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT,
 * INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
 * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
 * SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
 * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING
 * IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
 * POSSIBILITY OF SUCH DAMAGE.
 */

#ifdef HAVE_CONFIG_H
#include "config.h"
#endif

#include <math.h>

#include <liboil/liboil.h>
#include <liboil/liboilclasses.h>

EOF
46
47
# Print, on STDOUT, the C source of the "<kernel>_<precision>_pointer"
# variant: a static function that walks dest/src1/src2 with pointer
# increments, applying the operator to one element per iteration, followed
# by its OIL_DEFINE_IMPL line.
#
# Arguments (positional):
#   kernel    - kernel name used as the function-name prefix, e.g. "add"
#   precision - type suffix, e.g. "f32"; the C type is "oil_type_<precision>"
#   operator  - C binary operator text placed between the two operands
sub binary_pointer
{
	my ($kernel, $precision, $operator) = @_;
	my $ctype = "oil_type_$precision";
	my $base = "${kernel}_${precision}";
	my $impl = "${base}_pointer";

	print <<"END_C";
static void
${impl} (${ctype} *dest, ${ctype} *src1, ${ctype} *src2, int n)
{
  while (n) {
    *dest = *src1 ${operator} *src2;
    dest++;
    src1++;
    src2++;
    n--;
  }
}
OIL_DEFINE_IMPL (${impl}, ${base});

END_C
}
72
# Print, on STDOUT, the C source of the "<kernel>_<precision>_unroll2"
# variant: a static function that first handles one element when n is odd,
# then processes the remaining elements two per loop iteration, followed by
# its OIL_DEFINE_IMPL line.
#
# Arguments (positional):
#   kernel    - kernel name used as the function-name prefix, e.g. "add"
#   precision - type suffix, e.g. "f32"; the C type is "oil_type_<precision>"
#   operator  - C binary operator text placed between the two operands
sub binary_unroll2
{
	my ($kernel, $precision, $operator) = @_;
	my $ctype = "oil_type_$precision";
	my $base = "${kernel}_${precision}";
	my $impl = "${base}_unroll2";

	print <<"END_C";
static void
${impl} (${ctype} *dest, ${ctype} *src1, ${ctype} *src2, int n)
{
  int i;

  if (n & 1) {
    dest[0] = src1[0] ${operator} src2[0];
    dest++;
    src1++;
    src2++;
    n--;
  }
  for(i=0;i<n;i+=2){
    dest[i] = src1[i] ${operator} src2[i];
    dest[i+1] = src1[i+1] ${operator} src2[i+1];
  }
}
OIL_DEFINE_IMPL (${impl}, ${base});

END_C
}
103
# Print, on STDOUT, the C source of the "<kernel>_<precision>_unroll4a"
# variant: a static function that handles single elements while the low two
# bits of n are set, then processes the remaining elements four per loop
# iteration, followed by its OIL_DEFINE_IMPL line.
#
# Arguments (positional):
#   kernel    - kernel name used as the function-name prefix, e.g. "add"
#   precision - type suffix, e.g. "f32"; the C type is "oil_type_<precision>"
#   operator  - C binary operator text placed between the two operands
sub binary_unroll4a
{
	my ($kernel, $precision, $operator) = @_;
	my $ctype = "oil_type_$precision";
	my $base = "${kernel}_${precision}";
	my $impl = "${base}_unroll4a";

	print <<"END_C";
static void
${impl} (${ctype} *dest, ${ctype} *src1, ${ctype} *src2, int n)
{
  int i;

  while (n & 3) {
    dest[0] = src1[0] ${operator} src2[0];
    dest++;
    src1++;
    src2++;
    n--;
  }
  for(i=0;i<n;i+=4){
    dest[i] = src1[i] ${operator} src2[i];
    dest[i+1] = src1[i+1] ${operator} src2[i+1];
    dest[i+2] = src1[i+2] ${operator} src2[i+2];
    dest[i+3] = src1[i+3] ${operator} src2[i+3];
  }
}
OIL_DEFINE_IMPL (${impl}, ${base});

END_C
}
136
# Print, on STDOUT, the C source of the "<kernel>_<precision>_unroll4b"
# variant: a static function that processes four indexed elements per
# iteration up to n rounded down to a multiple of four, then finishes the
# remaining elements one at a time, followed by its OIL_DEFINE_IMPL line.
#
# Arguments (positional):
#   kernel    - kernel name used as the function-name prefix, e.g. "add"
#   precision - type suffix, e.g. "f32"; the C type is "oil_type_<precision>"
#   operator  - C binary operator text placed between the two operands
sub binary_unroll4b
{
	my ($kernel, $precision, $operator) = @_;
	my $ctype = "oil_type_$precision";
	my $base = "${kernel}_${precision}";
	my $impl = "${base}_unroll4b";

	print <<"END_C";
static void
${impl} (${ctype} *dest, ${ctype} *src1, ${ctype} *src2, int n)
{
  int i;

  for(i=0;i<(n&(~0x3));i+=4){
    dest[i+0] = src1[i+0] ${operator} src2[i+0];
    dest[i+1] = src1[i+1] ${operator} src2[i+1];
    dest[i+2] = src1[i+2] ${operator} src2[i+2];
    dest[i+3] = src1[i+3] ${operator} src2[i+3];
  }
  for(;i<n;i++){
    dest[i] = src1[i] ${operator} src2[i];
  }
}
OIL_DEFINE_IMPL (${impl}, ${base});

END_C
}
165
# Print, on STDOUT, the C source of the "<kernel>_<precision>_unroll4c"
# variant: a static function with the same loop bounds as the unroll4b
# variant, but advancing dest/src1/src2 with post-increments instead of
# indexing, followed by its OIL_DEFINE_IMPL line.
#
# Arguments (positional):
#   kernel    - kernel name used as the function-name prefix, e.g. "add"
#   precision - type suffix, e.g. "f32"; the C type is "oil_type_<precision>"
#   operator  - C binary operator text placed between the two operands
sub binary_unroll4c
{
	my ($kernel, $precision, $operator) = @_;
	my $ctype = "oil_type_$precision";
	my $base = "${kernel}_${precision}";
	my $impl = "${base}_unroll4c";

	print <<"END_C";
static void
${impl} (${ctype} *dest, ${ctype} *src1, ${ctype} *src2, int n)
{
  int i;

  for(i=0;i<(n&(~0x3));i+=4){
    *dest++ = *src1++ ${operator} *src2++;
    *dest++ = *src1++ ${operator} *src2++;
    *dest++ = *src1++ ${operator} *src2++;
    *dest++ = *src1++ ${operator} *src2++;
  }
  for(;i<n;i++){
    *dest++ = *src1++ ${operator} *src2++;
  }
}
OIL_DEFINE_IMPL (${impl}, ${base});

END_C
}
194
# Kernel name => C operator text inserted between the two operands.
my %binary_operators = (
 "add" => "+",
 "subtract" => "-",
 "multiply" => "*",
 "divide" => "/"
);

# Precision suffixes; each one is emitted for every kernel.
my @types = ( "f32", "f64" );

# Emit every implementation variant for every kernel/precision pair.
# The kernels are visited in sorted name order: each() walks a hash in an
# unspecified order that may differ from run to run, which would make the
# generated file differ between otherwise identical runs.
foreach my $name (sort keys %binary_operators) {
  my $op = $binary_operators{$name};
  foreach my $prec (@types) {
    binary_pointer($name, $prec, $op);
    binary_unroll2($name, $prec, $op);
    binary_unroll4a($name, $prec, $op);
    binary_unroll4b($name, $prec, $op);
    binary_unroll4c($name, $prec, $op);
  }
}

# Generation is complete; nothing after this statement is executed.
exit 0;
215
# NOTE: as the script stands these statements are never executed, because
# the "exit 0;" above ends the program first.  They request every
# implementation variant for the subtract/add/multiply/divide kernels, first
# for f32 and then for f64 -- the same kernel/precision pairs the loop
# above already emits.
foreach my $prec ("f32", "f64") {
  foreach my $pair (["subtract", "-"], ["add", "+"], ["multiply", "*"], ["divide", "/"]) {
    my ($name, $op) = @$pair;
    binary_pointer($name, $prec, $op);
    binary_unroll2($name, $prec, $op);
    binary_unroll4a($name, $prec, $op);
    binary_unroll4b($name, $prec, $op);
    binary_unroll4c($name, $prec, $op);
  }
}
263
# NOTE: this assignment sits after "exit 0;" and is therefore never reached;
# nothing visible in this script prints or otherwise reads $blah.  The text
# is C source for a set of f32 "_ref" functions, held here verbatim
# (including its leading blank line and trailing blank lines).
$blah = <<'END_OF_REFS';

static void
subtract_f32_ref (float *dest, float *src1, float *src2, int n)
{
  int i;

  for(i=0;i<n;i++){
    dest[i] = src1[i] - src2[i];
  }
}
OIL_DEFINE_IMPL (subtract_f32_ref, subtract_f32);

static void
multiply_f32_ref (float *dest, float *src1, float *src2, int n)
{
  int i;

  for(i=0;i<n;i++){
    dest[i] = src1[i] * src2[i];
  }
}
OIL_DEFINE_IMPL (multiply_f32_ref, multiply_f32);

static void
divide_f32_ref (float *dest, float *src1, float *src2, int n)
{
  int i;

  for(i=0;i<n;i++){
    dest[i] = src1[i] / src2[i];
  }
}
OIL_DEFINE_IMPL_REF (divide_f32_ref, divide_f32);

static void
minimum_f32_ref (float *dest, float *src1, float *src2, int n)
{
  int i;

  for(i=0;i<n;i++){
    dest[i] = (src1[i] < src2[i]) ? src1[i] : src2[i];
  }
}
OIL_DEFINE_IMPL_REF (minimum_f32_ref, minimum_f32);

static void
maximum_f32_ref (float *dest, float *src1, float *src2, int n)
{
  int i;

  for(i=0;i<n;i++){
    dest[i] = (src1[i] > src2[i]) ? src1[i] : src2[i];
  }
}
OIL_DEFINE_IMPL_REF (maximum_f32_ref, maximum_f32);

static void
negative_f32_ref (float *dest, float *src1, int n)
{
  int i;

  for(i=0;i<n;i++){
    dest[i] = -src1[i];
  }
}
OIL_DEFINE_IMPL_REF (negative_f32_ref, negative_f32);

static void
inverse_f32_ref (float *dest, float *src1, int n)
{
  int i;

  for(i=0;i<n;i++){
    dest[i] = 1.0/src1[i];
  }
}
OIL_DEFINE_IMPL_REF (inverse_f32_ref, inverse_f32);

static void
sign_f32_ref (float *dest, float *src1, int n)
{
  int i;

  for(i=0;i<n;i++){
    dest[i] = (src1[i] < 0) ? -src1[i] : src1[i];
  }
}
OIL_DEFINE_IMPL_REF (sign_f32_ref, sign_f32);

static void
floor_f32_ref (float *dest, float *src1, int n)
{
  int i;

  for(i=0;i<n;i++){
    dest[i] = floor(src1[i]);
  }
}
OIL_DEFINE_IMPL_REF (floor_f32_ref, floor_f32);



static void
scalaradd_f32_ns_ref (float *dest, float *src1, float *src2, int n)
{
  int i;

  for(i=0;i<n;i++){
    dest[i] = src1[i] + src2[0];
  }
}
OIL_DEFINE_IMPL_REF (scalaradd_f32_ns_ref, scalaradd_f32_ns);

static void
scalarmultiply_f32_ns_ref (float *dest, float *src1, float *src2, int n)
{
  int i;

  for(i=0;i<n;i++){
    dest[i] = src1[i] * src2[0];
  }
}
OIL_DEFINE_IMPL_REF (scalarmultiply_f32_ns_ref, scalarmultiply_f32_ns);


END_OF_REFS
390