#!/usr/bin/perl
#
# Code generator: prints, on STDOUT, a C source file containing several
# alternative implementations (plain pointer walk and various unrollings) of
# the element-wise binary kernels add/subtract/multiply/divide for the f32 and
# f64 precisions.  Every implementation is registered with OIL_DEFINE_IMPL.

use strict;
use warnings;

# The header is emitted through a NON-interpolating heredoc.  With an
# interpolating one, "@schleef" in the e-mail address is treated as an (empty)
# array and the generated copyright line is silently mangled to "<ds.org>".
print <<'EOF';
/* This file is autogenerated. Do not edit. */
/*
 * LIBOIL - Library of Optimized Inner Loops
 * Copyright (c) 2005 David A. Schleef <ds@schleef.org>
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
 * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT,
 * INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
 * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
 * SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
 * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING
 * IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
 * POSSIBILITY OF SUCH DAMAGE.
 */

#ifdef HAVE_CONFIG_H
#include "config.h"
#endif

#include <math.h>

#include <liboil/liboil.h>
#include <liboil/liboilclasses.h>

EOF

# All generator subs below share the same calling convention:
#   $kernel    - kernel base name used in the C identifiers (e.g. "add")
#   $precision - precision suffix (e.g. "f32"); the C element type is
#                "oil_type_<precision>"
#   $operator  - C binary operator text placed between the two operands
# Each prints one static C function plus its OIL_DEFINE_IMPL registration.

# Emit the "<kernel>_<precision>_pointer" implementation: one element per
# iteration, advancing all three pointers.
sub binary_pointer
{
    my ($kernel, $precision, $operator) = @_;
    my $type = "oil_type_$precision";

    print <<"EOF";
static void
${kernel}_${precision}_pointer (${type} *dest, ${type} *src1, ${type} *src2, int n)
{
  while (n) {
    *dest = *src1 ${operator} *src2;
    dest++;
    src1++;
    src2++;
    n--;
  }
}
OIL_DEFINE_IMPL (${kernel}_${precision}_pointer, ${kernel}_${precision});

EOF
    return;
}

# Emit the "<kernel>_<precision>_unroll2" implementation: handles one leading
# element when n is odd, then processes two elements per iteration.
sub binary_unroll2
{
    my ($kernel, $precision, $operator) = @_;
    my $type = "oil_type_$precision";

    print <<"EOF";
static void
${kernel}_${precision}_unroll2 (${type} *dest, ${type} *src1, ${type} *src2, int n)
{
  int i;

  if (n & 1) {
    dest[0] = src1[0] ${operator} src2[0];
    dest++;
    src1++;
    src2++;
    n--;
  }
  for(i=0;i<n;i+=2){
    dest[i] = src1[i] ${operator} src2[i];
    dest[i+1] = src1[i+1] ${operator} src2[i+1];
  }
}
OIL_DEFINE_IMPL (${kernel}_${precision}_unroll2, ${kernel}_${precision});

EOF
    return;
}

# Emit the "<kernel>_<precision>_unroll4a" implementation: peels leading
# elements until n is a multiple of four, then processes four per iteration.
sub binary_unroll4a
{
    my ($kernel, $precision, $operator) = @_;
    my $type = "oil_type_$precision";

    print <<"EOF";
static void
${kernel}_${precision}_unroll4a (${type} *dest, ${type} *src1, ${type} *src2, int n)
{
  int i;

  while (n & 3) {
    dest[0] = src1[0] ${operator} src2[0];
    dest++;
    src1++;
    src2++;
    n--;
  }
  for(i=0;i<n;i+=4){
    dest[i] = src1[i] ${operator} src2[i];
    dest[i+1] = src1[i+1] ${operator} src2[i+1];
    dest[i+2] = src1[i+2] ${operator} src2[i+2];
    dest[i+3] = src1[i+3] ${operator} src2[i+3];
  }
}
OIL_DEFINE_IMPL (${kernel}_${precision}_unroll4a, ${kernel}_${precision});

EOF
    return;
}

# Emit the "<kernel>_<precision>_unroll4b" implementation: indexed access,
# four elements per iteration over the largest multiple of four, followed by
# a one-at-a-time loop for the remaining elements.
sub binary_unroll4b
{
    my ($kernel, $precision, $operator) = @_;
    my $type = "oil_type_$precision";

    print <<"EOF";
static void
${kernel}_${precision}_unroll4b (${type} *dest, ${type} *src1, ${type} *src2, int n)
{
  int i;

  for(i=0;i<(n&(~0x3));i+=4){
    dest[i+0] = src1[i+0] ${operator} src2[i+0];
    dest[i+1] = src1[i+1] ${operator} src2[i+1];
    dest[i+2] = src1[i+2] ${operator} src2[i+2];
    dest[i+3] = src1[i+3] ${operator} src2[i+3];
  }
  for(;i<n;i++){
    dest[i] = src1[i] ${operator} src2[i];
  }
}
OIL_DEFINE_IMPL (${kernel}_${precision}_unroll4b, ${kernel}_${precision});

EOF
    return;
}

# Emit the "<kernel>_<precision>_unroll4c" implementation: same loop shape as
# unroll4b, but using post-incremented pointers instead of indexing.
sub binary_unroll4c
{
    my ($kernel, $precision, $operator) = @_;
    my $type = "oil_type_$precision";

    print <<"EOF";
static void
${kernel}_${precision}_unroll4c (${type} *dest, ${type} *src1, ${type} *src2, int n)
{
  int i;

  for(i=0;i<(n&(~0x3));i+=4){
    *dest++ = *src1++ ${operator} *src2++;
    *dest++ = *src1++ ${operator} *src2++;
    *dest++ = *src1++ ${operator} *src2++;
    *dest++ = *src1++ ${operator} *src2++;
  }
  for(;i<n;i++){
    *dest++ = *src1++ ${operator} *src2++;
  }
}
OIL_DEFINE_IMPL (${kernel}_${precision}_unroll4c, ${kernel}_${precision});

EOF
    return;
}

# Kernel name / C operator pairs.  Kept as an ordered list rather than walked
# with each() over a hash: hash iteration order is randomized, which made the
# order of functions in the generated file differ from run to run.
my @binary_operators = (
    [ add      => '+' ],
    [ subtract => '-' ],
    [ multiply => '*' ],
    [ divide   => '/' ],
);

my @types = qw(f32 f64);

# Generators, in the order their output appears for each kernel/precision.
my @generators = (
    \&binary_pointer,
    \&binary_unroll2,
    \&binary_unroll4a,
    \&binary_unroll4b,
    \&binary_unroll4c,
);

for my $entry (@binary_operators) {
    my ($name, $op) = @{$entry};
    for my $prec (@types) {
        for my $generate (@generators) {
            $generate->($name, $prec, $op);
        }
    }
}

exit 0;

# The script used to continue here with a hand-written list of the same
# generator calls the loop above performs (unreachable after "exit 0", and
# redundant), followed by an unused string holding C reference
# implementations.  The calls have been dropped; the C text is preserved
# below as a POD comment so it is neither compiled nor emitted.

=begin comment

static void
subtract_f32_ref (float *dest, float *src1, float *src2, int n)
{
  int i;

  for(i=0;i<n;i++){
    dest[i] = src1[i] - src2[i];
  }
}
OIL_DEFINE_IMPL (subtract_f32_ref, subtract_f32);

static void
multiply_f32_ref (float *dest, float *src1, float *src2, int n)
{
  int i;

  for(i=0;i<n;i++){
    dest[i] = src1[i] * src2[i];
  }
}
OIL_DEFINE_IMPL (multiply_f32_ref, multiply_f32);

static void
divide_f32_ref (float *dest, float *src1, float *src2, int n)
{
  int i;

  for(i=0;i<n;i++){
    dest[i] = src1[i] / src2[i];
  }
}
OIL_DEFINE_IMPL_REF (divide_f32_ref, divide_f32);

static void
minimum_f32_ref (float *dest, float *src1, float *src2, int n)
{
  int i;

  for(i=0;i<n;i++){
    dest[i] = (src1[i] < src2[i]) ? src1[i] : src2[i];
  }
}
OIL_DEFINE_IMPL_REF (minimum_f32_ref, minimum_f32);

static void
maximum_f32_ref (float *dest, float *src1, float *src2, int n)
{
  int i;

  for(i=0;i<n;i++){
    dest[i] = (src1[i] > src2[i]) ? src1[i] : src2[i];
  }
}
OIL_DEFINE_IMPL_REF (maximum_f32_ref, maximum_f32);

static void
negative_f32_ref (float *dest, float *src1, int n)
{
  int i;

  for(i=0;i<n;i++){
    dest[i] = -src1[i];
  }
}
OIL_DEFINE_IMPL_REF (negative_f32_ref, negative_f32);

static void
inverse_f32_ref (float *dest, float *src1, int n)
{
  int i;

  for(i=0;i<n;i++){
    dest[i] = 1.0/src1[i];
  }
}
OIL_DEFINE_IMPL_REF (inverse_f32_ref, inverse_f32);

static void
sign_f32_ref (float *dest, float *src1, int n)
{
  int i;

  for(i=0;i<n;i++){
    dest[i] = (src1[i] < 0) ? -src1[i] : src1[i];
  }
}
OIL_DEFINE_IMPL_REF (sign_f32_ref, sign_f32);

static void
floor_f32_ref (float *dest, float *src1, int n)
{
  int i;

  for(i=0;i<n;i++){
    dest[i] = floor(src1[i]);
  }
}
OIL_DEFINE_IMPL_REF (floor_f32_ref, floor_f32);



static void
scalaradd_f32_ns_ref (float *dest, float *src1, float *src2, int n)
{
  int i;

  for(i=0;i<n;i++){
    dest[i] = src1[i] + src2[0];
  }
}
OIL_DEFINE_IMPL_REF (scalaradd_f32_ns_ref, scalaradd_f32_ns);

static void
scalarmultiply_f32_ns_ref (float *dest, float *src1, float *src2, int n)
{
  int i;

  for(i=0;i<n;i++){
    dest[i] = src1[i] * src2[0];
  }
}
OIL_DEFINE_IMPL_REF (scalarmultiply_f32_ns_ref, scalarmultiply_f32_ns);

=end comment

=cut