/*
 * Copyright (c) 2005
 *	Eric Anholt.  All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 */
26 
27 #ifdef HAVE_CONFIG_H
28 #include "config.h"
29 #endif
30 #include <liboil/liboilclasses.h>
31 #include <liboil/liboilfunction.h>
32 #include <mmintrin.h>
33 
34 #ifdef ENABLE_BROKEN_IMPLS
/*
 * Overlays an MMX value with a 64-bit integer, so that a constant can be
 * written as an integer literal through .ull and read back as an __m64
 * through .m64.
 */
union m64_int {
  __m64 m64;
  uint64_t ull;
};
39 
40 static const struct _MMXData {
41   union m64_int mmx_4x0080;
42 } c = {
43     .mmx_4x0080.ull =	0x0080008000800080ULL,
44 };
45 
/* MC(name) reads entry mmx_<name> of the constant table `c` as an __m64. */
#define MC(x) (c.mmx_##x.m64)
47 
/*
 * MMX implementation of recon8x8_intra; compiled only when
 * ENABLE_BROKEN_IMPLS is defined.
 *
 * For each of 8 rows, the 8 int16_t values at change are offset by the
 * MC(4x0080) constant (128 per lane) using signed saturating addition, then
 * packed to 8 bytes with unsigned saturation (0..255) and stored at dest.
 *
 * dest:   output block; advanced by ds bytes after each row.
 * ds:     byte stride of dest.
 * change: 64 contiguous 16-bit input values, 8 per row.
 *
 * NOTE(review): rows are read and written through __m64 pointer casts, so
 * this relies on the target accepting 8-byte accesses at whatever alignment
 * the caller provides -- confirm against callers.
 */
static void
recon8x8_intra_mmx (uint8_t *dest, int ds, int16_t *change)
{
  const __m64 bias = MC(4x0080);
  int row;

  for (row = 0; row < 8; row++) {
    const __m64 *delta = (__m64 *)change;
    __m64 lo = _mm_adds_pi16(delta[0], bias);
    __m64 hi = _mm_adds_pi16(delta[1], bias);

    *(__m64 *)dest = _mm_packs_pu16(lo, hi);

    dest += ds;
    change += 8;
  }
  /* Leave MMX state so later x87 floating-point code works. */
  _mm_empty();
}
/* Declare recon8x8_intra_mmx as an implementation of the recon8x8_intra
 * class, tagged with OIL_IMPL_FLAG_MMX. */
OIL_DEFINE_IMPL_FULL (recon8x8_intra_mmx, recon8x8_intra, OIL_IMPL_FLAG_MMX);
68 #endif
69 
/*
 * MMX implementation of recon8x8_inter: rebuilds an 8x8 block by adding
 * 16-bit values to 8-bit source pixels.
 *
 * For each of 8 rows, the 8 bytes at src are zero-extended to 16 bits, the
 * matching 8 int16_t values at change are added with signed saturation, and
 * the sums are packed to bytes with unsigned saturation (0..255) and stored
 * at dest.
 *
 * dest:   output block; advanced by ds bytes after each row.
 * ds:     byte stride of dest.
 * src:    input pixels; advanced by ss bytes after each row.
 * ss:     byte stride of src.
 * change: 64 contiguous 16-bit values, 8 per row.
 * dss:    not used by this implementation.
 *
 * NOTE(review): rows are read and written through __m64 pointer casts, so
 * this relies on the target accepting 8-byte accesses at whatever alignment
 * the caller provides -- confirm against callers.
 */
static void
recon8x8_inter_mmx (uint8_t *dest, int ds, uint8_t *src, int ss,
    int16_t *change, int dss)
{
  const __m64 zero = _mm_setzero_si64();
  int row;

  (void) dss;

  for (row = 0; row < 8; row++) {
    /* Load the source row once; both halves are widened from it. */
    const __m64 pixels = *(__m64 *)src;
    const __m64 *delta = (__m64 *)change;
    __m64 lo, hi;

    lo = _mm_adds_pi16(_mm_unpacklo_pi8(pixels, zero), delta[0]);
    hi = _mm_adds_pi16(_mm_unpackhi_pi8(pixels, zero), delta[1]);
    *(__m64 *)dest = _mm_packs_pu16(lo, hi);

    change += 8;
    dest += ds;
    src += ss;
  }
  /* Leave MMX state so later x87 floating-point code works. */
  _mm_empty();
}
/* Declare recon8x8_inter_mmx as an implementation of the recon8x8_inter
 * class, tagged with OIL_IMPL_FLAG_MMX. */
OIL_DEFINE_IMPL_FULL (recon8x8_inter_mmx, recon8x8_inter, OIL_IMPL_FLAG_MMX);
93 
/*
 * MMX implementation of recon8x8_inter2: rebuilds an 8x8 block from the
 * average of two 8-bit sources plus 16-bit values.
 *
 * For each of 8 rows, the 8 bytes at s1 and at s2 are zero-extended to
 * 16 bits and added, the sum is shifted right by one (a truncating average),
 * the matching 8 int16_t values at change are added with signed saturation,
 * and the result is packed to bytes with unsigned saturation (0..255) and
 * stored at dest.
 *
 * dest:   output block; advanced by ds bytes after each row.
 * ds:     byte stride of dest.
 * s1:     first input; advanced by ss1 bytes after each row.
 * ss1:    byte stride of s1.
 * s2:     second input; advanced by ss2 bytes after each row.
 * ss2:    byte stride of s2.
 * change: 64 contiguous 16-bit values, 8 per row.
 *
 * NOTE(review): rows are read and written through __m64 pointer casts, so
 * this relies on the target accepting 8-byte accesses at whatever alignment
 * the caller provides -- confirm against callers.
 */
static void
recon8x8_inter2_mmx (uint8_t *dest, int ds, uint8_t *s1, int ss1, uint8_t *s2,
    int ss2, int16_t *change)
{
  const __m64 zero = _mm_setzero_si64();
  int row;

  for (row = 0; row < 8; row++) {
    /* Load each source row once; both halves are widened from it. */
    const __m64 a = *(__m64 *)s1;
    const __m64 b = *(__m64 *)s2;
    const __m64 *delta = (__m64 *)change;
    __m64 lo, hi;

    /* Two zero-extended bytes sum to at most 510, so this add cannot
     * saturate. */
    lo = _mm_adds_pu16(_mm_unpacklo_pi8(a, zero), _mm_unpacklo_pi8(b, zero));
    hi = _mm_adds_pu16(_mm_unpackhi_pi8(a, zero), _mm_unpackhi_pi8(b, zero));

    lo = _mm_srli_pi16(lo, 1);
    hi = _mm_srli_pi16(hi, 1);

    lo = _mm_adds_pi16(lo, delta[0]);
    hi = _mm_adds_pi16(hi, delta[1]);
    *(__m64 *)dest = _mm_packs_pu16(lo, hi);

    change += 8;
    dest += ds;
    s1 += ss1;
    s2 += ss2;
  }
  /* Leave MMX state so later x87 floating-point code works. */
  _mm_empty();
}
/* Declare recon8x8_inter2_mmx as an implementation of the recon8x8_inter2
 * class, tagged with OIL_IMPL_FLAG_MMX. */
OIL_DEFINE_IMPL_FULL (recon8x8_inter2_mmx, recon8x8_inter2, OIL_IMPL_FLAG_MMX);
124