1 /* Compute the sum of the squares of a vector of signed shorts
2 
3  * The SSE2 and MMX assist routines both operate on multiples of
4  * 8 words; they differ only in their alignment requirements (8 bytes
5  * for MMX, 16 bytes for SSE2)
6 
7  * Copyright 2004 Phil Karn, KA9Q
8  * May be used under the terms of the GNU Lesser Public License (LGPL)
9  */

#include <stdint.h>

/* SSE2 helper, defined outside this file.
 *
 * Per the header comment above, the assist routine operates on multiples
 * of 8 words and requires 16-byte alignment of its input.  sumsq_sse2()
 * hands it the aligned pointer together with the full remaining count and
 * then skips (cnt & ~7) samples itself -- presumably the helper ignores
 * the trailing (cnt & 7) samples; confirm against its implementation.
 */
long long sumsq_sse2_assist(signed short *in, int cnt);
12 
/* Return the sum of the squares of the cnt signed 16-bit samples at in.
 *
 * The work is split into three phases:
 *   1. a scalar loop over leading samples until in is 16-byte aligned
 *      (the alignment the SSE2 assist requires),
 *   2. one call to sumsq_sse2_assist() with the aligned pointer and the
 *      remaining count; the whole groups of 8 samples are then treated
 *      as consumed,
 *   3. a scalar loop over the at most 7 samples left over.
 *
 * A cnt of zero or less yields 0 without reading from in.
 */
long long sumsq_sse2(signed short *in,int cnt){
  long long sum = 0;

  /* Nothing to add for an empty vector.  A negative count would otherwise
   * walk the pointer backwards and never terminate the tail loop.
   */
  if(cnt <= 0)
    return 0;

  /* Handle stuff before the next 16-byte boundary.  uintptr_t is used so
   * the pointer value is not narrowed on targets where int is smaller
   * than a pointer.
   */
  while(((uintptr_t)in & 15) != 0 && cnt != 0){
    sum += (long)in[0] * in[0];
    in++;
    cnt--;
  }

  /* Bulk of the data: aligned groups of 8 samples */
  sum += sumsq_sse2_assist(in,cnt);
  in += cnt & ~7;
  cnt &= 7;

  /* Handle up to 7 trailing words */
  while(cnt != 0){
    sum += (long)in[0] * in[0];
    in++;
    cnt--;
  }
  return sum;
}
34