1*b30d1939SAndy Fiddaman /***********************************************************************
2*b30d1939SAndy Fiddaman *                                                                      *
3*b30d1939SAndy Fiddaman *               This software is part of the ast package               *
4*b30d1939SAndy Fiddaman *          Copyright (c) 1996-2011 AT&T Intellectual Property          *
5*b30d1939SAndy Fiddaman *                      and is licensed under the                       *
6*b30d1939SAndy Fiddaman *                 Eclipse Public License, Version 1.0                  *
7*b30d1939SAndy Fiddaman *                    by AT&T Intellectual Property                     *
8*b30d1939SAndy Fiddaman *                                                                      *
9*b30d1939SAndy Fiddaman *                A copy of the License is available at                 *
10*b30d1939SAndy Fiddaman *          http://www.eclipse.org/org/documents/epl-v10.html           *
11*b30d1939SAndy Fiddaman *         (with md5 checksum b35adb5213ca9657e911e9befb180842)         *
12*b30d1939SAndy Fiddaman *                                                                      *
13*b30d1939SAndy Fiddaman *              Information and Software Systems Research               *
14*b30d1939SAndy Fiddaman *                            AT&T Research                             *
15*b30d1939SAndy Fiddaman *                           Florham Park NJ                            *
16*b30d1939SAndy Fiddaman *                                                                      *
17*b30d1939SAndy Fiddaman *                 Glenn Fowler <gsf@research.att.com>                  *
18*b30d1939SAndy Fiddaman *                                                                      *
19*b30d1939SAndy Fiddaman ***********************************************************************/
20*b30d1939SAndy Fiddaman #pragma prototyped
21*b30d1939SAndy Fiddaman 
22*b30d1939SAndy Fiddaman /*
23*b30d1939SAndy Fiddaman  * att
24*b30d1939SAndy Fiddaman  */
25*b30d1939SAndy Fiddaman 
/*
 * Method-table entries for the "att" checksum.
 *
 * att_description is the help text for the method. The two "\t"
 * characters are part of the string: they are the leading tabs of the
 * continuation lines that the original single-literal spelling spliced in.
 * "\b" is kept exactly as written (presumably markup for the option
 * parser -- confirm against the consumer of this string).
 */
#define att_description	\
	"The system 5 release 4 checksum. This is the default for \bsum\b " \
	"\twhen \bgetconf UNIVERSE\b is \batt\b. This is the only true sum; " \
	"\tall of the other methods are order dependent."

/* no method-specific options */
#define att_options	0

/* '|'-separated names for this method (presumably matched by the method lookup -- confirm) */
#define att_match	"att|sys5|s5|default"

/* open, init, print and data are shared with the generic long_* implementation */
#define att_open	long_open
#define att_init	long_init
#define att_print	long_print
#define att_data	long_data

/* NOTE(review): presumably the block size used when reporting sizes -- confirm */
#define att_scale	512
37*b30d1939SAndy Fiddaman 
38*b30d1939SAndy Fiddaman #if defined(__SUNPRO_C) || defined(__GNUC__)
39*b30d1939SAndy Fiddaman 
/* number of input bytes att_block() consumes per unrolled pass */
#define CBLOCK_SIZE (64)

/*
 * sum_prefetch(addr): ask the CPU to start loading the data at addr.
 *
 * Sun Studio maps it to sun_prefetch_read_many(); GCC-compatible
 * compilers map it to __builtin_prefetch() with rw=0 (read) and
 * locality=3 (keep in all cache levels).
 */
#ifdef __SUNPRO_C
#    include <sun_prefetch.h>
#    define sum_prefetch(addr) sun_prefetch_read_many((void *)(addr))
#elif defined(__GNUC__)
#    define sum_prefetch(addr) __builtin_prefetch((addr), 0, 3)
#else
#    error Unknown compiler
#endif

/*
 * Unroll request intended for the for() loop in att_block().
 * NOTE(review): the pragma sits at file scope rather than directly in
 * front of the loop -- confirm the targeted compilers honour it here.
 */
#pragma unroll(16)
52*b30d1939SAndy Fiddaman /* Inmos transputer would love this algorithm */
53*b30d1939SAndy Fiddaman static int
att_block(register Sum_t * p,const void * s,size_t n)54*b30d1939SAndy Fiddaman att_block(register Sum_t* p, const void* s, size_t n)
55*b30d1939SAndy Fiddaman {
56*b30d1939SAndy Fiddaman 	register uint32_t	c = ((Integral_t*)p)->sum;
57*b30d1939SAndy Fiddaman 	register const unsigned char*	b = (const unsigned char*)s;
58*b30d1939SAndy Fiddaman 	register const unsigned char*	e = b + n;
59*b30d1939SAndy Fiddaman 	register uint32_t s0, s1, s2, s3, s4, s5, s6, s7;
60*b30d1939SAndy Fiddaman 	register unsigned int i;
61*b30d1939SAndy Fiddaman 
62*b30d1939SAndy Fiddaman 	s0=s1=s2=s3=s4=s5=s6=s7=0U;
63*b30d1939SAndy Fiddaman 
64*b30d1939SAndy Fiddaman 	sum_prefetch((void *)b);
65*b30d1939SAndy Fiddaman 
66*b30d1939SAndy Fiddaman 	while (n > CBLOCK_SIZE)
67*b30d1939SAndy Fiddaman 	{
68*b30d1939SAndy Fiddaman 		sum_prefetch((b+CBLOCK_SIZE));
69*b30d1939SAndy Fiddaman 
70*b30d1939SAndy Fiddaman 		/* Compiler will unroll for() loops per #pragma unroll */
71*b30d1939SAndy Fiddaman 		for (i=0 ; i < (CBLOCK_SIZE/8) ; i++)
72*b30d1939SAndy Fiddaman 		{
73*b30d1939SAndy Fiddaman 			/*
74*b30d1939SAndy Fiddaman 			 * use s0-s7 to decouple calculations (this improves pipelining)
75*b30d1939SAndy Fiddaman 			 * because each operation is completely independent from it's
76*b30d1939SAndy Fiddaman 			 * siblings
77*b30d1939SAndy Fiddaman 			 */
78*b30d1939SAndy Fiddaman 			s0+=b[0];
79*b30d1939SAndy Fiddaman 			s1+=b[1];
80*b30d1939SAndy Fiddaman 			s2+=b[2];
81*b30d1939SAndy Fiddaman 			s3+=b[3];
82*b30d1939SAndy Fiddaman 			s4+=b[4];
83*b30d1939SAndy Fiddaman 			s5+=b[5];
84*b30d1939SAndy Fiddaman 			s6+=b[6];
85*b30d1939SAndy Fiddaman 			s7+=b[7];
86*b30d1939SAndy Fiddaman 
87*b30d1939SAndy Fiddaman 			b+=8;
88*b30d1939SAndy Fiddaman 			n-=8;
89*b30d1939SAndy Fiddaman 		}
90*b30d1939SAndy Fiddaman 	}
91*b30d1939SAndy Fiddaman 
92*b30d1939SAndy Fiddaman 	c+=s0+s1+s2+s3+s4+s5+s6+s7;
93*b30d1939SAndy Fiddaman 
94*b30d1939SAndy Fiddaman 	while (b < e)
95*b30d1939SAndy Fiddaman 		c += *b++;
96*b30d1939SAndy Fiddaman 	((Integral_t*)p)->sum = c;
97*b30d1939SAndy Fiddaman 	return 0;
98*b30d1939SAndy Fiddaman }
99*b30d1939SAndy Fiddaman 
100*b30d1939SAndy Fiddaman #else
101*b30d1939SAndy Fiddaman static int
att_block(register Sum_t * p,const void * s,size_t n)102*b30d1939SAndy Fiddaman att_block(register Sum_t* p, const void* s, size_t n)
103*b30d1939SAndy Fiddaman {
104*b30d1939SAndy Fiddaman 	register uint32_t	c = ((Integral_t*)p)->sum;
105*b30d1939SAndy Fiddaman 	register unsigned char*	b = (unsigned char*)s;
106*b30d1939SAndy Fiddaman 	register unsigned char*	e = b + n;
107*b30d1939SAndy Fiddaman 
108*b30d1939SAndy Fiddaman 	while (b < e)
109*b30d1939SAndy Fiddaman 		c += *b++;
110*b30d1939SAndy Fiddaman 	((Integral_t*)p)->sum = c;
111*b30d1939SAndy Fiddaman 	return 0;
112*b30d1939SAndy Fiddaman }
113*b30d1939SAndy Fiddaman #endif /* defined(__SUNPRO_C) || defined(__GNUC__) */
114*b30d1939SAndy Fiddaman 
115*b30d1939SAndy Fiddaman static int
att_done(Sum_t * p)116*b30d1939SAndy Fiddaman att_done(Sum_t* p)
117*b30d1939SAndy Fiddaman {
118*b30d1939SAndy Fiddaman 	register uint32_t	c = ((Integral_t*)p)->sum;
119*b30d1939SAndy Fiddaman 
120*b30d1939SAndy Fiddaman 	c = (c & 0xffff) + ((c >> 16) & 0xffff);
121*b30d1939SAndy Fiddaman 	c = (c & 0xffff) + (c >> 16);
122*b30d1939SAndy Fiddaman 	((Integral_t*)p)->sum = c & 0xffff;
123*b30d1939SAndy Fiddaman 	return short_done(p);
124*b30d1939SAndy Fiddaman }
125