1// Copyright 2012 The Go Authors. All rights reserved.
2// Use of this source code is governed by a BSD-style
3// license that can be found in the LICENSE file.
4
5package salsa
6
// Core208 computes the Salsa20/8 core of the 64-byte block in and writes the
// 64-byte result to out. in and out may point to the same array: every input
// byte is read before the first output byte is stored.
func Core208(out *[64]byte, in *[64]byte) {
	// Decode the block as sixteen little-endian 32-bit words. seed keeps the
	// untouched input for the final feed-forward; w is the working state.
	var seed [16]uint32
	for k := range seed {
		p := 4 * k
		seed[k] = uint32(in[p]) | uint32(in[p+1])<<8 | uint32(in[p+2])<<16 | uint32(in[p+3])<<24
	}
	w := seed

	// Word indices (a, b, c, d) of the eight quarter-rounds that make up one
	// double round: the first line of the table walks the columns of the 4x4
	// state, the second line walks its rows.
	quarters := [8][4]int{
		{0, 4, 8, 12}, {5, 9, 13, 1}, {10, 14, 2, 6}, {15, 3, 7, 11},
		{0, 1, 2, 3}, {5, 6, 7, 4}, {10, 11, 8, 9}, {15, 12, 13, 14},
	}

	// Four double rounds give the eight rounds of Salsa20/8.
	for round := 0; round < 4; round++ {
		for _, q := range quarters {
			a, b, c, d := q[0], q[1], q[2], q[3]

			// Each step XORs a 32-bit left rotation (by 7, 9, 13, 18) of
			// the sum of two state words into a third.
			t := w[a] + w[d]
			w[b] ^= t<<7 | t>>25
			t = w[b] + w[a]
			w[c] ^= t<<9 | t>>23
			t = w[c] + w[b]
			w[d] ^= t<<13 | t>>19
			t = w[d] + w[c]
			w[a] ^= t<<18 | t>>14
		}
	}

	// Feed-forward: add the original words back in (wrapping at 32 bits) and
	// encode each sum little-endian into out.
	for k, v := range w {
		v += seed[k]
		p := 4 * k
		out[p] = byte(v)
		out[p+1] = byte(v >> 8)
		out[p+2] = byte(v >> 16)
		out[p+3] = byte(v >> 24)
	}
}
200