1//+build !noasm,!appengine
2
3// ARM64 version of SHA256
4
5//
6// Minio Cloud Storage, (C) 2016 Minio, Inc.
7//
8// Licensed under the Apache License, Version 2.0 (the "License");
9// you may not use this file except in compliance with the License.
10// You may obtain a copy of the License at
11//
12//     http://www.apache.org/licenses/LICENSE-2.0
13//
14// Unless required by applicable law or agreed to in writing, software
15// distributed under the License is distributed on an "AS IS" BASIS,
16// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
17// See the License for the specific language governing permissions and
18// limitations under the License.
19//
20
21//
22// Based on implementation as found in https://github.com/jocover/sha256-armv8
23//
24// Use github.com/minio/asm2plan9s on this file to assemble ARM instructions to
25// their Plan9 equivalents
26//
27
// blockArm runs the SHA-256 compression function over every complete 64-byte
// block of the message using the ARMv8 SHA-256 instructions, and stores the
// updated 32-byte state back through h.
//
// Arguments (read from the Go argument frame via FP):
//   h+0(FP)            pointer to the hash state (two 4 x 32-bit vectors)
//   message+24(FP)     pointer to the input bytes
//   message_len+32(FP) input length in bytes
// NOTE(review): the frame offsets suggest the Go stub is
// blockArm(h []uint32, message []uint8) - confirm against the declaration.
//
// Behavior:
//   - If the length is below 64 the routine returns without touching h.
//   - Only whole 64-byte blocks are consumed; a trailing partial block is
//     ignored.
//
// Register roles:
//   R0       state pointer (post-incremented while loading, then restored)
//   R1       message pointer, advanced by 64 per block
//   R2       bytes left after the block being processed (negative = done)
//   R3       pointer walking the ·constants table
//   v0, v1   chaining state (ABCD / EFGH halves)
//   v2, v3   working state for the current block
//   v4       copy of v2 taken before sha256h, consumed by sha256h2
//   v5-v8    message schedule words
//   v9, v10  schedule words plus round constants (alternating)
//   v16-v31  the 64 round constants, four per register
//
// TEXT flag 7 is NOPROF|DUPOK|NOSPLIT (see textflag.h); the frame size is 0.
TEXT ·blockArm(SB), 7, $0
	MOVD h+0(FP), R0
	MOVD message+24(FP), R1
	MOVD message_len+32(FP), R2 // length of message
	SUBS $64, R2
	BMI  complete               // fewer than 64 bytes: nothing to do

	// Load constants table pointer.
	// The "$" takes the address of the symbol; without it (and without the
	// "·" package qualifier) R3 would not point at the table defined below.
	MOVD $·constants(SB), R3

	// Cache constants table in registers v16 - v31, interleaved with the
	// load of the current hash state into v0 / v1.
	WORD $0x4cdf2870 // ld1	{v16.4s-v19.4s}, [x3], #64
	WORD $0x4cdf7800 // ld1	{v0.4s}, [x0], #16
	WORD $0x4cdf2874 // ld1	{v20.4s-v23.4s}, [x3], #64

	WORD $0x4c407801 // ld1	{v1.4s}, [x0]
	WORD $0x4cdf2878 // ld1	{v24.4s-v27.4s}, [x3], #64
	// Undo the post-increment above so the final st1 writes to the start
	// of the state.
	WORD $0xd1004000 // sub	x0, x0, #0x10
	WORD $0x4cdf287c // ld1	{v28.4s-v31.4s}, [x3], #64

loop:
	// Main loop: one iteration per 64-byte block.
	// Load the block, copy the chaining state into the working registers
	// and byte-swap each 32-bit message word.
	WORD $0x4cdf2025 // ld1	{v5.16b-v8.16b}, [x1], #64
	WORD $0x4ea01c02 // mov	v2.16b, v0.16b
	WORD $0x4ea11c23 // mov	v3.16b, v1.16b
	WORD $0x6e2008a5 // rev32	v5.16b, v5.16b
	WORD $0x6e2008c6 // rev32	v6.16b, v6.16b
	WORD $0x4eb084a9 // add	v9.4s, v5.4s, v16.4s
	WORD $0x6e2008e7 // rev32	v7.16b, v7.16b
	WORD $0x4eb184ca // add	v10.4s, v6.4s, v17.4s

	// Rounds 0-3 (W+K in v9)
	WORD $0x4ea21c44 // mov	v4.16b, v2.16b
	WORD $0x5e094062 // sha256h	q2, q3, v9.4s
	WORD $0x5e095083 // sha256h2	q3, q4, v9.4s
	WORD $0x5e2828c5 // sha256su0	v5.4s, v6.4s
	WORD $0x6e200908 // rev32	v8.16b, v8.16b
	WORD $0x4eb284e9 // add	v9.4s, v7.4s, v18.4s

	// Rounds 4-7 (W+K in v10)
	WORD $0x4ea21c44 // mov	v4.16b, v2.16b
	WORD $0x5e0a4062 // sha256h	q2, q3, v10.4s
	WORD $0x5e0a5083 // sha256h2	q3, q4, v10.4s
	WORD $0x5e2828e6 // sha256su0	v6.4s, v7.4s
	WORD $0x5e0860e5 // sha256su1	v5.4s, v7.4s, v8.4s
	WORD $0x4eb3850a // add	v10.4s, v8.4s, v19.4s

	// Rounds 8-11 (W+K in v9)
	WORD $0x4ea21c44 // mov	v4.16b, v2.16b
	WORD $0x5e094062 // sha256h	q2, q3, v9.4s
	WORD $0x5e095083 // sha256h2	q3, q4, v9.4s
	WORD $0x5e282907 // sha256su0	v7.4s, v8.4s
	WORD $0x5e056106 // sha256su1	v6.4s, v8.4s, v5.4s
	WORD $0x4eb484a9 // add	v9.4s, v5.4s, v20.4s

	// Rounds 12-15 (W+K in v10)
	WORD $0x4ea21c44 // mov	v4.16b, v2.16b
	WORD $0x5e0a4062 // sha256h	q2, q3, v10.4s
	WORD $0x5e0a5083 // sha256h2	q3, q4, v10.4s
	WORD $0x5e2828a8 // sha256su0	v8.4s, v5.4s
	WORD $0x5e0660a7 // sha256su1	v7.4s, v5.4s, v6.4s
	WORD $0x4eb584ca // add	v10.4s, v6.4s, v21.4s

	// Rounds 16-19 (W+K in v9)
	WORD $0x4ea21c44 // mov	v4.16b, v2.16b
	WORD $0x5e094062 // sha256h	q2, q3, v9.4s
	WORD $0x5e095083 // sha256h2	q3, q4, v9.4s
	WORD $0x5e2828c5 // sha256su0	v5.4s, v6.4s
	WORD $0x5e0760c8 // sha256su1	v8.4s, v6.4s, v7.4s
	WORD $0x4eb684e9 // add	v9.4s, v7.4s, v22.4s

	// Rounds 20-23 (W+K in v10)
	WORD $0x4ea21c44 // mov	v4.16b, v2.16b
	WORD $0x5e0a4062 // sha256h	q2, q3, v10.4s
	WORD $0x5e0a5083 // sha256h2	q3, q4, v10.4s
	WORD $0x5e2828e6 // sha256su0	v6.4s, v7.4s
	WORD $0x5e0860e5 // sha256su1	v5.4s, v7.4s, v8.4s
	WORD $0x4eb7850a // add	v10.4s, v8.4s, v23.4s

	// Rounds 24-27 (W+K in v9)
	WORD $0x4ea21c44 // mov	v4.16b, v2.16b
	WORD $0x5e094062 // sha256h	q2, q3, v9.4s
	WORD $0x5e095083 // sha256h2	q3, q4, v9.4s
	WORD $0x5e282907 // sha256su0	v7.4s, v8.4s
	WORD $0x5e056106 // sha256su1	v6.4s, v8.4s, v5.4s
	WORD $0x4eb884a9 // add	v9.4s, v5.4s, v24.4s

	// Rounds 28-31 (W+K in v10)
	WORD $0x4ea21c44 // mov	v4.16b, v2.16b
	WORD $0x5e0a4062 // sha256h	q2, q3, v10.4s
	WORD $0x5e0a5083 // sha256h2	q3, q4, v10.4s
	WORD $0x5e2828a8 // sha256su0	v8.4s, v5.4s
	WORD $0x5e0660a7 // sha256su1	v7.4s, v5.4s, v6.4s
	WORD $0x4eb984ca // add	v10.4s, v6.4s, v25.4s

	// Rounds 32-35 (W+K in v9)
	WORD $0x4ea21c44 // mov	v4.16b, v2.16b
	WORD $0x5e094062 // sha256h	q2, q3, v9.4s
	WORD $0x5e095083 // sha256h2	q3, q4, v9.4s
	WORD $0x5e2828c5 // sha256su0	v5.4s, v6.4s
	WORD $0x5e0760c8 // sha256su1	v8.4s, v6.4s, v7.4s
	WORD $0x4eba84e9 // add	v9.4s, v7.4s, v26.4s

	// Rounds 36-39 (W+K in v10)
	WORD $0x4ea21c44 // mov	v4.16b, v2.16b
	WORD $0x5e0a4062 // sha256h	q2, q3, v10.4s
	WORD $0x5e0a5083 // sha256h2	q3, q4, v10.4s
	WORD $0x5e2828e6 // sha256su0	v6.4s, v7.4s
	WORD $0x5e0860e5 // sha256su1	v5.4s, v7.4s, v8.4s
	WORD $0x4ebb850a // add	v10.4s, v8.4s, v27.4s

	// Rounds 40-43 (W+K in v9)
	WORD $0x4ea21c44 // mov	v4.16b, v2.16b
	WORD $0x5e094062 // sha256h	q2, q3, v9.4s
	WORD $0x5e095083 // sha256h2	q3, q4, v9.4s
	WORD $0x5e282907 // sha256su0	v7.4s, v8.4s
	WORD $0x5e056106 // sha256su1	v6.4s, v8.4s, v5.4s
	WORD $0x4ebc84a9 // add	v9.4s, v5.4s, v28.4s

	// Rounds 44-47 (W+K in v10)
	WORD $0x4ea21c44 // mov	v4.16b, v2.16b
	WORD $0x5e0a4062 // sha256h	q2, q3, v10.4s
	WORD $0x5e0a5083 // sha256h2	q3, q4, v10.4s
	WORD $0x5e2828a8 // sha256su0	v8.4s, v5.4s
	WORD $0x5e0660a7 // sha256su1	v7.4s, v5.4s, v6.4s
	WORD $0x4ebd84ca // add	v10.4s, v6.4s, v29.4s

	// Rounds 48-51 (W+K in v9); the last schedule update finishes here
	WORD $0x4ea21c44 // mov	v4.16b, v2.16b
	WORD $0x5e094062 // sha256h	q2, q3, v9.4s
	WORD $0x5e095083 // sha256h2	q3, q4, v9.4s
	WORD $0x5e0760c8 // sha256su1	v8.4s, v6.4s, v7.4s
	WORD $0x4ebe84e9 // add	v9.4s, v7.4s, v30.4s

	// Rounds 52-55 (W+K in v10)
	WORD $0x4ea21c44 // mov	v4.16b, v2.16b
	WORD $0x5e0a4062 // sha256h	q2, q3, v10.4s
	WORD $0x5e0a5083 // sha256h2	q3, q4, v10.4s
	WORD $0x4ebf850a // add	v10.4s, v8.4s, v31.4s

	// Rounds 56-59 (W+K in v9)
	WORD $0x4ea21c44 // mov	v4.16b, v2.16b
	WORD $0x5e094062 // sha256h	q2, q3, v9.4s
	WORD $0x5e095083 // sha256h2	q3, q4, v9.4s

	// Rounds 60-63 (W+K in v10)
	WORD $0x4ea21c44 // mov	v4.16b, v2.16b
	WORD $0x5e0a4062 // sha256h	q2, q3, v10.4s
	WORD $0x5e0a5083 // sha256h2	q3, q4, v10.4s

	// Fold the working state back into the chaining state.
	WORD $0x4ea38421 // add	v1.4s, v1.4s, v3.4s
	WORD $0x4ea28400 // add	v0.4s, v0.4s, v2.4s

	// Another full block left? R2 stays non-negative while one remains.
	SUBS $64, R2
	BPL  loop

	// Store result
	WORD $0x4c00a800 // st1	{v0.4s, v1.4s}, [x0]

complete:
	RET
156
// SHA-256 round constants K[0..63], stored as 64 consecutive 32-bit words
// (256 bytes total). blockArm loads them four at a time into v16-v31.
// Each entry is written as its own 4-byte DATA item in K order; on
// little-endian arm64 this yields the same bytes as packing two words into
// one 8-byte item.
DATA ·constants+0x00(SB)/4, $0x428a2f98 // K[0]
DATA ·constants+0x04(SB)/4, $0x71374491 // K[1]
DATA ·constants+0x08(SB)/4, $0xb5c0fbcf // K[2]
DATA ·constants+0x0c(SB)/4, $0xe9b5dba5 // K[3]
DATA ·constants+0x10(SB)/4, $0x3956c25b // K[4]
DATA ·constants+0x14(SB)/4, $0x59f111f1 // K[5]
DATA ·constants+0x18(SB)/4, $0x923f82a4 // K[6]
DATA ·constants+0x1c(SB)/4, $0xab1c5ed5 // K[7]
DATA ·constants+0x20(SB)/4, $0xd807aa98 // K[8]
DATA ·constants+0x24(SB)/4, $0x12835b01 // K[9]
DATA ·constants+0x28(SB)/4, $0x243185be // K[10]
DATA ·constants+0x2c(SB)/4, $0x550c7dc3 // K[11]
DATA ·constants+0x30(SB)/4, $0x72be5d74 // K[12]
DATA ·constants+0x34(SB)/4, $0x80deb1fe // K[13]
DATA ·constants+0x38(SB)/4, $0x9bdc06a7 // K[14]
DATA ·constants+0x3c(SB)/4, $0xc19bf174 // K[15]
DATA ·constants+0x40(SB)/4, $0xe49b69c1 // K[16]
DATA ·constants+0x44(SB)/4, $0xefbe4786 // K[17]
DATA ·constants+0x48(SB)/4, $0x0fc19dc6 // K[18]
DATA ·constants+0x4c(SB)/4, $0x240ca1cc // K[19]
DATA ·constants+0x50(SB)/4, $0x2de92c6f // K[20]
DATA ·constants+0x54(SB)/4, $0x4a7484aa // K[21]
DATA ·constants+0x58(SB)/4, $0x5cb0a9dc // K[22]
DATA ·constants+0x5c(SB)/4, $0x76f988da // K[23]
DATA ·constants+0x60(SB)/4, $0x983e5152 // K[24]
DATA ·constants+0x64(SB)/4, $0xa831c66d // K[25]
DATA ·constants+0x68(SB)/4, $0xb00327c8 // K[26]
DATA ·constants+0x6c(SB)/4, $0xbf597fc7 // K[27]
DATA ·constants+0x70(SB)/4, $0xc6e00bf3 // K[28]
DATA ·constants+0x74(SB)/4, $0xd5a79147 // K[29]
DATA ·constants+0x78(SB)/4, $0x06ca6351 // K[30]
DATA ·constants+0x7c(SB)/4, $0x14292967 // K[31]
DATA ·constants+0x80(SB)/4, $0x27b70a85 // K[32]
DATA ·constants+0x84(SB)/4, $0x2e1b2138 // K[33]
DATA ·constants+0x88(SB)/4, $0x4d2c6dfc // K[34]
DATA ·constants+0x8c(SB)/4, $0x53380d13 // K[35]
DATA ·constants+0x90(SB)/4, $0x650a7354 // K[36]
DATA ·constants+0x94(SB)/4, $0x766a0abb // K[37]
DATA ·constants+0x98(SB)/4, $0x81c2c92e // K[38]
DATA ·constants+0x9c(SB)/4, $0x92722c85 // K[39]
DATA ·constants+0xa0(SB)/4, $0xa2bfe8a1 // K[40]
DATA ·constants+0xa4(SB)/4, $0xa81a664b // K[41]
DATA ·constants+0xa8(SB)/4, $0xc24b8b70 // K[42]
DATA ·constants+0xac(SB)/4, $0xc76c51a3 // K[43]
DATA ·constants+0xb0(SB)/4, $0xd192e819 // K[44]
DATA ·constants+0xb4(SB)/4, $0xd6990624 // K[45]
DATA ·constants+0xb8(SB)/4, $0xf40e3585 // K[46]
DATA ·constants+0xbc(SB)/4, $0x106aa070 // K[47]
DATA ·constants+0xc0(SB)/4, $0x19a4c116 // K[48]
DATA ·constants+0xc4(SB)/4, $0x1e376c08 // K[49]
DATA ·constants+0xc8(SB)/4, $0x2748774c // K[50]
DATA ·constants+0xcc(SB)/4, $0x34b0bcb5 // K[51]
DATA ·constants+0xd0(SB)/4, $0x391c0cb3 // K[52]
DATA ·constants+0xd4(SB)/4, $0x4ed8aa4a // K[53]
DATA ·constants+0xd8(SB)/4, $0x5b9cca4f // K[54]
DATA ·constants+0xdc(SB)/4, $0x682e6ff3 // K[55]
DATA ·constants+0xe0(SB)/4, $0x748f82ee // K[56]
DATA ·constants+0xe4(SB)/4, $0x78a5636f // K[57]
DATA ·constants+0xe8(SB)/4, $0x84c87814 // K[58]
DATA ·constants+0xec(SB)/4, $0x8cc70208 // K[59]
DATA ·constants+0xf0(SB)/4, $0x90befffa // K[60]
DATA ·constants+0xf4(SB)/4, $0xa4506ceb // K[61]
DATA ·constants+0xf8(SB)/4, $0xbef9a3f7 // K[62]
DATA ·constants+0xfc(SB)/4, $0xc67178f2 // K[63]

// Flag 8 is RODATA (see textflag.h); the symbol is 256 bytes long.
GLOBL ·constants(SB), 8, $256
192
193