1 /**********************************************************************
2   Copyright(c) 2020 Arm Corporation All rights reserved.
3 
4   Redistribution and use in source and binary forms, with or without
5   modification, are permitted provided that the following conditions
6   are met:
7     * Redistributions of source code must retain the above copyright
8       notice, this list of conditions and the following disclaimer.
9     * Redistributions in binary form must reproduce the above copyright
10       notice, this list of conditions and the following disclaimer in
11       the documentation and/or other materials provided with the
12       distribution.
13     * Neither the name of Arm Corporation nor the names of its
14       contributors may be used to endorse or promote products derived
15       from this software without specific prior written permission.
16 
17   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
18   "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
19   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
20   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
21   OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
22   SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
23   LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
24   DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
25   THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
26   (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
27   OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
28 **********************************************************************/
29 
30 
31 
32 
/*
 * crc32_hw_common poly_type
 *
 * Emits a complete CRC routine body (ending in ret) that walks the buffer
 * with the crc32_u8/u16/u32/u64 helpers.
 *
 * poly_type: when it is the literal crc32, the running CRC is bitwise
 *            inverted on entry and again before it is returned; any other
 *            value leaves the CRC as is.
 *
 * Names that must already be defined where the macro is expanded (none of
 * them is defined in this macro):
 *   BUF, LEN                    current data pointer / remaining byte count
 *   wCRC                        running CRC (32-bit register)
 *   wdata, data0..data3         scratch registers for loaded data
 *   crc32_u8/u16/u32/u64        CRC step helpers, used as: dst, crc, data
 *
 * Result: w0 holds the final CRC.
 *
 * The labels below are fixed names, not unique per expansion, so the macro
 * can be expanded only once per assembly unit.
 */
.macro	crc32_hw_common		poly_type

.ifc	\poly_type,crc32
	mvn		wCRC,wCRC
.endif
	cbz		LEN, .zero_length_ret
	// Alignment prologue: consume 1, 2 and 4 bytes as needed so that BUF
	// becomes 8-byte aligned, but only while enough bytes remain.
	tbz		BUF, 0, .align_short
	ldrb		wdata,[BUF],1
	sub		LEN,LEN,1
	crc32_u8	wCRC,wCRC,wdata
.align_short:
	// Take the branch only when (BUF & 2) != 0 and LEN > 1; when the tst
	// result is zero, ccmp forces NZCV to 0 so that hi is false.
	tst		BUF,2
	ccmp		LEN,1,0,ne
	bhi		.align_short_2
	// Same pattern for the 4-byte step: (BUF & 4) != 0 and LEN > 3.
	tst		BUF,4
	ccmp		LEN,3,0,ne
	bhi		.align_word

.align_finish:

	cmp		LEN, 63
	bls		.loop_16B
	// Main loop: 64 bytes per iteration, LEN >= 64 on entry.
.loop_64B:
	ldp		data0, data1, [BUF],#16
	prfm		pldl2keep,[BUF,2048]
	sub		LEN,LEN,#64
	ldp		data2, data3, [BUF],#16
	prfm		pldl1keep,[BUF,256]
	// Flags set here are consumed by the branch at the bottom of the loop;
	// the crc32_u64 expansions in between are expected to leave NZCV alone.
	cmp		LEN,#64
	crc32_u64	wCRC, wCRC, data0
	crc32_u64	wCRC, wCRC, data1
	ldp		data0, data1, [BUF],#16
	crc32_u64	wCRC, wCRC, data2
	crc32_u64	wCRC, wCRC, data3
	ldp		data2, data3, [BUF],#16
	crc32_u64	wCRC, wCRC, data0
	crc32_u64	wCRC, wCRC, data1
	crc32_u64	wCRC, wCRC, data2
	crc32_u64	wCRC, wCRC, data3
	// LEN is a byte count, so use the unsigned condition like every other
	// length test in this macro.
	bhs		.loop_64B

	// Fewer than 64 bytes remain: at most three 16-byte steps, unrolled.
.loop_16B:
	cmp		LEN, 15
	bls		.less_16B
	ldp		data0, data1, [BUF],#16
	sub		LEN,LEN,#16
	cmp		LEN,15
	crc32_u64	wCRC, wCRC, data0
	crc32_u64	wCRC, wCRC, data1
	bls		.less_16B
	ldp		data0, data1, [BUF],#16
	sub		LEN,LEN,#16
	cmp		LEN,15
	crc32_u64	wCRC, wCRC, data0
	crc32_u64	wCRC, wCRC, data1
	bls		.less_16B
	ldp		data0, data1, [BUF],#16
	sub		LEN,LEN,#16   // LEN must now be less than 16
	crc32_u64	wCRC, wCRC, data0
	crc32_u64	wCRC, wCRC, data1
	// Tail: peel 8, 4, 2 and 1 remaining bytes in turn.
.less_16B:
	cmp		LEN, 7
	bls		.less_8B
	ldr		data0, [BUF], 8
	sub		LEN, LEN, #8
	crc32_u64	wCRC, wCRC, data0
.less_8B:
	cmp		LEN, 3
	bls		.less_4B
	ldr		wdata, [BUF], 4
	sub		LEN, LEN, #4
	crc32_u32	wCRC, wCRC, wdata
.less_4B:
	cmp		LEN, 1
	bls		.less_2B
	ldrh		wdata, [BUF], 2
	sub		LEN, LEN, #2
	crc32_u16	wCRC, wCRC, wdata
.less_2B:
	cbz		LEN, .zero_length_ret
	ldrb		wdata, [BUF]
	crc32_u8	wCRC, wCRC, wdata
.zero_length_ret:
	// Move the result to w0, undoing the entry inversion for crc32.
.ifc	\poly_type,crc32
	mvn		w0, wCRC
.else
	mov		w0, wCRC
.endif
	ret
	// Out-of-line part of the alignment prologue.
.align_short_2:
	ldrh		wdata, [BUF], 2
	sub		LEN, LEN, 2
	tst		BUF, 4
	crc32_u16	wCRC, wCRC, wdata
	// Go back when (BUF & 4) == 0 or LEN <= 3; otherwise fall through and
	// consume one more 4-byte word.
	ccmp		LEN, 3, 0, ne
	bls		.align_finish
.align_word:
	ldr		wdata, [BUF], 4
	sub		LEN, LEN, #4
	crc32_u32	wCRC, wCRC, wdata
	b .align_finish
.endm
135 
/*
 * crc32_3crc_fold poly_type
 *
 * Emits a complete CRC routine body (ending in ret). While at least 1024
 * bytes remain, each 1024-byte block is split into three streams whose CRCs
 * are computed side by side and then merged with pmull; the remainder is
 * handled by the same 64/16/8/4/2/1-byte steps as a plain hardware CRC loop.
 *
 * poly_type: when it is the literal crc32, the running CRC is bitwise
 *            inverted on entry and again before it is returned, and the
 *            first constant pair at .Lconstants is emitted; any other value
 *            skips the inversion and emits the second pair.
 *
 * Names that must already be defined where the macro is expanded (none of
 * them is defined in this macro):
 *   BUF, LEN, wCRC, wdata, data0..data3
 *   const_adr, dconst0, dconst1, vconst0, vconst1
 *   ptr_crc0..2, crc0..2, xcrc0, xcrc1, crcN_data0/1
 *   dtmp0, dtmp1, vtmp0, vtmp1
 *   crc32_u8/u16/u32/u64        CRC step helpers, used as: dst, crc, data
 * NOTE(review): the tail continues with BUF and wCRC after the fold loop has
 * advanced ptr_crc0 and crc0, so the expansion site presumably aliases
 * ptr_crc0 to BUF and crc0 to wCRC; likewise dtmpN/vtmpN and dconstN/vconstN
 * look like views of the same vector registers. Confirm in the including
 * file.
 *
 * Result: w0 holds the final CRC.
 *
 * The assembler symbols offset, ptr_offset and l1_offset are (re)defined by
 * the expansion. The labels are fixed names, not unique per expansion, so the
 * macro can be expanded only once per assembly unit.
 */
.macro	crc32_3crc_fold poly_type
.ifc	\poly_type,crc32
	mvn		wCRC,wCRC
.endif
	cbz		LEN, .zero_length_ret
	// Alignment prologue: consume 1, 2 and 4 bytes as needed so that BUF
	// becomes 8-byte aligned, but only while enough bytes remain.
	tbz		BUF, 0, .align_short
	ldrb		wdata,[BUF],1
	sub		LEN,LEN,1
	crc32_u8	wCRC,wCRC,wdata
.align_short:
	// Branch only when (BUF & 2) != 0 and LEN > 1.
	tst		BUF,2
	ccmp		LEN,1,0,ne
	bhi		.align_short_2
	// Branch only when (BUF & 4) != 0 and LEN > 3.
	tst		BUF,4
	ccmp		LEN,3,0,ne
	bhi		.align_word

.align_finish:
	cmp	LEN,1023
	adr	const_adr, .Lconstants
	bls	1f				// fewer than 1024 bytes: skip the fold loop
	ldp	dconst0,dconst1,[const_adr]
	// Fold loop, 1024 bytes per iteration:
	//   crc0 covers block bytes [0, 344)     = 8 + 21*16
	//   crc1 covers block bytes [344, 680)   = 21*16
	//   crc2 covers block bytes [680, 1024)  = 21*16 + 8
2:
	ldr		crc0_data0,[ptr_crc0],8
	prfm		pldl2keep,[ptr_crc0,3*1024-8]
	mov		crc1,0
	mov		crc2,0
	add		ptr_crc1,ptr_crc0,336		// block offset 344
	add		ptr_crc2,ptr_crc0,336*2		// block offset 680
	crc32_u64	crc0,crc0,crc0_data0
	// offset / ptr_offset / l1_offset are assembly-time counters used only
	// to compute the prefetch displacements in the unrolled bodies below.
	.set		offset,0
	.set		ptr_offset,8
	// 5 steps with an L2 prefetch only.
	.rept		5
	ldp		crc0_data0,crc0_data1,[ptr_crc0],16
	ldp		crc1_data0,crc1_data1,[ptr_crc1],16
	.set		offset,offset+64
	.set		ptr_offset,ptr_offset+16
	prfm		pldl2keep,[ptr_crc0,3*1024-ptr_offset+offset]
	crc32_u64	crc0,crc0,crc0_data0
	crc32_u64	crc0,crc0,crc0_data1
	ldp		crc2_data0,crc2_data1,[ptr_crc2],16
	crc32_u64	crc1,crc1,crc1_data0
	crc32_u64	crc1,crc1,crc1_data1
	crc32_u64	crc2,crc2,crc2_data0
	crc32_u64	crc2,crc2,crc2_data1
	.endr
	// 10 steps with both an L2 and an L1 prefetch.
	.set		l1_offset,0
	.rept		10
	ldp		crc0_data0,crc0_data1,[ptr_crc0],16
	ldp		crc1_data0,crc1_data1,[ptr_crc1],16
	.set		offset,offset+64
	.set		ptr_offset,ptr_offset+16
	prfm		pldl2keep,[ptr_crc0,3*1024-ptr_offset+offset]
	prfm		pldl1keep,[ptr_crc0,2*1024-ptr_offset+l1_offset]
	.set		l1_offset,l1_offset+64
	crc32_u64	crc0,crc0,crc0_data0
	crc32_u64	crc0,crc0,crc0_data1
	ldp		crc2_data0,crc2_data1,[ptr_crc2],16
	crc32_u64	crc1,crc1,crc1_data0
	crc32_u64	crc1,crc1,crc1_data1
	crc32_u64	crc2,crc2,crc2_data0
	crc32_u64	crc2,crc2,crc2_data1
	.endr

	// 6 steps with an L1 prefetch only.
	.rept		6
	ldp		crc0_data0,crc0_data1,[ptr_crc0],16
	ldp		crc1_data0,crc1_data1,[ptr_crc1],16
	.set		ptr_offset,ptr_offset+16
	prfm		pldl1keep,[ptr_crc0,2*1024-ptr_offset+l1_offset]
	.set		l1_offset,l1_offset+64
	crc32_u64	crc0,crc0,crc0_data0
	crc32_u64	crc0,crc0,crc0_data1
	ldp		crc2_data0,crc2_data1,[ptr_crc2],16
	crc32_u64	crc1,crc1,crc1_data0
	crc32_u64	crc1,crc1,crc1_data1
	crc32_u64	crc2,crc2,crc2_data0
	crc32_u64	crc2,crc2,crc2_data1
	.endr
	// Last 8 bytes of the block go into crc2. Then crc0 and crc1 are
	// carry-less multiplied by the two constants, reduced with a crc32_u64
	// step against a zero CRC, and XORed with crc2 into crc0.
	ldr		crc2_data0,[ptr_crc2]
	fmov		dtmp0,xcrc0
	fmov		dtmp1,xcrc1
	crc32_u64	crc2,crc2,crc2_data0
	add		ptr_crc0,ptr_crc0,1024-(336+8)	// from block offset 344 to the next block
	pmull		vtmp0.1q,vtmp0.1d,vconst0.1d
	sub		LEN,LEN,1024
	pmull		vtmp1.1q,vtmp1.1d,vconst1.1d
	cmp		LEN,1024			// flags consumed by bhs 2b below
	fmov		xcrc0,dtmp0
	fmov		xcrc1,dtmp1
	crc32_u64	crc0,wzr,xcrc0
	crc32_u64	crc1,wzr,xcrc1

	eor		crc0,crc0,crc2
	eor		crc0,crc0,crc1

	bhs	2b
	// Fewer than 1024 bytes remain from here on.
1:
	cmp		LEN, 63
	bls		.loop_16B
.loop_64B:
	ldp		data0, data1, [BUF],#16
	sub		LEN,LEN,#64
	ldp		data2, data3, [BUF],#16
	cmp		LEN,#64				// flags consumed at the bottom of the loop
	crc32_u64	wCRC, wCRC, data0
	crc32_u64	wCRC, wCRC, data1
	ldp		data0, data1, [BUF],#16
	crc32_u64	wCRC, wCRC, data2
	crc32_u64	wCRC, wCRC, data3
	ldp		data2, data3, [BUF],#16
	crc32_u64	wCRC, wCRC, data0
	crc32_u64	wCRC, wCRC, data1
	crc32_u64	wCRC, wCRC, data2
	crc32_u64	wCRC, wCRC, data3
	// Unsigned condition, matching the bhs of the fold loop above.
	bhs		.loop_64B

	// Fewer than 64 bytes remain: at most three 16-byte steps, unrolled.
.loop_16B:
	cmp		LEN, 15
	bls		.less_16B
	ldp		data0, data1, [BUF],#16
	sub		LEN,LEN,#16
	cmp		LEN,15
	crc32_u64	wCRC, wCRC, data0
	crc32_u64	wCRC, wCRC, data1
	bls		.less_16B
	ldp		data0, data1, [BUF],#16
	sub		LEN,LEN,#16
	cmp		LEN,15
	crc32_u64	wCRC, wCRC, data0
	crc32_u64	wCRC, wCRC, data1
	bls		.less_16B
	ldp		data0, data1, [BUF],#16
	sub		LEN,LEN,#16   // LEN must now be less than 16
	crc32_u64	wCRC, wCRC, data0
	crc32_u64	wCRC, wCRC, data1
	// Tail: peel 8, 4, 2 and 1 remaining bytes in turn.
.less_16B:
	cmp		LEN, 7
	bls		.less_8B
	ldr		data0, [BUF], 8
	sub		LEN, LEN, #8
	crc32_u64	wCRC, wCRC, data0
.less_8B:
	cmp		LEN, 3
	bls		.less_4B
	ldr		wdata, [BUF], 4
	sub		LEN, LEN, #4
	crc32_u32	wCRC, wCRC, wdata
.less_4B:
	cmp		LEN, 1
	bls		.less_2B
	ldrh		wdata, [BUF], 2
	sub		LEN, LEN, #2
	crc32_u16	wCRC, wCRC, wdata
.less_2B:
	cbz		LEN, .zero_length_ret
	ldrb		wdata, [BUF]
	crc32_u8	wCRC, wCRC, wdata
.zero_length_ret:
	// Move the result to w0, undoing the entry inversion for crc32.
.ifc	\poly_type,crc32
	mvn		w0, wCRC
.else
	mov		w0, wCRC
.endif
	ret
	// Out-of-line part of the alignment prologue.
.align_short_2:
	ldrh		wdata, [BUF], 2
	sub		LEN, LEN, 2
	tst		BUF, 4
	crc32_u16	wCRC, wCRC, wdata
	// Go back when (BUF & 4) == 0 or LEN <= 3; otherwise fall through and
	// consume one more 4-byte word.
	ccmp		LEN, 3, 0, ne
	bls		.align_finish
.align_word:
	ldr		wdata, [BUF], 4
	sub		LEN, LEN, #4
	crc32_u32	wCRC, wCRC, wdata
	b .align_finish
	// The preceding code only guarantees 4-byte alignment; align the pair
	// so the 16-byte ldp at the top of the fold path is naturally aligned.
	// The padding is never executed (it follows an unconditional branch).
	.p2align	4
	// Per-polynomial multipliers for the pmull merge step (derivation not
	// shown in this file).
.Lconstants:
.ifc	\poly_type,crc32
	.quad		0xb486819b
	.quad		0x76278617
.else
	.quad		0xe417f38a
	.quad		0x8f158014
.endif

.endm
322