/**********************************************************************
  Copyright(c) 2020 Arm Corporation All rights reserved.

  Redistribution and use in source and binary forms, with or without
  modification, are permitted provided that the following conditions
  are met:
    * Redistributions of source code must retain the above copyright
      notice, this list of conditions and the following disclaimer.
    * Redistributions in binary form must reproduce the above copyright
      notice, this list of conditions and the following disclaimer in
      the documentation and/or other materials provided with the
      distribution.
    * Neither the name of Arm Corporation nor the names of its
      contributors may be used to endorse or promote products derived
      from this software without specific prior written permission.

  THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
  "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
  LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
  A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
  OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
  SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
  LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
  DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
  THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
  (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
  OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
**********************************************************************/

/*
 * AArch64 (GNU as syntax) macro bodies for CRC32 routines.
 *
 * Each macro below expands to a COMPLETE routine body: it finishes with its
 * own `ret` and leaves the result in w0.
 *
 * Macro argument
 *   poly_type   When it is the literal text `crc32`, the running CRC is
 *               bit-inverted (mvn) on entry and again when it is moved to w0.
 *               For any other text the CRC is used and returned as-is.
 *
 * Names used here but NOT defined in this file; the including file has to
 * provide them before expanding a macro:
 *   BUF, LEN, wCRC, wdata, data0..data3     (both macros)
 *   crc32_u8 / crc32_u16 / crc32_u32 / crc32_u64  dst, seed, data
 *   const_adr, dconst0, dconst1, vconst0, vconst1,
 *   ptr_crc0..ptr_crc2, crc0..crc2, xcrc0, xcrc1,
 *   crcN_data0 / crcN_data1 (N = 0..2),
 *   dtmp0, dtmp1, vtmp0, vtmp1              (crc32_3crc_fold only)
 * NOTE(review): these are presumably `.req` register aliases and small
 * wrapper macros around the CRC32 instructions, with BUF/LEN being 64-bit
 * registers - confirm against the including file.
 *
 * Usage constraint: the branch targets (.align_short, .loop_64B,
 * .zero_length_ret, .Lconstants, ...) are ordinary labels with no
 * per-expansion suffix, and both macros use the same names. Expanding more
 * than one of these macros (or one macro twice) in a single assembly unit
 * therefore produces duplicate label definitions.
 *
 * LEN is treated as an unsigned byte count throughout; every length test
 * uses an unsigned condition (bls / bhi / bhs).
 */

/*
 * crc32_hw_common poly_type
 *
 * Straight-line CRC over LEN bytes at BUF, seeded from wCRC:
 *   1. consume 1, 2 and 4 leading bytes as needed (and as LEN allows) so
 *      that BUF reaches 8-byte alignment;
 *   2. 64 bytes per iteration while at least 64 bytes remain;
 *   3. up to three 16-byte steps, then 8 / 4 / 2 / 1 byte tail steps;
 *   4. move the (optionally inverted) CRC to w0 and return.
 */
.macro  crc32_hw_common         poly_type

.ifc    \poly_type,crc32
        mvn             wCRC, wCRC
.endif
        cbz             LEN, .zero_length_ret
        tbz             BUF, 0, .align_short            // bit 0 clear: no odd leading byte
        ldrb            wdata, [BUF], 1
        sub             LEN, LEN, 1
        crc32_u8        wCRC, wCRC, wdata
.align_short:
        tst             BUF, 2
        ccmp            LEN, 1, 0, ne                   // only compare LEN when bit 1 is set
        bhi             .align_short_2                  // bit 1 set and LEN > 1
        tst             BUF, 4
        ccmp            LEN, 3, 0, ne                   // only compare LEN when bit 2 is set
        bhi             .align_word                     // bit 2 set and LEN > 3

.align_finish:

        cmp             LEN, 63
        bls             .loop_16B
.loop_64B:
        ldp             data0, data1, [BUF], #16
        prfm            pldl2keep, [BUF, 2048]
        sub             LEN, LEN, #64
        ldp             data2, data3, [BUF], #16
        prfm            pldl1keep, [BUF, 256]
        cmp             LEN, #64                        // flags are consumed by the bhs below
        crc32_u64       wCRC, wCRC, data0
        crc32_u64       wCRC, wCRC, data1
        ldp             data0, data1, [BUF], #16
        crc32_u64       wCRC, wCRC, data2
        crc32_u64       wCRC, wCRC, data3
        ldp             data2, data3, [BUF], #16
        crc32_u64       wCRC, wCRC, data0
        crc32_u64       wCRC, wCRC, data1
        crc32_u64       wCRC, wCRC, data2
        crc32_u64       wCRC, wCRC, data3
        bhs             .loop_64B                       // unsigned: LEN is a byte count

.loop_16B:
        cmp             LEN, 15
        bls             .less_16B
        ldp             data0, data1, [BUF], #16
        sub             LEN, LEN, #16
        cmp             LEN, 15
        crc32_u64       wCRC, wCRC, data0
        crc32_u64       wCRC, wCRC, data1
        bls             .less_16B
        ldp             data0, data1, [BUF], #16
        sub             LEN, LEN, #16
        cmp             LEN, 15
        crc32_u64       wCRC, wCRC, data0
        crc32_u64       wCRC, wCRC, data1
        bls             .less_16B
        ldp             data0, data1, [BUF], #16
        sub             LEN, LEN, #16                   // LEN was < 64 on entry, so < 16 remain now
        crc32_u64       wCRC, wCRC, data0
        crc32_u64       wCRC, wCRC, data1
.less_16B:
        cmp             LEN, 7
        bls             .less_8B
        ldr             data0, [BUF], 8
        sub             LEN, LEN, #8
        crc32_u64       wCRC, wCRC, data0
.less_8B:
        cmp             LEN, 3
        bls             .less_4B
        ldr             wdata, [BUF], 4
        sub             LEN, LEN, #4
        crc32_u32       wCRC, wCRC, wdata
.less_4B:
        cmp             LEN, 1
        bls             .less_2B
        ldrh            wdata, [BUF], 2
        sub             LEN, LEN, #2
        crc32_u16       wCRC, wCRC, wdata
.less_2B:
        cbz             LEN, .zero_length_ret
        ldrb            wdata, [BUF]
        crc32_u8        wCRC, wCRC, wdata
.zero_length_ret:
.ifc    \poly_type,crc32
        mvn             w0, wCRC
.else
        mov             w0, wCRC
.endif
        ret

        // Out-of-line alignment steps, reached only from the checks above.
.align_short_2:
        ldrh            wdata, [BUF], 2
        sub             LEN, LEN, 2
        tst             BUF, 4
        crc32_u16       wCRC, wCRC, wdata
        ccmp            LEN, 3, 0, ne
        bls             .align_finish                   // bit 2 clear, or fewer than 4 bytes left
.align_word:
        ldr             wdata, [BUF], 4
        sub             LEN, LEN, #4
        crc32_u32       wCRC, wCRC, wdata
        b               .align_finish
.endm

/*
 * crc32_3crc_fold poly_type
 *
 * Same contract and same alignment / tail handling as crc32_hw_common, with
 * an extra fast path: while at least 1024 bytes remain, one 1024-byte block
 * is split into three segments that are CRCed in independent accumulators
 * and then merged:
 *
 *   crc0 : block bytes [  0, 344)   continues the running CRC
 *   crc1 : block bytes [344, 680)   starts from 0
 *   crc2 : block bytes [680,1024)   starts from 0
 *
 * Merge step: crc0 and crc1 are each carry-less multiplied (pmull) by a
 * 64-bit constant loaded from .Lconstants, the low 64 bits of each product
 * are passed through crc32_u64 with a zero seed, and the two results are
 * XORed with crc2 into crc0.
 * NOTE(review): the constants are presumably the per-polynomial folding
 * factors matching the segment distances; their derivation is not visible
 * in this file.
 *
 * NOTE(review): the block loop advances ptr_crc0 and accumulates in crc0,
 * while the tail code reads BUF and wCRC. This only works if ptr_crc0 names
 * the same register as BUF and crc0 the same register as wCRC - confirm in
 * the including file.
 *
 * Assembler symbols offset, ptr_offset and l1_offset are (re)defined with
 * .set during expansion to compute prefetch displacements.
 */
.macro  crc32_3crc_fold         poly_type
.ifc    \poly_type,crc32
        mvn             wCRC, wCRC
.endif
        cbz             LEN, .zero_length_ret
        tbz             BUF, 0, .align_short            // bit 0 clear: no odd leading byte
        ldrb            wdata, [BUF], 1
        sub             LEN, LEN, 1
        crc32_u8        wCRC, wCRC, wdata
.align_short:
        tst             BUF, 2
        ccmp            LEN, 1, 0, ne                   // only compare LEN when bit 1 is set
        bhi             .align_short_2                  // bit 1 set and LEN > 1
        tst             BUF, 4
        ccmp            LEN, 3, 0, ne                   // only compare LEN when bit 2 is set
        bhi             .align_word                     // bit 2 set and LEN > 3

.align_finish:
        cmp             LEN, 1023
        adr             const_adr, .Lconstants          // adr leaves the cmp flags untouched
        bls             1f                              // fewer than 1024 bytes: skip block loop
        ldp             dconst0, dconst1, [const_adr]
2:
        // ---- one 1024-byte block, three interleaved CRC streams ----
        ldr             crc0_data0, [ptr_crc0], 8
        prfm            pldl2keep, [ptr_crc0, 3*1024-8] // block start + 3072
        mov             crc1, 0
        mov             crc2, 0
        add             ptr_crc1, ptr_crc0, 336         // block start + 344
        add             ptr_crc2, ptr_crc0, 336*2       // block start + 680
        crc32_u64       crc0, crc0, crc0_data0
        // ptr_offset tracks how far ptr_crc0 has moved from the block start,
        // so (K - ptr_offset + x) addresses block start + K + x.
        .set            offset, 0
        .set            ptr_offset, 8
.rept   5
        ldp             crc0_data0, crc0_data1, [ptr_crc0], 16
        ldp             crc1_data0, crc1_data1, [ptr_crc1], 16
        .set            offset, offset+64
        .set            ptr_offset, ptr_offset+16
        prfm            pldl2keep, [ptr_crc0, 3*1024-ptr_offset+offset]
        crc32_u64       crc0, crc0, crc0_data0
        crc32_u64       crc0, crc0, crc0_data1
        ldp             crc2_data0, crc2_data1, [ptr_crc2], 16
        crc32_u64       crc1, crc1, crc1_data0
        crc32_u64       crc1, crc1, crc1_data1
        crc32_u64       crc2, crc2, crc2_data0
        crc32_u64       crc2, crc2, crc2_data1
.endr
        .set            l1_offset, 0
.rept   10
        ldp             crc0_data0, crc0_data1, [ptr_crc0], 16
        ldp             crc1_data0, crc1_data1, [ptr_crc1], 16
        .set            offset, offset+64
        .set            ptr_offset, ptr_offset+16
        prfm            pldl2keep, [ptr_crc0, 3*1024-ptr_offset+offset]
        prfm            pldl1keep, [ptr_crc0, 2*1024-ptr_offset+l1_offset]
        .set            l1_offset, l1_offset+64
        crc32_u64       crc0, crc0, crc0_data0
        crc32_u64       crc0, crc0, crc0_data1
        ldp             crc2_data0, crc2_data1, [ptr_crc2], 16
        crc32_u64       crc1, crc1, crc1_data0
        crc32_u64       crc1, crc1, crc1_data1
        crc32_u64       crc2, crc2, crc2_data0
        crc32_u64       crc2, crc2, crc2_data1
.endr

.rept   6
        ldp             crc0_data0, crc0_data1, [ptr_crc0], 16
        ldp             crc1_data0, crc1_data1, [ptr_crc1], 16
        .set            ptr_offset, ptr_offset+16
        prfm            pldl1keep, [ptr_crc0, 2*1024-ptr_offset+l1_offset]
        .set            l1_offset, l1_offset+64
        crc32_u64       crc0, crc0, crc0_data0
        crc32_u64       crc0, crc0, crc0_data1
        ldp             crc2_data0, crc2_data1, [ptr_crc2], 16
        crc32_u64       crc1, crc1, crc1_data0
        crc32_u64       crc1, crc1, crc1_data1
        crc32_u64       crc2, crc2, crc2_data0
        crc32_u64       crc2, crc2, crc2_data1
.endr
        // ---- last 8 bytes of the block, then merge the three streams ----
        ldr             crc2_data0, [ptr_crc2]
        fmov            dtmp0, xcrc0
        fmov            dtmp1, xcrc1
        crc32_u64       crc2, crc2, crc2_data0
        add             ptr_crc0, ptr_crc0, 1024-(336+8) // ptr_crc0 -> start of next block
        pmull           vtmp0.1q, vtmp0.1d, vconst0.1d
        sub             LEN, LEN, 1024
        pmull           vtmp1.1q, vtmp1.1d, vconst1.1d
        cmp             LEN, 1024                       // flags are consumed by the bhs below
        fmov            xcrc0, dtmp0
        fmov            xcrc1, dtmp1
        crc32_u64       crc0, wzr, xcrc0
        crc32_u64       crc1, wzr, xcrc1

        eor             crc0, crc0, crc2
        eor             crc0, crc0, crc1

        bhs             2b                              // another full 1024-byte block remains
1:
        cmp             LEN, 63
        bls             .loop_16B
.loop_64B:
        ldp             data0, data1, [BUF], #16
        sub             LEN, LEN, #64
        ldp             data2, data3, [BUF], #16
        cmp             LEN, #64                        // flags are consumed by the bhs below
        crc32_u64       wCRC, wCRC, data0
        crc32_u64       wCRC, wCRC, data1
        ldp             data0, data1, [BUF], #16
        crc32_u64       wCRC, wCRC, data2
        crc32_u64       wCRC, wCRC, data3
        ldp             data2, data3, [BUF], #16
        crc32_u64       wCRC, wCRC, data0
        crc32_u64       wCRC, wCRC, data1
        crc32_u64       wCRC, wCRC, data2
        crc32_u64       wCRC, wCRC, data3
        bhs             .loop_64B                       // unsigned, consistent with bhs 2b above

.loop_16B:
        cmp             LEN, 15
        bls             .less_16B
        ldp             data0, data1, [BUF], #16
        sub             LEN, LEN, #16
        cmp             LEN, 15
        crc32_u64       wCRC, wCRC, data0
        crc32_u64       wCRC, wCRC, data1
        bls             .less_16B
        ldp             data0, data1, [BUF], #16
        sub             LEN, LEN, #16
        cmp             LEN, 15
        crc32_u64       wCRC, wCRC, data0
        crc32_u64       wCRC, wCRC, data1
        bls             .less_16B
        ldp             data0, data1, [BUF], #16
        sub             LEN, LEN, #16                   // LEN was < 64 on entry, so < 16 remain now
        crc32_u64       wCRC, wCRC, data0
        crc32_u64       wCRC, wCRC, data1
.less_16B:
        cmp             LEN, 7
        bls             .less_8B
        ldr             data0, [BUF], 8
        sub             LEN, LEN, #8
        crc32_u64       wCRC, wCRC, data0
.less_8B:
        cmp             LEN, 3
        bls             .less_4B
        ldr             wdata, [BUF], 4
        sub             LEN, LEN, #4
        crc32_u32       wCRC, wCRC, wdata
.less_4B:
        cmp             LEN, 1
        bls             .less_2B
        ldrh            wdata, [BUF], 2
        sub             LEN, LEN, #2
        crc32_u16       wCRC, wCRC, wdata
.less_2B:
        cbz             LEN, .zero_length_ret
        ldrb            wdata, [BUF]
        crc32_u8        wCRC, wCRC, wdata
.zero_length_ret:
.ifc    \poly_type,crc32
        mvn             w0, wCRC
.else
        mov             w0, wCRC
.endif
        ret

        // Out-of-line alignment steps, reached only from the checks above.
.align_short_2:
        ldrh            wdata, [BUF], 2
        sub             LEN, LEN, 2
        tst             BUF, 4
        crc32_u16       wCRC, wCRC, wdata
        ccmp            LEN, 3, 0, ne
        bls             .align_finish                   // bit 2 clear, or fewer than 4 bytes left
.align_word:
        ldr             wdata, [BUF], 4
        sub             LEN, LEN, #4
        crc32_u32       wCRC, wCRC, wdata
        b               .align_finish

        // Literal pool for the merge step. It sits after an unconditional
        // branch, so the alignment padding is never executed. 8-byte
        // alignment keeps the ldp of two 64-bit values naturally aligned.
        .p2align        3
.Lconstants:
.ifc    \poly_type,crc32
        .quad           0xb486819b
        .quad           0x76278617
.else
        .quad           0xe417f38a
        .quad           0x8f158014
.endif

.endm