1/** 2 * Author......: See docs/credits.txt 3 * License.....: MIT 4 */ 5 6#include "inc_vendor.h" 7#include "inc_types.h" 8#include "inc_platform.h" 9#include "inc_common.h" 10#include "inc_rp_optimized.h" 11 12#ifndef MAYBE_UNUSED 13#define MAYBE_UNUSED 14#endif 15 16DECLSPEC u32 generate_cmask_optimized (const u32 value) 17{ 18 const u32 rmask = ((value & 0x40404040u) >> 1u) 19 & ~((value & 0x80808080u) >> 2u); 20 21 const u32 hmask = (value & 0x1f1f1f1fu) + 0x05050505u; 22 const u32 lmask = (value & 0x1f1f1f1fu) + 0x1f1f1f1fu; 23 24 return rmask & ~hmask & lmask; 25} 26 27DECLSPEC void truncate_right_optimized (u32 *buf0, u32 *buf1, const u32 offset) 28{ 29 const u32 tmp = (1u << ((offset & 3u) * 8u)) - 1u; 30 31 const int offset_switch = offset / 4; 32 33 switch (offset_switch) 34 { 35 case 0: buf0[0] &= tmp; 36 buf0[1] = 0; 37 buf0[2] = 0; 38 buf0[3] = 0; 39 buf1[0] = 0; 40 buf1[1] = 0; 41 buf1[2] = 0; 42 buf1[3] = 0; 43 break; 44 case 1: buf0[1] &= tmp; 45 buf0[2] = 0; 46 buf0[3] = 0; 47 buf1[0] = 0; 48 buf1[1] = 0; 49 buf1[2] = 0; 50 buf1[3] = 0; 51 break; 52 case 2: buf0[2] &= tmp; 53 buf0[3] = 0; 54 buf1[0] = 0; 55 buf1[1] = 0; 56 buf1[2] = 0; 57 buf1[3] = 0; 58 break; 59 case 3: buf0[3] &= tmp; 60 buf1[0] = 0; 61 buf1[1] = 0; 62 buf1[2] = 0; 63 buf1[3] = 0; 64 break; 65 case 4: buf1[0] &= tmp; 66 buf1[1] = 0; 67 buf1[2] = 0; 68 buf1[3] = 0; 69 break; 70 case 5: buf1[1] &= tmp; 71 buf1[2] = 0; 72 buf1[3] = 0; 73 break; 74 case 6: buf1[2] &= tmp; 75 buf1[3] = 0; 76 break; 77 case 7: buf1[3] &= tmp; 78 break; 79 } 80} 81 82DECLSPEC void truncate_left_optimized (u32 *buf0, u32 *buf1, const u32 offset) 83{ 84 const u32 tmp = ~((1u << ((offset & 3u) * 8u)) - 1u); 85 86 const int offset_switch = offset / 4; 87 88 switch (offset_switch) 89 { 90 case 0: buf0[0] &= tmp; 91 break; 92 case 1: buf0[0] = 0; 93 buf0[1] &= tmp; 94 break; 95 case 2: buf0[0] = 0; 96 buf0[1] = 0; 97 buf0[2] &= tmp; 98 break; 99 case 3: buf0[0] = 0; 100 buf0[1] = 0; 101 buf0[2] = 0; 102 buf0[3] &= tmp; 103 break; 104 case 4: buf0[0] = 0; 105 buf0[1] = 0; 106 buf0[2] = 0; 107 buf0[3] = 0; 108 buf1[0] &= tmp; 109 break; 110 case 5: buf0[0] = 0; 111 buf0[1] = 0; 112 buf0[2] = 0; 113 buf0[3] = 0; 114 buf1[0] = 0; 115 buf1[1] &= tmp; 116 break; 117 case 6: buf0[0] = 0; 118 buf0[1] = 0; 119 buf0[2] = 0; 120 buf0[3] = 0; 121 buf1[0] = 0; 122 buf1[1] = 0; 123 buf1[2] &= tmp; 124 break; 125 case 7: buf0[0] = 0; 126 buf0[1] = 0; 127 buf0[2] = 0; 128 buf0[3] = 0; 129 buf1[0] = 0; 130 buf1[1] = 0; 131 buf1[2] = 0; 132 buf1[3] &= tmp; 133 break; 134 } 135} 136 137DECLSPEC void lshift_block_optimized (const u32 *in0, const u32 *in1, u32 *out0, u32 *out1) 138{ 139 out0[0] = hc_bytealign_S (in0[0], in0[1], 3); 140 out0[1] = hc_bytealign_S (in0[1], in0[2], 3); 141 out0[2] = hc_bytealign_S (in0[2], in0[3], 3); 142 out0[3] = hc_bytealign_S (in0[3], in1[0], 3); 143 out1[0] = hc_bytealign_S (in1[0], in1[1], 3); 144 out1[1] = hc_bytealign_S (in1[1], in1[2], 3); 145 out1[2] = hc_bytealign_S (in1[2], in1[3], 3); 146 out1[3] = hc_bytealign_S (in1[3], 0, 3); 147} 148 149DECLSPEC void rshift_block_optimized (const u32 *in0, const u32 *in1, u32 *out0, u32 *out1) 150{ 151 out1[3] = hc_bytealign_S (in1[2], in1[3], 1); 152 out1[2] = hc_bytealign_S (in1[1], in1[2], 1); 153 out1[1] = hc_bytealign_S (in1[0], in1[1], 1); 154 out1[0] = hc_bytealign_S (in0[3], in1[0], 1); 155 out0[3] = hc_bytealign_S (in0[2], in0[3], 1); 156 out0[2] = hc_bytealign_S (in0[1], in0[2], 1); 157 out0[1] = hc_bytealign_S (in0[0], in0[1], 1); 158 out0[0] = hc_bytealign_S ( 0, in0[0], 1); 159} 160 161DECLSPEC void lshift_block_optimized_N (const u32 *in0, const u32 *in1, u32 *out0, u32 *out1, const u32 num) 162{ 163 switch (num) 164 { 165 case 0: out0[0] = in0[0]; 166 out0[1] = in0[1]; 167 out0[2] = in0[2]; 168 out0[3] = in0[3]; 169 out1[0] = in1[0]; 170 out1[1] = in1[1]; 171 out1[2] = in1[2]; 172 out1[3] = in1[3]; 173 break; 174 case 1: out0[0] = hc_bytealign_S (in0[0], in0[1], 3); 175 out0[1] = hc_bytealign_S (in0[1], in0[2], 3); 176 out0[2] = hc_bytealign_S (in0[2], in0[3], 3); 177 out0[3] = hc_bytealign_S (in0[3], in1[0], 3); 178 out1[0] = hc_bytealign_S (in1[0], in1[1], 3); 179 out1[1] = hc_bytealign_S (in1[1], in1[2], 3); 180 out1[2] = hc_bytealign_S (in1[2], in1[3], 3); 181 out1[3] = hc_bytealign_S (in1[3], 0, 3); 182 break; 183 case 2: out0[0] = hc_bytealign_S (in0[0], in0[1], 2); 184 out0[1] = hc_bytealign_S (in0[1], in0[2], 2); 185 out0[2] = hc_bytealign_S (in0[2], in0[3], 2); 186 out0[3] = hc_bytealign_S (in0[3], in1[0], 2); 187 out1[0] = hc_bytealign_S (in1[0], in1[1], 2); 188 out1[1] = hc_bytealign_S (in1[1], in1[2], 2); 189 out1[2] = hc_bytealign_S (in1[2], in1[3], 2); 190 out1[3] = hc_bytealign_S (in1[3], 0, 2); 191 break; 192 case 3: out0[0] = hc_bytealign_S (in0[0], in0[1], 1); 193 out0[1] = hc_bytealign_S (in0[1], in0[2], 1); 194 out0[2] = hc_bytealign_S (in0[2], in0[3], 1); 195 out0[3] = hc_bytealign_S (in0[3], in1[0], 1); 196 out1[0] = hc_bytealign_S (in1[0], in1[1], 1); 197 out1[1] = hc_bytealign_S (in1[1], in1[2], 1); 198 out1[2] = hc_bytealign_S (in1[2], in1[3], 1); 199 out1[3] = hc_bytealign_S (in1[3], 0, 1); 200 break; 201 case 4: out0[0] = in0[1]; 202 out0[1] = in0[2]; 203 out0[2] = in0[3]; 204 out0[3] = in1[0]; 205 out1[0] = in1[1]; 206 out1[1] = in1[2]; 207 out1[2] = in1[3]; 208 out1[3] = 0; 209 break; 210 case 5: out0[0] = hc_bytealign_S (in0[1], in0[2], 3); 211 out0[1] = hc_bytealign_S (in0[2], in0[3], 3); 212 out0[2] = hc_bytealign_S (in0[3], in1[0], 3); 213 out0[3] = hc_bytealign_S (in1[0], in1[1], 3); 214 out1[0] = hc_bytealign_S (in1[1], in1[2], 3); 215 out1[1] = hc_bytealign_S (in1[2], in1[3], 3); 216 out1[2] = hc_bytealign_S (in1[3], 0, 3); 217 out1[3] = 0; 218 break; 219 case 6: out0[0] = hc_bytealign_S (in0[1], in0[2], 2); 220 out0[1] = hc_bytealign_S (in0[2], in0[3], 2); 221 out0[2] = hc_bytealign_S (in0[3], in1[0], 2); 222 out0[3] = hc_bytealign_S (in1[0], in1[1], 2); 223 out1[0] = hc_bytealign_S (in1[1], in1[2], 2); 224 out1[1] = hc_bytealign_S (in1[2], in1[3], 2); 225 out1[2] = hc_bytealign_S (in1[3], 0, 2); 226 out1[3] = 0; 227 break; 228 case 7: out0[0] = hc_bytealign_S (in0[1], in0[2], 1); 229 out0[1] = hc_bytealign_S (in0[2], in0[3], 1); 230 out0[2] = hc_bytealign_S (in0[3], in1[0], 1); 231 out0[3] = hc_bytealign_S (in1[0], in1[1], 1); 232 out1[0] = hc_bytealign_S (in1[1], in1[2], 1); 233 out1[1] = hc_bytealign_S (in1[2], in1[3], 1); 234 out1[2] = hc_bytealign_S (in1[3], 0, 1); 235 out1[3] = 0; 236 break; 237 case 8: out0[0] = in0[2]; 238 out0[1] = in0[3]; 239 out0[2] = in1[0]; 240 out0[3] = in1[1]; 241 out1[0] = in1[2]; 242 out1[1] = in1[3]; 243 out1[2] = 0; 244 out1[3] = 0; 245 break; 246 case 9: out0[0] = hc_bytealign_S (in0[2], in0[3], 3); 247 out0[1] = hc_bytealign_S (in0[3], in1[0], 3); 248 out0[2] = hc_bytealign_S (in1[0], in1[1], 3); 249 out0[3] = hc_bytealign_S (in1[1], in1[2], 3); 250 out1[0] = hc_bytealign_S (in1[2], in1[3], 3); 251 out1[1] = hc_bytealign_S (in1[3], 0, 3); 252 out1[2] = 0; 253 out1[3] = 0; 254 break; 255 case 10: out0[0] = hc_bytealign_S (in0[2], in0[3], 2); 256 out0[1] = hc_bytealign_S (in0[3], in1[0], 2); 257 out0[2] = hc_bytealign_S (in1[0], in1[1], 2); 258 out0[3] = hc_bytealign_S (in1[1], in1[2], 2); 259 out1[0] = hc_bytealign_S (in1[2], in1[3], 2); 260 out1[1] = hc_bytealign_S (in1[3], 0, 2); 261 out1[2] = 0; 262 out1[3] = 0; 263 break; 264 case 11: out0[0] = hc_bytealign_S (in0[2], in0[3], 1); 265 out0[1] = hc_bytealign_S (in0[3], in1[0], 1); 266 out0[2] = hc_bytealign_S (in1[0], in1[1], 1); 267 out0[3] = hc_bytealign_S (in1[1], in1[2], 1); 268 out1[0] = hc_bytealign_S (in1[2], in1[3], 1); 269 out1[1] = hc_bytealign_S (in1[3], 0, 1); 270 out1[2] = 0; 271 out1[3] = 0; 272 break; 273 case 12: out0[0] = in0[3]; 274 out0[1] = in1[0]; 275 out0[2] = in1[1]; 276 out0[3] = in1[2]; 277 out1[0] = in1[3]; 278 out1[1] = 0; 279 out1[2] = 0; 280 out1[3] = 0; 281 break; 282 case 13: out0[0] = hc_bytealign_S (in0[3], in1[0], 3); 283 out0[1] = hc_bytealign_S (in1[0], in1[1], 3); 284 out0[2] = hc_bytealign_S (in1[1], in1[2], 3); 285 out0[3] = hc_bytealign_S (in1[2], in1[3], 3); 286 out1[0] = hc_bytealign_S (in1[3], 0, 3); 287 out1[1] = 0; 288 out1[2] = 0; 289 out1[3] = 0; 290 break; 291 case 14: out0[0] = hc_bytealign_S (in0[3], in1[0], 2); 292 out0[1] = hc_bytealign_S (in1[0], in1[1], 2); 293 out0[2] = hc_bytealign_S (in1[1], in1[2], 2); 294 out0[3] = hc_bytealign_S (in1[2], in1[3], 2); 295 out1[0] = hc_bytealign_S (in1[3], 0, 2); 296 out1[1] = 0; 297 out1[2] = 0; 298 out1[3] = 0; 299 break; 300 case 15: out0[0] = hc_bytealign_S (in0[3], in1[0], 1); 301 out0[1] = hc_bytealign_S (in1[0], in1[1], 1); 302 out0[2] = hc_bytealign_S (in1[1], in1[2], 1); 303 out0[3] = hc_bytealign_S (in1[2], in1[3], 1); 304 out1[0] = hc_bytealign_S (in1[3], 0, 1); 305 out1[1] = 0; 306 out1[2] = 0; 307 out1[3] = 0; 308 break; 309 case 16: out0[0] = in1[0]; 310 out0[1] = in1[1]; 311 out0[2] = in1[2]; 312 out0[3] = in1[3]; 313 out1[0] = 0; 314 out1[1] = 0; 315 out1[2] = 0; 316 out1[3] = 0; 317 break; 318 case 17: out0[0] = hc_bytealign_S (in1[0], in1[1], 3); 319 out0[1] = hc_bytealign_S (in1[1], in1[2], 3); 320 out0[2] = hc_bytealign_S (in1[2], in1[3], 3); 321 out0[3] = hc_bytealign_S (in1[3], 0, 3); 322 out1[0] = 0; 323 out1[1] = 0; 324 out1[2] = 0; 325 out1[3] = 0; 326 break; 327 case 18: out0[0] = hc_bytealign_S (in1[0], in1[1], 2); 328 out0[1] = hc_bytealign_S (in1[1], in1[2], 2); 329 out0[2] = hc_bytealign_S (in1[2], in1[3], 2); 330 out0[3] = hc_bytealign_S (in1[3], 0, 2); 331 out1[0] = 0; 332 out1[1] = 0; 333 out1[2] = 0; 334 out1[3] = 0; 335 break; 336 case 19: out0[0] = hc_bytealign_S (in1[0], in1[1], 1); 337 out0[1] = hc_bytealign_S (in1[1], in1[2], 1); 338 out0[2] = hc_bytealign_S (in1[2], in1[3], 1); 339 out0[3] = hc_bytealign_S (in1[3], 0, 1); 340 out1[0] = 0; 341 out1[1] = 0; 342 out1[2] = 0; 343 out1[3] = 0; 344 break; 345 case 20: out0[0] = in1[1]; 346 out0[1] = in1[2]; 347 out0[2] = in1[3]; 348 out0[3] = 0; 349 out1[0] = 0; 350 out1[1] = 0; 351 out1[2] = 0; 352 out1[3] = 0; 353 break; 354 case 21: out0[0] = hc_bytealign_S (in1[1], in1[2], 3); 355 out0[1] = hc_bytealign_S (in1[2], in1[3], 3); 356 out0[2] = hc_bytealign_S (in1[3], 0, 3); 357 out0[3] = 0; 358 out1[0] = 0; 359 out1[1] = 0; 360 out1[2] = 0; 361 out1[3] = 0; 362 break; 363 case 22: out0[0] = hc_bytealign_S (in1[1], in1[2], 2); 364 out0[1] = hc_bytealign_S (in1[2], in1[3], 2); 365 out0[2] = hc_bytealign_S (in1[3], 0, 2); 366 out0[3] = 0; 367 out1[0] = 0; 368 out1[1] = 0; 369 out1[2] = 0; 370 out1[3] = 0; 371 break; 372 case 23: out0[0] = hc_bytealign_S (in1[1], in1[2], 1); 373 out0[1] = hc_bytealign_S (in1[2], in1[3], 1); 374 out0[2] = hc_bytealign_S (in1[3], 0, 1); 375 out0[3] = 0; 376 out1[0] = 0; 377 out1[1] = 0; 378 out1[2] = 0; 379 out1[3] = 0; 380 break; 381 case 24: out0[0] = in1[2]; 382 out0[1] = in1[3]; 383 out0[2] = 0; 384 out0[3] = 0; 385 out1[0] = 0; 386 out1[1] = 0; 387 out1[2] = 0; 388 out1[3] = 0; 389 break; 390 case 25: out0[0] = hc_bytealign_S (in1[2], in1[3], 3); 391 out0[1] = hc_bytealign_S (in1[3], 0, 3); 392 out0[2] = 0; 393 out0[3] = 0; 394 out1[0] = 0; 395 out1[1] = 0; 396 out1[2] = 0; 397 out1[3] = 0; 398 break; 399 case 26: out0[0] = hc_bytealign_S (in1[2], in1[3], 2); 400 out0[1] = hc_bytealign_S (in1[3], 0, 2); 401 out0[2] = 0; 402 out0[3] = 0; 403 out1[0] = 0; 404 out1[1] = 0; 405 out1[2] = 0; 406 out1[3] = 0; 407 break; 408 case 27: out0[0] = hc_bytealign_S (in1[2], in1[3], 1); 409 out0[1] = hc_bytealign_S (in1[3], 0, 1); 410 out0[2] = 0; 411 out0[3] = 0; 412 out1[0] = 0; 413 out1[1] = 0; 414 out1[2] = 0; 415 out1[3] = 0; 416 break; 417 case 28: out0[0] = in1[3]; 418 out0[1] = 0; 419 out0[2] = 0; 420 out0[3] = 0; 421 out1[0] = 0; 422 out1[1] = 0; 423 out1[2] = 0; 424 out1[3] = 0; 425 break; 426 case 29: out0[0] = hc_bytealign_S (in1[3], 0, 3); 427 out0[1] = 0; 428 out0[2] = 0; 429 out0[3] = 0; 430 out1[0] = 0; 431 out1[1] = 0; 432 out1[2] = 0; 433 out1[3] = 0; 434 break; 435 case 30: out0[0] = hc_bytealign_S (in1[3], 0, 2); 436 out0[1] = 0; 437 out0[2] = 0; 438 out0[3] = 0; 439 out1[0] = 0; 440 out1[1] = 0; 441 out1[2] = 0; 442 out1[3] = 0; 443 break; 444 case 31: out0[0] = hc_bytealign_S (in1[3], 0, 1); 445 out0[1] = 0; 446 out0[2] = 0; 447 out0[3] = 0; 448 out1[0] = 0; 449 out1[1] = 0; 450 out1[2] = 0; 451 out1[3] = 0; 452 break; 453 } 454} 455 456DECLSPEC void rshift_block_optimized_N (const u32 *in0, const u32 *in1, u32 *out0, u32 *out1, const u32 num) 457{ 458 switch (num) 459 { 460 case 0: out1[3] = in1[3]; 461 out1[2] = in1[2]; 462 out1[1] = in1[1]; 463 out1[0] = in1[0]; 464 out0[3] = in0[3]; 465 out0[2] = in0[2]; 466 out0[1] = in0[1]; 467 out0[0] = in0[0]; 468 break; 469 case 1: out1[3] = hc_bytealign_S (in1[2], in1[3], 1); 470 out1[2] = hc_bytealign_S (in1[1], in1[2], 1); 471 out1[1] = hc_bytealign_S (in1[0], in1[1], 1); 472 out1[0] = hc_bytealign_S (in0[3], in1[0], 1); 473 out0[3] = hc_bytealign_S (in0[2], in0[3], 1); 474 out0[2] = hc_bytealign_S (in0[1], in0[2], 1); 475 out0[1] = hc_bytealign_S (in0[0], in0[1], 1); 476 out0[0] = hc_bytealign_S ( 0, in0[0], 1); 477 break; 478 case 2: out1[3] = hc_bytealign_S (in1[2], in1[3], 2); 479 out1[2] = hc_bytealign_S (in1[1], in1[2], 2); 480 out1[1] = hc_bytealign_S (in1[0], in1[1], 2); 481 out1[0] = hc_bytealign_S (in0[3], in1[0], 2); 482 out0[3] = hc_bytealign_S (in0[2], in0[3], 2); 483 out0[2] = hc_bytealign_S (in0[1], in0[2], 2); 484 out0[1] = hc_bytealign_S (in0[0], in0[1], 2); 485 out0[0] = hc_bytealign_S ( 0, in0[0], 2); 486 break; 487 case 3: out1[3] = hc_bytealign_S (in1[2], in1[3], 3); 488 out1[2] = hc_bytealign_S (in1[1], in1[2], 3); 489 out1[1] = hc_bytealign_S (in1[0], in1[1], 3); 490 out1[0] = hc_bytealign_S (in0[3], in1[0], 3); 491 out0[3] = hc_bytealign_S (in0[2], in0[3], 3); 492 out0[2] = hc_bytealign_S (in0[1], in0[2], 3); 493 out0[1] = hc_bytealign_S (in0[0], in0[1], 3); 494 out0[0] = hc_bytealign_S ( 0, in0[0], 3); 495 break; 496 case 4: out1[3] = in1[2]; 497 out1[2] = in1[1]; 498 out1[1] = in1[0]; 499 out1[0] = in0[3]; 500 out0[3] = in0[2]; 501 out0[2] = in0[1]; 502 out0[1] = in0[0]; 503 out0[0] = 0; 504 break; 505 case 5: out1[3] = hc_bytealign_S (in1[1], in1[2], 1); 506 out1[2] = hc_bytealign_S (in1[0], in1[1], 1); 507 out1[1] = hc_bytealign_S (in0[3], in1[0], 1); 508 out1[0] = hc_bytealign_S (in0[2], in0[3], 1); 509 out0[3] = hc_bytealign_S (in0[1], in0[2], 1); 510 out0[2] = hc_bytealign_S (in0[0], in0[1], 1); 511 out0[1] = hc_bytealign_S ( 0, in0[0], 1); 512 out0[0] = 0; 513 break; 514 case 6: out1[3] = hc_bytealign_S (in1[1], in1[2], 2); 515 out1[2] = hc_bytealign_S (in1[0], in1[1], 2); 516 out1[1] = hc_bytealign_S (in0[3], in1[0], 2); 517 out1[0] = hc_bytealign_S (in0[2], in0[3], 2); 518 out0[3] = hc_bytealign_S (in0[1], in0[2], 2); 519 out0[2] = hc_bytealign_S (in0[0], in0[1], 2); 520 out0[1] = hc_bytealign_S ( 0, in0[0], 2); 521 out0[0] = 0; 522 break; 523 case 7: out1[3] = hc_bytealign_S (in1[1], in1[2], 3); 524 out1[2] = hc_bytealign_S (in1[0], in1[1], 3); 525 out1[1] = hc_bytealign_S (in0[3], in1[0], 3); 526 out1[0] = hc_bytealign_S (in0[2], in0[3], 3); 527 out0[3] = hc_bytealign_S (in0[1], in0[2], 3); 528 out0[2] = hc_bytealign_S (in0[0], in0[1], 3); 529 out0[1] = hc_bytealign_S ( 0, in0[0], 3); 530 out0[0] = 0; 531 break; 532 case 8: out1[3] = in1[1]; 533 out1[2] = in1[0]; 534 out1[1] = in0[3]; 535 out1[0] = in0[2]; 536 out0[3] = in0[1]; 537 out0[2] = in0[0]; 538 out0[1] = 0; 539 out0[0] = 0; 540 break; 541 case 9: out1[3] = hc_bytealign_S (in1[0], in1[1], 1); 542 out1[2] = hc_bytealign_S (in0[3], in1[0], 1); 543 out1[1] = hc_bytealign_S (in0[2], in0[3], 1); 544 out1[0] = hc_bytealign_S (in0[1], in0[2], 1); 545 out0[3] = hc_bytealign_S (in0[0], in0[1], 1); 546 out0[2] = hc_bytealign_S ( 0, in0[0], 1); 547 out0[1] = 0; 548 out0[0] = 0; 549 break; 550 case 10: out1[3] = hc_bytealign_S (in1[0], in1[1], 2); 551 out1[2] = hc_bytealign_S (in0[3], in1[0], 2); 552 out1[1] = hc_bytealign_S (in0[2], in0[3], 2); 553 out1[0] = hc_bytealign_S (in0[1], in0[2], 2); 554 out0[3] = hc_bytealign_S (in0[0], in0[1], 2); 555 out0[2] = hc_bytealign_S ( 0, in0[0], 2); 556 out0[1] = 0; 557 out0[0] = 0; 558 break; 559 case 11: out1[3] = hc_bytealign_S (in1[0], in1[1], 3); 560 out1[2] = hc_bytealign_S (in0[3], in1[0], 3); 561 out1[1] = hc_bytealign_S (in0[2], in0[3], 3); 562 out1[0] = hc_bytealign_S (in0[1], in0[2], 3); 563 out0[3] = hc_bytealign_S (in0[0], in0[1], 3); 564 out0[2] = hc_bytealign_S ( 0, in0[0], 3); 565 out0[1] = 0; 566 out0[0] = 0; 567 break; 568 case 12: out1[3] = in1[0]; 569 out1[2] = in0[3]; 570 out1[1] = in0[2]; 571 out1[0] = in0[1]; 572 out0[3] = in0[0]; 573 out0[2] = 0; 574 out0[1] = 0; 575 out0[0] = 0; 576 break; 577 case 13: out1[3] = hc_bytealign_S (in0[3], in1[0], 1); 578 out1[2] = hc_bytealign_S (in0[2], in0[3], 1); 579 out1[1] = hc_bytealign_S (in0[1], in0[2], 1); 580 out1[0] = hc_bytealign_S (in0[0], in0[1], 1); 581 out0[3] = hc_bytealign_S ( 0, in0[0], 1); 582 out0[2] = 0; 583 out0[1] = 0; 584 out0[0] = 0; 585 break; 586 case 14: out1[3] = hc_bytealign_S (in0[3], in1[0], 2); 587 out1[2] = hc_bytealign_S (in0[2], in0[3], 2); 588 out1[1] = hc_bytealign_S (in0[1], in0[2], 2); 589 out1[0] = hc_bytealign_S (in0[0], in0[1], 2); 590 out0[3] = hc_bytealign_S ( 0, in0[0], 2); 591 out0[2] = 0; 592 out0[1] = 0; 593 out0[0] = 0; 594 break; 595 case 15: out1[3] = hc_bytealign_S (in0[3], in1[0], 3); 596 out1[2] = hc_bytealign_S (in0[2], in0[3], 3); 597 out1[1] = hc_bytealign_S (in0[1], in0[2], 3); 598 out1[0] = hc_bytealign_S (in0[0], in0[1], 3); 599 out0[3] = hc_bytealign_S ( 0, in0[0], 3); 600 out0[2] = 0; 601 out0[1] = 0; 602 out0[0] = 0; 603 break; 604 case 16: out1[3] = in0[3]; 605 out1[2] = in0[2]; 606 out1[1] = in0[1]; 607 out1[0] = in0[0]; 608 out0[3] = 0; 609 out0[2] = 0; 610 out0[1] = 0; 611 out0[0] = 0; 612 break; 613 case 17: out1[3] = hc_bytealign_S (in0[2], in0[3], 1); 614 out1[2] = hc_bytealign_S (in0[1], in0[2], 1); 615 out1[1] = hc_bytealign_S (in0[0], in0[1], 1); 616 out1[0] = hc_bytealign_S ( 0, in0[0], 1); 617 out0[3] = 0; 618 out0[2] = 0; 619 out0[1] = 0; 620 out0[0] = 0; 621 break; 622 case 18: out1[3] = hc_bytealign_S (in0[2], in0[3], 2); 623 out1[2] = hc_bytealign_S (in0[1], in0[2], 2); 624 out1[1] = hc_bytealign_S (in0[0], in0[1], 2); 625 out1[0] = hc_bytealign_S ( 0, in0[0], 2); 626 out0[3] = 0; 627 out0[2] = 0; 628 out0[1] = 0; 629 out0[0] = 0; 630 break; 631 case 19: out1[3] = hc_bytealign_S (in0[2], in0[3], 3); 632 out1[2] = hc_bytealign_S (in0[1], in0[2], 3); 633 out1[1] = hc_bytealign_S (in0[0], in0[1], 3); 634 out1[0] = hc_bytealign_S ( 0, in0[0], 3); 635 out0[3] = 0; 636 out0[2] = 0; 637 out0[1] = 0; 638 out0[0] = 0; 639 break; 640 case 20: out1[3] = in0[2]; 641 out1[2] = in0[1]; 642 out1[1] = in0[0]; 643 out1[0] = 0; 644 out0[3] = 0; 645 out0[2] = 0; 646 out0[1] = 0; 647 out0[0] = 0; 648 break; 649 case 21: out1[3] = hc_bytealign_S (in0[1], in0[2], 1); 650 out1[2] = hc_bytealign_S (in0[0], in0[1], 1); 651 out1[1] = hc_bytealign_S ( 0, in0[0], 1); 652 out1[0] = 0; 653 out0[3] = 0; 654 out0[2] = 0; 655 out0[1] = 0; 656 out0[0] = 0; 657 break; 658 case 22: out1[3] = hc_bytealign_S (in0[1], in0[2], 2); 659 out1[2] = hc_bytealign_S (in0[0], in0[1], 2); 660 out1[1] = hc_bytealign_S ( 0, in0[0], 2); 661 out1[0] = 0; 662 out0[3] = 0; 663 out0[2] = 0; 664 out0[1] = 0; 665 out0[0] = 0; 666 break; 667 case 23: out1[3] = hc_bytealign_S (in0[1], in0[2], 3); 668 out1[2] = hc_bytealign_S (in0[0], in0[1], 3); 669 out1[1] = hc_bytealign_S ( 0, in0[0], 3); 670 out1[0] = 0; 671 out0[3] = 0; 672 out0[2] = 0; 673 out0[1] = 0; 674 out0[0] = 0; 675 break; 676 case 24: out1[3] = in0[1]; 677 out1[2] = in0[0]; 678 out1[1] = 0; 679 out1[0] = 0; 680 out0[3] = 0; 681 out0[2] = 0; 682 out0[1] = 0; 683 out0[0] = 0; 684 break; 685 case 25: out1[3] = hc_bytealign_S (in0[0], in0[1], 1); 686 out1[2] = hc_bytealign_S ( 0, in0[0], 1); 687 out1[1] = 0; 688 out1[0] = 0; 689 out0[3] = 0; 690 out0[2] = 0; 691 out0[1] = 0; 692 out0[0] = 0; 693 break; 694 case 26: out1[3] = hc_bytealign_S (in0[0], in0[1], 2); 695 out1[2] = hc_bytealign_S ( 0, in0[0], 2); 696 out1[1] = 0; 697 out1[0] = 0; 698 out0[3] = 0; 699 out0[2] = 0; 700 out0[1] = 0; 701 out0[0] = 0; 702 break; 703 case 27: out1[3] = hc_bytealign_S (in0[0], in0[1], 3); 704 out1[2] = hc_bytealign_S ( 0, in0[0], 3); 705 out1[1] = 0; 706 out1[0] = 0; 707 out0[3] = 0; 708 out0[2] = 0; 709 out0[1] = 0; 710 out0[0] = 0; 711 break; 712 case 28: out1[3] = in0[0]; 713 out1[2] = 0; 714 out1[1] = 0; 715 out1[0] = 0; 716 out0[3] = 0; 717 out0[2] = 0; 718 out0[1] = 0; 719 out0[0] = 0; 720 break; 721 case 29: out1[3] = hc_bytealign_S ( 0, in0[0], 1); 722 out1[2] = 0; 723 out1[1] = 0; 724 out1[0] = 0; 725 out0[3] = 0; 726 out0[2] = 0; 727 out0[1] = 0; 728 out0[0] = 0; 729 break; 730 case 30: out1[3] = hc_bytealign_S ( 0, in0[0], 2); 731 out1[2] = 0; 732 out1[1] = 0; 733 out1[0] = 0; 734 out0[3] = 0; 735 out0[2] = 0; 736 out0[1] = 0; 737 out0[0] = 0; 738 break; 739 case 31: out1[3] = hc_bytealign_S ( 0, in0[0], 3); 740 out1[2] = 0; 741 out1[1] = 0; 742 out1[0] = 0; 743 out0[3] = 0; 744 out0[2] = 0; 745 out0[1] = 0; 746 out0[0] = 0; 747 break; 748 } 749} 750 751DECLSPEC void append_block1_optimized (const u32 offset, u32 *buf0, u32 *buf1, const u32 src_r0) 752{ 753 // this version works with 1 byte append only 754 const u32 value = src_r0 & 0xff; 755 756 const u32 tmp = value << 0 757 | value << 8 758 | value << 16 759 | value << 24; 760 761 u32 v[4]; 762 763 set_mark_1x4_S (v, offset); 764 765 const u32 offset16 = offset / 16; 766 767 append_helper_1x4_S (buf0, ((offset16 == 0) ? tmp : 0), v); 768 append_helper_1x4_S (buf1, ((offset16 == 1) ? tmp : 0), v); 769} 770 771DECLSPEC void append_block8_optimized (const u32 offset, u32 *buf0, u32 *buf1, const u32 *src_l0, const u32 *src_l1, const u32 *src_r0, const u32 *src_r1) 772{ 773 u32 s0 = 0; 774 u32 s1 = 0; 775 u32 s2 = 0; 776 u32 s3 = 0; 777 u32 s4 = 0; 778 u32 s5 = 0; 779 u32 s6 = 0; 780 u32 s7 = 0; 781 782 const int offset_switch = offset / 4; 783 784 #if ((defined IS_AMD || defined IS_HIP) && HAS_VPERM == 0) || defined IS_GENERIC 785 const u32 src_r00 = src_r0[0]; 786 const u32 src_r01 = src_r0[1]; 787 const u32 src_r02 = src_r0[2]; 788 const u32 src_r03 = src_r0[3]; 789 const u32 src_r10 = src_r1[0]; 790 const u32 src_r11 = src_r1[1]; 791 const u32 src_r12 = src_r1[2]; 792 const u32 src_r13 = src_r1[3]; 793 794 switch (offset_switch) 795 { 796 case 0: 797 s7 = hc_bytealign_S (src_r12, src_r13, offset); 798 s6 = hc_bytealign_S (src_r11, src_r12, offset); 799 s5 = hc_bytealign_S (src_r10, src_r11, offset); 800 s4 = hc_bytealign_S (src_r03, src_r10, offset); 801 s3 = hc_bytealign_S (src_r02, src_r03, offset); 802 s2 = hc_bytealign_S (src_r01, src_r02, offset); 803 s1 = hc_bytealign_S (src_r00, src_r01, offset); 804 s0 = hc_bytealign_S ( 0, src_r00, offset); 805 break; 806 807 case 1: 808 s7 = hc_bytealign_S (src_r11, src_r12, offset); 809 s6 = hc_bytealign_S (src_r10, src_r11, offset); 810 s5 = hc_bytealign_S (src_r03, src_r10, offset); 811 s4 = hc_bytealign_S (src_r02, src_r03, offset); 812 s3 = hc_bytealign_S (src_r01, src_r02, offset); 813 s2 = hc_bytealign_S (src_r00, src_r01, offset); 814 s1 = hc_bytealign_S ( 0, src_r00, offset); 815 s0 = 0; 816 break; 817 818 case 2: 819 s7 = hc_bytealign_S (src_r10, src_r11, offset); 820 s6 = hc_bytealign_S (src_r03, src_r10, offset); 821 s5 = hc_bytealign_S (src_r02, src_r03, offset); 822 s4 = hc_bytealign_S (src_r01, src_r02, offset); 823 s3 = hc_bytealign_S (src_r00, src_r01, offset); 824 s2 = hc_bytealign_S ( 0, src_r00, offset); 825 s1 = 0; 826 s0 = 0; 827 break; 828 829 case 3: 830 s7 = hc_bytealign_S (src_r03, src_r10, offset); 831 s6 = hc_bytealign_S (src_r02, src_r03, offset); 832 s5 = hc_bytealign_S (src_r01, src_r02, offset); 833 s4 = hc_bytealign_S (src_r00, src_r01, offset); 834 s3 = hc_bytealign_S ( 0, src_r00, offset); 835 s2 = 0; 836 s1 = 0; 837 s0 = 0; 838 839 break; 840 841 case 4: 842 s7 = hc_bytealign_S (src_r02, src_r03, offset); 843 s6 = hc_bytealign_S (src_r01, src_r02, offset); 844 s5 = hc_bytealign_S (src_r00, src_r01, offset); 845 s4 = hc_bytealign_S ( 0, src_r00, offset); 846 s3 = 0; 847 s2 = 0; 848 s1 = 0; 849 s0 = 0; 850 break; 851 852 case 5: 853 s7 = hc_bytealign_S (src_r01, src_r02, offset); 854 s6 = hc_bytealign_S (src_r00, src_r01, offset); 855 s5 = hc_bytealign_S ( 0, src_r00, offset); 856 s4 = 0; 857 s3 = 0; 858 s2 = 0; 859 s1 = 0; 860 s0 = 0; 861 break; 862 863 case 6: 864 s7 = hc_bytealign_S (src_r00, src_r01, offset); 865 s6 = hc_bytealign_S ( 0, src_r00, offset); 866 s5 = 0; 867 s4 = 0; 868 s3 = 0; 869 s2 = 0; 870 s1 = 0; 871 s0 = 0; 872 break; 873 874 case 7: 875 s7 = hc_bytealign_S ( 0, src_r00, offset); 876 s6 = 0; 877 s5 = 0; 878 s4 = 0; 879 s3 = 0; 880 s2 = 0; 881 s1 = 0; 882 s0 = 0; 883 break; 884 } 885 #endif 886 887 #if ((defined IS_AMD || defined IS_HIP) && HAS_VPERM == 1) || defined IS_NV 888 889 const int offset_mod_4 = offset & 3; 890 891 const int offset_minus_4 = 4 - offset_mod_4; 892 893 #if defined IS_NV 894 const int selector = (0x76543210 >> (offset_minus_4 * 4)) & 0xffff; 895 #endif 896 897 #if (defined IS_AMD || defined IS_HIP) 898 const int selector = l32_from_64_S (0x0706050403020100UL >> (offset_minus_4 * 8)); 899 #endif 900 901 const u32 src_r00 = src_r0[0]; 902 const u32 src_r01 = src_r0[1]; 903 const u32 src_r02 = src_r0[2]; 904 const u32 src_r03 = src_r0[3]; 905 const u32 src_r10 = src_r1[0]; 906 const u32 src_r11 = src_r1[1]; 907 const u32 src_r12 = src_r1[2]; 908 const u32 src_r13 = src_r1[3]; 909 910 switch (offset_switch) 911 { 912 case 0: 913 s7 = hc_byte_perm_S (src_r12, src_r13, selector); 914 s6 = hc_byte_perm_S (src_r11, src_r12, selector); 915 s5 = hc_byte_perm_S (src_r10, src_r11, selector); 916 s4 = hc_byte_perm_S (src_r03, src_r10, selector); 917 s3 = hc_byte_perm_S (src_r02, src_r03, selector); 918 s2 = hc_byte_perm_S (src_r01, src_r02, selector); 919 s1 = hc_byte_perm_S (src_r00, src_r01, selector); 920 s0 = hc_byte_perm_S ( 0, src_r00, selector); 921 break; 922 923 case 1: 924 s7 = hc_byte_perm_S (src_r11, src_r12, selector); 925 s6 = hc_byte_perm_S (src_r10, src_r11, selector); 926 s5 = hc_byte_perm_S (src_r03, src_r10, selector); 927 s4 = hc_byte_perm_S (src_r02, src_r03, selector); 928 s3 = hc_byte_perm_S (src_r01, src_r02, selector); 929 s2 = hc_byte_perm_S (src_r00, src_r01, selector); 930 s1 = hc_byte_perm_S ( 0, src_r00, selector); 931 s0 = 0; 932 break; 933 934 case 2: 935 s7 = hc_byte_perm_S (src_r10, src_r11, selector); 936 s6 = hc_byte_perm_S (src_r03, src_r10, selector); 937 s5 = hc_byte_perm_S (src_r02, src_r03, selector); 938 s4 = hc_byte_perm_S (src_r01, src_r02, selector); 939 s3 = hc_byte_perm_S (src_r00, src_r01, selector); 940 s2 = hc_byte_perm_S ( 0, src_r00, selector); 941 s1 = 0; 942 s0 = 0; 943 break; 944 945 case 3: 946 s7 = hc_byte_perm_S (src_r03, src_r10, selector); 947 s6 = hc_byte_perm_S (src_r02, src_r03, selector); 948 s5 = hc_byte_perm_S (src_r01, src_r02, selector); 949 s4 = hc_byte_perm_S (src_r00, src_r01, selector); 950 s3 = hc_byte_perm_S ( 0, src_r00, selector); 951 s2 = 0; 952 s1 = 0; 953 s0 = 0; 954 955 break; 956 957 case 4: 958 s7 = hc_byte_perm_S (src_r02, src_r03, selector); 959 s6 = hc_byte_perm_S (src_r01, src_r02, selector); 960 s5 = hc_byte_perm_S (src_r00, src_r01, selector); 961 s4 = hc_byte_perm_S ( 0, src_r00, selector); 962 s3 = 0; 963 s2 = 0; 964 s1 = 0; 965 s0 = 0; 966 break; 967 968 case 5: 969 s7 = hc_byte_perm_S (src_r01, src_r02, selector); 970 s6 = hc_byte_perm_S (src_r00, src_r01, selector); 971 s5 = hc_byte_perm_S ( 0, src_r00, selector); 972 s4 = 0; 973 s3 = 0; 974 s2 = 0; 975 s1 = 0; 976 s0 = 0; 977 break; 978 979 case 6: 980 s7 = hc_byte_perm_S (src_r00, src_r01, selector); 981 s6 = hc_byte_perm_S ( 0, src_r00, selector); 982 s5 = 0; 983 s4 = 0; 984 s3 = 0; 985 s2 = 0; 986 s1 = 0; 987 s0 = 0; 988 break; 989 990 case 7: 991 s7 = hc_byte_perm_S ( 0, src_r00, selector); 992 s6 = 0; 993 s5 = 0; 994 s4 = 0; 995 s3 = 0; 996 s2 = 0; 997 s1 = 0; 998 s0 = 0; 999 break; 1000 } 1001 #endif 1002 1003 buf0[0] = src_l0[0] | s0; 1004 buf0[1] = src_l0[1] | s1; 1005 buf0[2] = src_l0[2] | s2; 1006 buf0[3] = src_l0[3] | s3; 1007 buf1[0] = src_l1[0] | s4; 1008 buf1[1] = src_l1[1] | s5; 1009 buf1[2] = src_l1[2] | s6; 1010 buf1[3] = src_l1[3] | s7; 1011} 1012 1013DECLSPEC void reverse_block_optimized (u32 *in0, u32 *in1, u32 *out0, u32 *out1, const u32 len) 1014{ 1015 rshift_block_optimized_N (in0, in1, out0, out1, 32 - len); 1016 1017 u32 tib40[4]; 1018 u32 tib41[4]; 1019 1020 tib40[0] = out1[3]; 1021 tib40[1] = out1[2]; 1022 tib40[2] = out1[1]; 1023 tib40[3] = out1[0]; 1024 tib41[0] = out0[3]; 1025 tib41[1] = out0[2]; 1026 tib41[2] = out0[1]; 1027 tib41[3] = out0[0]; 1028 1029 out0[0] = hc_swap32_S (tib40[0]); 1030 out0[1] = hc_swap32_S (tib40[1]); 1031 out0[2] = hc_swap32_S (tib40[2]); 1032 out0[3] = hc_swap32_S (tib40[3]); 1033 out1[0] = hc_swap32_S (tib41[0]); 1034 out1[1] = hc_swap32_S (tib41[1]); 1035 out1[2] = hc_swap32_S (tib41[2]); 1036 out1[3] = hc_swap32_S (tib41[3]); 1037} 1038 1039DECLSPEC void exchange_byte_optimized (u32 *buf, const int off_src, const int off_dst) 1040{ 1041 u8 *ptr = (u8 *) buf; 1042 1043 const u8 tmp = ptr[off_src]; 1044 1045 ptr[off_src] = ptr[off_dst]; 1046 ptr[off_dst] = tmp; 1047} 1048 1049DECLSPEC u32 rule_op_mangle_lrest (MAYBE_UNUSED const u32 p0, MAYBE_UNUSED const u32 p1, MAYBE_UNUSED u32 *buf0, MAYBE_UNUSED u32 *buf1, const u32 in_len) 1050{ 1051 u32 t; 1052 1053 t = buf0[0]; buf0[0] = t | generate_cmask_optimized (t); 1054 t = buf0[1]; buf0[1] = t | generate_cmask_optimized (t); 1055 t = buf0[2]; buf0[2] = t | generate_cmask_optimized (t); 1056 t = buf0[3]; buf0[3] = t | generate_cmask_optimized (t); 1057 t = buf1[0]; buf1[0] = t | generate_cmask_optimized (t); 1058 t = buf1[1]; buf1[1] = t | generate_cmask_optimized (t); 1059 t = buf1[2]; buf1[2] = t | generate_cmask_optimized (t); 1060 t = buf1[3]; buf1[3] = t | generate_cmask_optimized (t); 1061 1062 return in_len; 1063} 1064 1065DECLSPEC u32 rule_op_mangle_urest (MAYBE_UNUSED const u32 p0, MAYBE_UNUSED const u32 p1, MAYBE_UNUSED u32 *buf0, MAYBE_UNUSED u32 *buf1, const u32 in_len) 1066{ 1067 u32 t; 1068 1069 t = buf0[0]; buf0[0] = t & ~(generate_cmask_optimized (t)); 1070 t = buf0[1]; buf0[1] = t & ~(generate_cmask_optimized (t)); 1071 t = buf0[2]; buf0[2] = t & ~(generate_cmask_optimized (t)); 1072 t = buf0[3]; buf0[3] = t & ~(generate_cmask_optimized (t)); 1073 t = buf1[0]; buf1[0] = t & ~(generate_cmask_optimized (t)); 1074 t = buf1[1]; buf1[1] = t & ~(generate_cmask_optimized (t)); 1075 t = buf1[2]; buf1[2] = t & ~(generate_cmask_optimized (t)); 1076 t = buf1[3]; buf1[3] = t & ~(generate_cmask_optimized (t)); 1077 1078 return in_len; 1079} 1080 1081DECLSPEC u32 rule_op_mangle_lrest_ufirst (MAYBE_UNUSED const u32 p0, MAYBE_UNUSED const u32 p1, MAYBE_UNUSED u32 *buf0, MAYBE_UNUSED u32 *buf1, const u32 in_len) 1082{ 1083 u32 t; 1084 1085 t = buf0[0]; buf0[0] = t | generate_cmask_optimized (t); 1086 t = buf0[1]; buf0[1] = t | generate_cmask_optimized (t); 1087 t = buf0[2]; buf0[2] = t | generate_cmask_optimized (t); 1088 t = buf0[3]; buf0[3] = t | generate_cmask_optimized (t); 1089 t = buf1[0]; buf1[0] = t | generate_cmask_optimized (t); 1090 t = buf1[1]; buf1[1] = t | generate_cmask_optimized (t); 1091 t = buf1[2]; buf1[2] = t | generate_cmask_optimized (t); 1092 t = buf1[3]; buf1[3] = t | generate_cmask_optimized (t); 1093 1094 t = buf0[0]; buf0[0] = t & ~(0x00000020 & generate_cmask_optimized (t)); 1095 1096 return in_len; 1097} 1098 1099DECLSPEC u32 rule_op_mangle_urest_lfirst (MAYBE_UNUSED const u32 p0, MAYBE_UNUSED const u32 p1, MAYBE_UNUSED u32 *buf0, MAYBE_UNUSED u32 *buf1, const u32 in_len) 1100{ 1101 u32 t; 1102 1103 t = buf0[0]; buf0[0] = t & ~(generate_cmask_optimized (t)); 1104 t = buf0[1]; buf0[1] = t & ~(generate_cmask_optimized (t)); 1105 t = buf0[2]; buf0[2] = t & ~(generate_cmask_optimized (t)); 1106 t = buf0[3]; buf0[3] = t & ~(generate_cmask_optimized (t)); 1107 t = buf1[0]; buf1[0] = t & ~(generate_cmask_optimized (t)); 1108 t = buf1[1]; buf1[1] = t & ~(generate_cmask_optimized (t)); 1109 t = buf1[2]; buf1[2] = t & ~(generate_cmask_optimized (t)); 1110 t = buf1[3]; buf1[3] = t & ~(generate_cmask_optimized (t)); 1111 1112 t = buf0[0]; buf0[0] = t | (0x00000020 & generate_cmask_optimized (t)); 1113 1114 return in_len; 1115} 1116 1117DECLSPEC u32 rule_op_mangle_trest (MAYBE_UNUSED const u32 p0, MAYBE_UNUSED const u32 p1, MAYBE_UNUSED u32 *buf0, MAYBE_UNUSED u32 *buf1, const u32 in_len) 1118{ 1119 u32 t; 1120 1121 t = buf0[0]; buf0[0] = t ^ generate_cmask_optimized (t); 1122 t = buf0[1]; buf0[1] = t ^ generate_cmask_optimized (t); 1123 t = buf0[2]; buf0[2] = t ^ generate_cmask_optimized (t); 1124 t = buf0[3]; buf0[3] = t ^ generate_cmask_optimized (t); 1125 t = buf1[0]; buf1[0] = t ^ generate_cmask_optimized (t); 1126 t = buf1[1]; buf1[1] = t ^ generate_cmask_optimized (t); 1127 t = buf1[2]; buf1[2] = t ^ generate_cmask_optimized (t); 1128 t = buf1[3]; buf1[3] = t ^ generate_cmask_optimized (t); 1129 1130 return in_len; 1131} 1132 1133DECLSPEC u32 rule_op_mangle_toggle_at (MAYBE_UNUSED const u32 p0, MAYBE_UNUSED const u32 p1, MAYBE_UNUSED u32 *buf0, MAYBE_UNUSED u32 *buf1, const u32 in_len) 1134{ 1135 if (p0 >= in_len) return in_len; 1136 1137 u32 t[8]; 1138 1139 t[0] = buf0[0]; 1140 t[1] = buf0[1]; 1141 t[2] = buf0[2]; 1142 t[3] = buf0[3]; 1143 t[4] = buf1[0]; 1144 t[5] = buf1[1]; 1145 t[6] = buf1[2]; 1146 t[7] = buf1[3]; 1147 1148 const u32 tmp = t[p0 / 4]; 1149 1150 const u32 m = 0x20u << ((p0 & 3) * 8); 1151 1152 t[p0 / 4] = tmp ^ (m & generate_cmask_optimized (tmp)); 1153 1154 buf0[0] = t[0]; 1155 buf0[1] = t[1]; 1156 buf0[2] = t[2]; 1157 buf0[3] = t[3]; 1158 buf1[0] = t[4]; 1159 buf1[1] = t[5]; 1160 buf1[2] = t[6]; 1161 buf1[3] = t[7]; 1162 1163 return (in_len); 1164} 1165 1166DECLSPEC u32 rule_op_mangle_toggle_at_sep (MAYBE_UNUSED const u32 p0, MAYBE_UNUSED const u32 p1, MAYBE_UNUSED u32 *buf0, MAYBE_UNUSED u32 *buf1, const u32 in_len) 1167{ 1168 if (in_len == 0) return in_len; 1169 1170 u32 r0 = search_on_register (buf0[0], p1); 1171 u32 r1 = search_on_register (buf0[1], p1); 1172 u32 r2 = search_on_register (buf0[2], p1); 1173 u32 r3 = search_on_register (buf0[3], p1); 1174 u32 r4 = search_on_register (buf1[0], p1); 1175 u32 r5 = search_on_register (buf1[1], p1); 1176 u32 r6 = search_on_register (buf1[2], p1); 1177 u32 r7 = search_on_register (buf1[3], p1); 1178 1179 const u32 rn = (r0 << 0) 1180 | (r1 << 4) 1181 | (r2 << 8) 1182 | (r3 << 12) 1183 | (r4 << 16) 1184 | (r5 << 20) 1185 | (r6 << 24) 1186 | (r7 << 28); 1187 1188 if (rn == 0) return in_len; 1189 1190 u32 occurence = 0; 1191 1192 u32 ro = 0; 1193 1194 #ifdef _unroll 1195 #pragma unroll 1196 #endif 1197 for (int i = 0; i < 32; i++) 1198 { 1199 if ((rn >> i) & 1) 1200 { 1201 if (occurence == p0) 1202 { 1203 ro = 1 << i; 1204 1205 break; 1206 } 1207 1208 occurence++; 1209 } 1210 } 1211 1212 r0 = (ro >> 0) & 15; 1213 r1 = (ro >> 4) & 15; 1214 r2 = (ro >> 8) & 15; 1215 r3 = (ro >> 12) & 15; 1216 r4 = (ro >> 16) & 15; 1217 r5 = (ro >> 20) & 15; 1218 r6 = (ro >> 24) & 15; 1219 r7 = (ro >> 28) & 15; 1220 1221 r0 <<= 1; 1222 r1 <<= 1; r1 |= r0 >> 4; 1223 r2 <<= 1; r2 |= r1 >> 4; 1224 r3 <<= 1; r3 |= r2 >> 4; 1225 r4 <<= 1; r4 |= r3 >> 4; 1226 r5 <<= 1; r5 |= r4 >> 4; 1227 r6 <<= 1; r6 |= r5 >> 4; 1228 r7 <<= 1; r7 |= r6 >> 4; 1229 1230 buf0[0] = toggle_on_register (buf0[0], r0); 1231 buf0[1] = toggle_on_register (buf0[1], r1); 1232 buf0[2] = toggle_on_register (buf0[2], r2); 1233 buf0[3] = toggle_on_register (buf0[3], r3); 1234 buf1[0] = toggle_on_register (buf1[0], r4); 1235 buf1[1] = toggle_on_register (buf1[1], r5); 1236 buf1[2] = toggle_on_register (buf1[2], r6); 1237 buf1[3] = toggle_on_register (buf1[3], r7); 1238 1239 return in_len; 1240} 1241 1242DECLSPEC u32 rule_op_mangle_reverse (MAYBE_UNUSED const u32 p0, MAYBE_UNUSED const u32 p1, MAYBE_UNUSED u32 *buf0, MAYBE_UNUSED u32 *buf1, const u32 in_len) 1243{ 1244 reverse_block_optimized (buf0, buf1, buf0, buf1, in_len); 1245 1246 return in_len; 1247} 1248 1249DECLSPEC u32 rule_op_mangle_dupeword (MAYBE_UNUSED const u32 p0, MAYBE_UNUSED const u32 p1, MAYBE_UNUSED u32 *buf0, MAYBE_UNUSED u32 *buf1, const u32 in_len) 1250{ 1251 if ((in_len + in_len) >= 32) return in_len; 1252 1253 u32 out_len = in_len; 1254 1255 append_block8_optimized (out_len, buf0, buf1, buf0, buf1, buf0, buf1); 1256 1257 out_len += in_len; 1258 1259 return out_len; 1260} 1261 1262DECLSPEC u32 rule_op_mangle_dupeword_times (MAYBE_UNUSED const u32 p0, MAYBE_UNUSED const u32 p1, MAYBE_UNUSED u32 *buf0, MAYBE_UNUSED u32 *buf1, const u32 in_len) 1263{ 1264 if (((in_len * p0) + in_len) >= 32) return in_len; 1265 1266 u32 out_len = in_len; 1267 1268 u32 tib40[4]; 1269 u32 tib41[4]; 1270 1271 tib40[0] = buf0[0]; 1272 tib40[1] = buf0[1]; 1273 tib40[2] = buf0[2]; 1274 tib40[3] = buf0[3]; 1275 tib41[0] = buf1[0]; 1276 tib41[1] = buf1[1]; 1277 tib41[2] = buf1[2]; 1278 tib41[3] = buf1[3]; 1279 1280 for (u32 i = 0; i < p0; i++) 1281 { 1282 append_block8_optimized (out_len, buf0, buf1, buf0, buf1, tib40, tib41); 1283 1284 out_len += in_len; 1285 } 1286 1287 return out_len; 1288} 1289 1290DECLSPEC u32 rule_op_mangle_reflect (MAYBE_UNUSED const u32 p0, MAYBE_UNUSED const u32 p1, MAYBE_UNUSED u32 *buf0, MAYBE_UNUSED u32 *buf1, const u32 in_len) 1291{ 1292 if ((in_len + in_len) >= 32) return in_len; 1293 1294 u32 out_len = in_len; 1295 1296 u32 tib40[4] = { 0 }; 1297 u32 tib41[4] = { 0 }; 1298 1299 reverse_block_optimized (buf0, buf1, tib40, tib41, out_len); 1300 1301 append_block8_optimized (out_len, buf0, buf1, buf0, buf1, tib40, tib41); 1302 1303 out_len += in_len; 1304 1305 return out_len; 1306} 1307 1308DECLSPEC u32 rule_op_mangle_append (MAYBE_UNUSED const u32 p0, MAYBE_UNUSED const u32 p1, MAYBE_UNUSED u32 *buf0, MAYBE_UNUSED u32 *buf1, const u32 in_len) 1309{ 1310 if ((in_len + 1) >= 32) return in_len; 1311 1312 u32 out_len = in_len; 1313 1314 append_block1_optimized (out_len, buf0, buf1, p0); 1315 1316 out_len++; 1317 1318 return out_len; 1319} 1320 1321DECLSPEC u32 rule_op_mangle_prepend (MAYBE_UNUSED const u32 p0, MAYBE_UNUSED const u32 p1, MAYBE_UNUSED u32 *buf0, MAYBE_UNUSED u32 *buf1, const u32 in_len) 1322{ 1323 if ((in_len + 1) >= 32) return in_len; 1324 1325 u32 out_len = in_len; 1326 1327 rshift_block_optimized (buf0, buf1, buf0, buf1); 1328 1329 buf0[0] = buf0[0] | p0; 1330 1331 out_len++; 1332 1333 return out_len; 1334} 1335 1336DECLSPEC u32 rule_op_mangle_rotate_left (MAYBE_UNUSED const u32 p0, MAYBE_UNUSED const u32 p1, MAYBE_UNUSED u32 *buf0, MAYBE_UNUSED u32 *buf1, const u32 in_len) 1337{ 1338 if (in_len == 0) return in_len; 1339 1340 const u32 in_len1 = in_len - 1; 1341 1342 const u32 tmp = buf0[0]; 1343 1344 lshift_block_optimized (buf0, buf1, buf0, buf1); 1345 1346 append_block1_optimized (in_len1, buf0, buf1, tmp); 1347 1348 return in_len; 1349} 1350 1351DECLSPEC u32 rule_op_mangle_rotate_right (MAYBE_UNUSED const u32 p0, MAYBE_UNUSED const u32 p1, MAYBE_UNUSED u32 *buf0, MAYBE_UNUSED u32 *buf1, const u32 in_len) 1352{ 1353 if (in_len == 0) return in_len; 1354 1355 const u32 in_len1 = in_len - 1; 1356 1357 const u32 sh = (in_len1 & 3) * 8; 1358 1359 u32 tmp = 0; 1360 1361 u32 v[4]; 1362 1363 set_mark_1x4_S (v, in_len1); 1364 1365 switch (in_len1 / 16) 1366 { 1367 case 0: 1368 tmp |= buf0[0] & v[0]; 1369 tmp |= buf0[1] & v[1]; 1370 tmp |= buf0[2] & v[2]; 1371 tmp |= buf0[3] & v[3]; 1372 break; 1373 1374 case 1: 1375 tmp |= buf1[0] & v[0]; 1376 tmp |= buf1[1] & v[1]; 1377 tmp |= buf1[2] & v[2]; 1378 tmp |= buf1[3] & v[3]; 1379 break; 1380 } 1381 1382 tmp = (tmp >> sh) & 0xff; 1383 1384 rshift_block_optimized (buf0, buf1, buf0, buf1); 1385 1386 buf0[0] |= tmp; 1387 1388 truncate_right_optimized (buf0, buf1, in_len); 1389 1390 return in_len; 1391} 1392 1393DECLSPEC u32 rule_op_mangle_delete_first (MAYBE_UNUSED const u32 p0, MAYBE_UNUSED const u32 p1, MAYBE_UNUSED u32 *buf0, MAYBE_UNUSED u32 *buf1, const u32 in_len) 1394{ 1395 if (in_len == 0) return in_len; 1396 1397 const u32 in_len1 = in_len - 1; 1398 1399 lshift_block_optimized (buf0, buf1, buf0, buf1); 1400 1401 return in_len1; 1402} 1403 1404DECLSPEC u32 rule_op_mangle_delete_last (MAYBE_UNUSED const u32 p0, MAYBE_UNUSED const u32 p1, MAYBE_UNUSED u32 *buf0, MAYBE_UNUSED u32 *buf1, const u32 in_len) 1405{ 1406 if (in_len == 0) return in_len; 1407 1408 const u32 in_len1 = in_len - 1; 1409 1410 const u32 mask = (1 << ((in_len1 & 3) * 8)) - 1; 1411 1412 buf0[0] &= (in_len1 < 4) ? mask : 0xffffffff; 1413 buf0[1] &= ((in_len1 >= 4) && (in_len1 < 8)) ? mask : 0xffffffff; 1414 buf0[2] &= ((in_len1 >= 8) && (in_len1 < 12)) ? mask : 0xffffffff; 1415 buf0[3] &= ((in_len1 >= 12) && (in_len1 < 16)) ? mask : 0xffffffff; 1416 buf1[0] &= ((in_len1 >= 16) && (in_len1 < 20)) ? mask : 0xffffffff; 1417 buf1[1] &= ((in_len1 >= 20) && (in_len1 < 24)) ? mask : 0xffffffff; 1418 buf1[2] &= ((in_len1 >= 24) && (in_len1 < 28)) ? mask : 0xffffffff; 1419 buf1[3] &= (in_len1 >= 28) ? mask : 0xffffffff; 1420 1421 return in_len1; 1422} 1423 1424DECLSPEC u32 rule_op_mangle_delete_at (MAYBE_UNUSED const u32 p0, MAYBE_UNUSED const u32 p1, MAYBE_UNUSED u32 *buf0, MAYBE_UNUSED u32 *buf1, const u32 in_len) 1425{ 1426 if (p0 >= in_len) return in_len; 1427 1428 u32 out_len = in_len; 1429 1430 u32 tib40[4]; 1431 u32 tib41[4]; 1432 1433 lshift_block_optimized (buf0, buf1, tib40, tib41); 1434 1435 const u32 ml = (1 << ((p0 & 3) * 8)) - 1; 1436 const u32 mr = ~ml; 1437 1438 const int p0_switch = p0 / 4; 1439 1440 switch (p0_switch) 1441 { 1442 case 0: buf0[0] = (buf0[0] & ml) 1443 | (tib40[0] & mr); 1444 buf0[1] = tib40[1]; 1445 buf0[2] = tib40[2]; 1446 buf0[3] = tib40[3]; 1447 buf1[0] = tib41[0]; 1448 buf1[1] = tib41[1]; 1449 buf1[2] = tib41[2]; 1450 buf1[3] = tib41[3]; 1451 break; 1452 case 1: buf0[1] = (buf0[1] & ml) 1453 | (tib40[1] & mr); 1454 buf0[2] = tib40[2]; 1455 buf0[3] = tib40[3]; 1456 buf1[0] = tib41[0]; 1457 buf1[1] = tib41[1]; 1458 buf1[2] = tib41[2]; 1459 buf1[3] = tib41[3]; 1460 break; 1461 case 2: buf0[2] = (buf0[2] & ml) 1462 | (tib40[2] & mr); 1463 buf0[3] = tib40[3]; 1464 buf1[0] = tib41[0]; 1465 buf1[1] = tib41[1]; 1466 buf1[2] = tib41[2]; 1467 buf1[3] = tib41[3]; 1468 break; 1469 case 3: buf0[3] = (buf0[3] & ml) 1470 | (tib40[3] & mr); 1471 buf1[0] = tib41[0]; 1472 buf1[1] = tib41[1]; 1473 buf1[2] = tib41[2]; 1474 buf1[3] = tib41[3]; 1475 break; 1476 case 4: buf1[0] = (buf1[0] & ml) 1477 | (tib41[0] & mr); 1478 buf1[1] = tib41[1]; 1479 buf1[2] = tib41[2]; 1480 buf1[3] = tib41[3]; 1481 break; 1482 case 5: buf1[1] = (buf1[1] & ml) 1483 | (tib41[1] & mr); 1484 buf1[2] = tib41[2]; 1485 buf1[3] = tib41[3]; 1486 break; 1487 case 6: buf1[2] = (buf1[2] & ml) 1488 | (tib41[2] & mr); 1489 buf1[3] = tib41[3]; 1490 break; 1491 case 7: buf1[3] = (buf1[3] & ml) 1492 | (tib41[3] & mr); 1493 break; 1494 } 1495 1496 out_len--; 1497 1498 return out_len; 1499} 1500 1501DECLSPEC u32 rule_op_mangle_extract (MAYBE_UNUSED const u32 p0, MAYBE_UNUSED const u32 p1, MAYBE_UNUSED u32 *buf0, MAYBE_UNUSED u32 *buf1, const u32 in_len) 1502{ 1503 if (p0 >= in_len) return in_len; 1504 1505 if ((p0 + p1) > in_len) return in_len; 1506 1507 u32 out_len = p1; 1508 1509 lshift_block_optimized_N (buf0, buf1, buf0, buf1, p0); 1510 1511 truncate_right_optimized (buf0, buf1, out_len); 1512 1513 return out_len; 1514} 1515 1516DECLSPEC u32 rule_op_mangle_omit (MAYBE_UNUSED const u32 p0, MAYBE_UNUSED const u32 p1, MAYBE_UNUSED u32 *buf0, MAYBE_UNUSED u32 *buf1, const u32 in_len) 1517{ 1518 if (p0 >= in_len) return in_len; 1519 1520 if ((p0 + p1) > in_len) return in_len; 1521 1522 u32 out_len = in_len; 1523 1524 u32 tib40[4]; 1525 u32 tib41[4]; 1526 1527 tib40[0] = 0; 1528 tib40[1] = 0; 1529 tib40[2] = 0; 1530 tib40[3] = 0; 1531 tib41[0] = 0; 1532 tib41[1] = 0; 1533 tib41[2] = 0; 1534 tib41[3] = 0; 1535 1536 lshift_block_optimized_N (buf0, buf1, tib40, tib41, p1); 1537 1538 const u32 ml = (1 << ((p0 & 3) * 8)) - 1; 1539 const u32 mr = ~ml; 1540 1541 const int p0_switch = p0 / 4; 1542 1543 switch (p0_switch) 1544 { 1545 case 0: buf0[0] = (buf0[0] & ml) 1546 | (tib40[0] & mr); 1547 buf0[1] = tib40[1]; 1548 buf0[2] = tib40[2]; 1549 buf0[3] = tib40[3]; 1550 buf1[0] = tib41[0]; 1551 buf1[1] = tib41[1]; 1552 buf1[2] = tib41[2]; 1553 buf1[3] = tib41[3]; 1554 break; 1555 case 1: buf0[1] = (buf0[1] & ml) 1556 | (tib40[1] & mr); 1557 buf0[2] = tib40[2]; 1558 buf0[3] = tib40[3]; 1559 buf1[0] = tib41[0]; 1560 buf1[1] = tib41[1]; 1561 buf1[2] = tib41[2]; 1562 buf1[3] = tib41[3]; 1563 break; 1564 case 2: buf0[2] = (buf0[2] & ml) 1565 | (tib40[2] & mr); 1566 buf0[3] = tib40[3]; 1567 buf1[0] = tib41[0]; 1568 buf1[1] = tib41[1]; 1569 buf1[2] = tib41[2]; 1570 buf1[3] = tib41[3]; 1571 break; 1572 case 3: buf0[3] = (buf0[3] & ml) 1573 | (tib40[3] & mr); 1574 buf1[0] = tib41[0]; 1575 buf1[1] = tib41[1]; 1576 buf1[2] = tib41[2]; 1577 buf1[3] = tib41[3]; 1578 break; 1579 case 4: buf1[0] = (buf1[0] & ml) 1580 | (tib41[0] & mr); 1581 buf1[1] = tib41[1]; 1582 buf1[2] = tib41[2]; 1583 buf1[3] = tib41[3]; 1584 break; 1585 case 5: buf1[1] = (buf1[1] & ml) 1586 | (tib41[1] & mr); 1587 buf1[2] = tib41[2]; 1588 buf1[3] = tib41[3]; 1589 break; 1590 case 6: buf1[2] = (buf1[2] & ml) 1591 | (tib41[2] & mr); 1592 buf1[3] = tib41[3]; 1593 break; 1594 case 7: buf1[3] = (buf1[3] & ml) 1595 | (tib41[3] & mr); 1596 break; 1597 } 1598 1599 out_len -= p1; 1600 1601 return out_len; 1602} 1603 1604DECLSPEC u32 rule_op_mangle_insert (MAYBE_UNUSED const u32 p0, MAYBE_UNUSED const u32 p1, MAYBE_UNUSED u32 *buf0, MAYBE_UNUSED u32 *buf1, const u32 in_len) 1605{ 1606 if (p0 > in_len) return in_len; 1607 1608 if ((in_len + 1) >= 32) return in_len; 1609 1610 u32 out_len = in_len; 1611 1612 u32 tib40[4]; 1613 u32 tib41[4]; 1614 1615 rshift_block_optimized (buf0, buf1, tib40, tib41); 1616 1617 const u32 p1n = p1 << ((p0 & 3) * 8); 1618 1619 const u32 ml = (1 << ((p0 & 3) * 8)) - 1; 1620 1621 const u32 mr = 0xffffff00 << ((p0 & 3) * 8); 1622 1623 const int p0_switch = p0 / 4; 1624 1625 switch (p0_switch) 1626 { 1627 case 0: buf0[0] = (buf0[0] & ml) | p1n | (tib40[0] & mr); 1628 buf0[1] = tib40[1]; 1629 buf0[2] = tib40[2]; 1630 buf0[3] = tib40[3]; 1631 buf1[0] = tib41[0]; 1632 buf1[1] = tib41[1]; 1633 buf1[2] = tib41[2]; 1634 buf1[3] = tib41[3]; 1635 break; 1636 case 1: buf0[1] = (buf0[1] & ml) | p1n | (tib40[1] & mr); 1637 buf0[2] = tib40[2]; 1638 buf0[3] = tib40[3]; 1639 buf1[0] = tib41[0]; 1640 buf1[1] = tib41[1]; 1641 buf1[2] = tib41[2]; 1642 buf1[3] = tib41[3]; 1643 break; 1644 case 2: buf0[2] = (buf0[2] & ml) | p1n | (tib40[2] & mr); 1645 buf0[3] = tib40[3]; 1646 buf1[0] = tib41[0]; 1647 buf1[1] = tib41[1]; 1648 buf1[2] = tib41[2]; 1649 buf1[3] = tib41[3]; 1650 break; 1651 case 3: buf0[3] = (buf0[3] & ml) | p1n | (tib40[3] & mr); 1652 buf1[0] = tib41[0]; 1653 buf1[1] = tib41[1]; 1654 buf1[2] = tib41[2]; 1655 buf1[3] = tib41[3]; 1656 break; 1657 case 4: buf1[0] = (buf1[0] & ml) | p1n | (tib41[0] & mr); 1658 buf1[1] = tib41[1]; 1659 buf1[2] = tib41[2]; 1660 buf1[3] = tib41[3]; 1661 break; 1662 case 5: buf1[1] = (buf1[1] & ml) | p1n | (tib41[1] & mr); 1663 buf1[2] = tib41[2]; 1664 buf1[3] = tib41[3]; 1665 break; 1666 case 6: buf1[2] = (buf1[2] & ml) | p1n | (tib41[2] & mr); 1667 buf1[3] = tib41[3]; 1668 break; 1669 case 7: buf1[3] = (buf1[3] & ml) | p1n | (tib41[3] & mr); 1670 break; 1671 } 1672 1673 out_len++; 1674 1675 return out_len; 1676} 1677 1678DECLSPEC u32 rule_op_mangle_overstrike (MAYBE_UNUSED const u32 p0, MAYBE_UNUSED const u32 p1, MAYBE_UNUSED u32 *buf0, MAYBE_UNUSED u32 *buf1, const u32 in_len) 1679{ 1680 if (p0 >= in_len) return in_len; 1681 1682 const u32 p1n = p1 << ((p0 & 3) * 8); 1683 1684 const u32 m = ~(0xffu << ((p0 & 3) * 8)); 1685 1686 u32 t[8]; 1687 1688 t[0] = buf0[0]; 1689 t[1] = buf0[1]; 1690 t[2] = buf0[2]; 1691 t[3] = buf0[3]; 1692 t[4] = buf1[0]; 1693 t[5] = buf1[1]; 1694 t[6] = buf1[2]; 1695 t[7] = buf1[3]; 1696 1697 const u32 tmp = t[p0 / 4]; 1698 1699 t[p0 / 4] = (tmp & m) | p1n; 1700 1701 buf0[0] = t[0]; 1702 buf0[1] = t[1]; 1703 buf0[2] = t[2]; 1704 buf0[3] = t[3]; 1705 buf1[0] = t[4]; 1706 buf1[1] = t[5]; 1707 buf1[2] = t[6]; 1708 buf1[3] = t[7]; 1709 1710 return in_len; 1711} 1712 1713DECLSPEC u32 rule_op_mangle_truncate_at (MAYBE_UNUSED const u32 p0, MAYBE_UNUSED const u32 p1, MAYBE_UNUSED u32 *buf0, MAYBE_UNUSED u32 *buf1, const u32 in_len) 1714{ 1715 if (p0 >= in_len) return in_len; 1716 1717 truncate_right_optimized (buf0, buf1, p0); 1718 1719 return p0; 1720} 1721 1722DECLSPEC u32 search_on_register (const u32 in, const u32 p0) 1723{ 1724 u32 r = 0; 1725 1726 if (hc_bfe_S (in, 0, 8) == p0) r |= 1; 1727 if (hc_bfe_S (in, 8, 8) == p0) r |= 2; 1728 if (hc_bfe_S (in, 16, 8) == p0) r |= 4; 1729 if (hc_bfe_S (in, 24, 8) == p0) r |= 8; 1730 1731 return r; 1732} 1733 1734DECLSPEC u32 replace_on_register (const u32 in, const u32 r, const u32 p1) 1735{ 1736 u32 out = in; 1737 1738 if (r & 1) out = (out & 0xffffff00) | (p1 << 0); 1739 if (r & 2) out = (out & 0xffff00ff) | (p1 << 8); 1740 if (r & 4) out = (out & 0xff00ffff) | (p1 << 16); 1741 if (r & 8) out = (out & 0x00ffffff) | (p1 << 24); 1742 1743 return out; 1744} 1745 1746DECLSPEC u32 rule_op_mangle_replace (MAYBE_UNUSED const u32 p0, MAYBE_UNUSED const u32 p1, MAYBE_UNUSED u32 *buf0, MAYBE_UNUSED u32 *buf1, const u32 in_len) 1747{ 1748 const u32 r0 = search_on_register (buf0[0], p0); 1749 const u32 r1 = search_on_register (buf0[1], p0); 1750 const u32 r2 = search_on_register (buf0[2], p0); 1751 const u32 r3 = search_on_register (buf0[3], p0); 1752 const u32 r4 = search_on_register (buf1[0], p0); 1753 const u32 r5 = search_on_register (buf1[1], p0); 1754 const u32 r6 = search_on_register (buf1[2], p0); 1755 const u32 r7 = search_on_register (buf1[3], p0); 1756 1757 const u32 rn = r0 + r1 + r2 + r3 + r4 + r5 + r6 + r7; 1758 1759 if (rn == 0) return in_len; 1760 1761 buf0[0] = replace_on_register (buf0[0], r0, p1); 1762 buf0[1] = replace_on_register (buf0[1], r1, p1); 1763 buf0[2] = replace_on_register (buf0[2], r2, p1); 1764 buf0[3] = replace_on_register (buf0[3], r3, p1); 1765 buf1[0] = replace_on_register (buf1[0], r4, p1); 1766 buf1[1] = replace_on_register (buf1[1], r5, p1); 1767 buf1[2] = replace_on_register (buf1[2], r6, p1); 1768 buf1[3] = replace_on_register (buf1[3], r7, p1); 1769 1770 return in_len; 1771} 1772 1773DECLSPEC u32 rule_op_mangle_purgechar (MAYBE_UNUSED const u32 p0, MAYBE_UNUSED const u32 p1, MAYBE_UNUSED u32 *buf0, MAYBE_UNUSED u32 *buf1, const u32 in_len) 1774{ 1775 const u32 r0 = search_on_register (buf0[0], p0); 1776 const u32 r1 = search_on_register (buf0[1], p0); 1777 const u32 r2 = search_on_register (buf0[2], p0); 1778 const u32 r3 = search_on_register (buf0[3], p0); 1779 const u32 r4 = search_on_register (buf1[0], p0); 1780 const u32 r5 = search_on_register (buf1[1], p0); 1781 const u32 r6 = search_on_register (buf1[2], p0); 1782 const u32 r7 = search_on_register (buf1[3], p0); 1783 1784 const u32 rn = r0 + r1 + r2 + r3 + r4 + r5 + r6 + r7; 1785 1786 if (rn == 0) return in_len; 1787 1788 u32 out_len = 0; 1789 1790 u32 buf_in[8]; 1791 1792 buf_in[0] = buf0[0]; 1793 buf_in[1] = buf0[1]; 1794 buf_in[2] = buf0[2]; 1795 buf_in[3] = buf0[3]; 1796 buf_in[4] = buf1[0]; 1797 buf_in[5] = buf1[1]; 1798 buf_in[6] = buf1[2]; 1799 buf_in[7] = buf1[3]; 1800 1801 u32 buf_out[8] = { 0 }; 1802 1803 u8 *in = (u8 *) buf_in; 1804 u8 *out = (u8 *) buf_out; 1805 1806 for (u32 pos = 0; pos < in_len; pos++) 1807 { 1808 if (in[pos] == (u8) p0) continue; 1809 1810 out[out_len] = in[pos]; 1811 1812 out_len++; 1813 } 1814 1815 buf0[0] = buf_out[0]; 1816 buf0[1] = buf_out[1]; 1817 buf0[2] = buf_out[2]; 1818 buf0[3] = buf_out[3]; 1819 buf1[0] = buf_out[4]; 1820 buf1[1] = buf_out[5]; 1821 buf1[2] = buf_out[6]; 1822 buf1[3] = buf_out[7]; 1823 1824 return out_len; 1825} 1826 1827DECLSPEC u32 rule_op_mangle_dupechar_first (MAYBE_UNUSED const u32 p0, MAYBE_UNUSED const u32 p1, MAYBE_UNUSED u32 *buf0, MAYBE_UNUSED u32 *buf1, const u32 in_len) 1828{ 1829 if ( in_len == 0) return in_len; 1830 if ((in_len + p0) >= 32) return in_len; 1831 1832 u32 out_len = in_len; 1833 1834 const u32 tmp = buf0[0] & 0xFF; 1835 1836 const u32 tmp32 = tmp << 0 1837 | tmp << 8 1838 | tmp << 16 1839 | tmp << 24; 1840 1841 rshift_block_optimized_N (buf0, buf1, buf0, buf1, p0); 1842 1843 u32 t0[4] = { tmp32, tmp32, tmp32, tmp32 }; 1844 u32 t1[4] = { tmp32, tmp32, tmp32, tmp32 }; 1845 1846 truncate_right_optimized (t0, t1, p0); 1847 1848 buf0[0] |= t0[0]; 1849 buf0[1] |= t0[1]; 1850 buf0[2] |= t0[2]; 1851 buf0[3] |= t0[3]; 1852 buf1[0] |= t1[0]; 1853 buf1[1] |= t1[1]; 1854 buf1[2] |= t1[2]; 1855 buf1[3] |= t1[3]; 1856 1857 out_len += p0; 1858 1859 return out_len; 1860} 1861 1862DECLSPEC u32 rule_op_mangle_dupechar_last (MAYBE_UNUSED const u32 p0, MAYBE_UNUSED const u32 p1, MAYBE_UNUSED u32 *buf0, MAYBE_UNUSED u32 *buf1, const u32 in_len) 1863{ 1864 if ( in_len == 0) return in_len; 1865 if ((in_len + p0) >= 32) return in_len; 1866 1867 const u32 in_len1 = in_len - 1; 1868 1869 const u32 sh = (in_len1 & 3) * 8; 1870 1871 u32 tmp = 0; 1872 1873 u32 v[4]; 1874 1875 set_mark_1x4_S (v, in_len1); 1876 1877 switch (in_len1 / 16) 1878 { 1879 case 0: 1880 tmp |= buf0[0] & v[0]; 1881 tmp |= buf0[1] & v[1]; 1882 tmp |= buf0[2] & v[2]; 1883 tmp |= buf0[3] & v[3]; 1884 break; 1885 1886 case 1: 1887 tmp |= buf1[0] & v[0]; 1888 tmp |= buf1[1] & v[1]; 1889 tmp |= buf1[2] & v[2]; 1890 tmp |= buf1[3] & v[3]; 1891 break; 1892 } 1893 1894 tmp = (tmp >> sh) & 0xff; 1895 1896 u32 out_len = in_len; 1897 1898 for (u32 i = 0; i < p0; i++) 1899 { 1900 append_block1_optimized (out_len, buf0, buf1, tmp); 1901 1902 out_len++; 1903 } 1904 1905 return out_len; 1906} 1907 1908DECLSPEC u32 rule_op_mangle_dupechar_all (MAYBE_UNUSED const u32 p0, MAYBE_UNUSED const u32 p1, MAYBE_UNUSED u32 *buf0, MAYBE_UNUSED u32 *buf1, const u32 in_len) 1909{ 1910 if ( in_len == 0) return in_len; 1911 if ((in_len + in_len) >= 32) return in_len; 1912 1913 u32 out_len = in_len; 1914 1915 u32 tib40[4]; 1916 u32 tib41[4]; 1917 1918 tib40[0] = ((buf0[0] & 0x000000FF) << 0) | ((buf0[0] & 0x0000FF00) << 8); 1919 tib40[1] = ((buf0[0] & 0x00FF0000) >> 16) | ((buf0[0] & 0xFF000000) >> 8); 1920 tib40[2] = ((buf0[1] & 0x000000FF) << 0) | ((buf0[1] & 0x0000FF00) << 8); 1921 tib40[3] = ((buf0[1] & 0x00FF0000) >> 16) | ((buf0[1] & 0xFF000000) >> 8); 1922 tib41[0] = ((buf0[2] & 0x000000FF) << 0) | ((buf0[2] & 0x0000FF00) << 8); 1923 tib41[1] = ((buf0[2] & 0x00FF0000) >> 16) | ((buf0[2] & 0xFF000000) >> 8); 1924 tib41[2] = ((buf0[3] & 0x000000FF) << 0) | ((buf0[3] & 0x0000FF00) << 8); 1925 tib41[3] = ((buf0[3] & 0x00FF0000) >> 16) | ((buf0[3] & 0xFF000000) >> 8); 1926 1927 buf0[0] = tib40[0] | (tib40[0] << 8); 1928 buf0[1] = tib40[1] | (tib40[1] << 8); 1929 buf0[2] = tib40[2] | (tib40[2] << 8); 1930 buf0[3] = tib40[3] | (tib40[3] << 8); 1931 buf1[0] = tib41[0] | (tib41[0] << 8); 1932 buf1[1] = tib41[1] | (tib41[1] << 8); 1933 buf1[2] = tib41[2] | (tib41[2] << 8); 1934 buf1[3] = tib41[3] | (tib41[3] << 8); 1935 1936 out_len = out_len + out_len; 1937 1938 return out_len; 1939} 1940 1941DECLSPEC u32 rule_op_mangle_switch_first (MAYBE_UNUSED const u32 p0, MAYBE_UNUSED const u32 p1, MAYBE_UNUSED u32 *buf0, MAYBE_UNUSED u32 *buf1, const u32 in_len) 1942{ 1943 if (in_len < 2) return in_len; 1944 1945 buf0[0] = (buf0[0] & 0xFFFF0000) | ((buf0[0] << 8) & 0x0000FF00) | ((buf0[0] >> 8) & 0x000000FF); 1946 1947 return in_len; 1948} 1949 1950DECLSPEC u32 rule_op_mangle_switch_last (MAYBE_UNUSED const u32 p0, MAYBE_UNUSED const u32 p1, MAYBE_UNUSED u32 *buf0, MAYBE_UNUSED u32 *buf1, const u32 in_len) 1951{ 1952 if (in_len < 2) return in_len; 1953 1954 u32 t[8]; 1955 1956 t[0] = buf0[0]; 1957 t[1] = buf0[1]; 1958 t[2] = buf0[2]; 1959 t[3] = buf0[3]; 1960 t[4] = buf1[0]; 1961 t[5] = buf1[1]; 1962 t[6] = buf1[2]; 1963 t[7] = buf1[3]; 1964 1965 exchange_byte_optimized (t, in_len - 2, in_len - 1); 1966 1967 buf0[0] = t[0]; 1968 buf0[1] = t[1]; 1969 buf0[2] = t[2]; 1970 buf0[3] = t[3]; 1971 buf1[0] = t[4]; 1972 buf1[1] = t[5]; 1973 buf1[2] = t[6]; 1974 buf1[3] = t[7]; 1975 1976 return in_len; 1977} 1978 1979DECLSPEC u32 rule_op_mangle_switch_at (MAYBE_UNUSED const u32 p0, MAYBE_UNUSED const u32 p1, MAYBE_UNUSED u32 *buf0, MAYBE_UNUSED u32 *buf1, const u32 in_len) 1980{ 1981 if (p0 >= in_len) return in_len; 1982 if (p1 >= in_len) return in_len; 1983 1984 u32 t[8]; 1985 1986 t[0] = buf0[0]; 1987 t[1] = buf0[1]; 1988 t[2] = buf0[2]; 1989 t[3] = buf0[3]; 1990 t[4] = buf1[0]; 1991 t[5] = buf1[1]; 1992 t[6] = buf1[2]; 1993 t[7] = buf1[3]; 1994 1995 exchange_byte_optimized (t, p0, p1); 1996 1997 buf0[0] = t[0]; 1998 buf0[1] = t[1]; 1999 buf0[2] = t[2]; 2000 buf0[3] = t[3]; 2001 buf1[0] = t[4]; 2002 buf1[1] = t[5]; 2003 buf1[2] = t[6]; 2004 buf1[3] = t[7]; 2005 2006 return in_len; 2007} 2008 2009DECLSPEC u32 rule_op_mangle_chr_shiftl (MAYBE_UNUSED const u32 p0, MAYBE_UNUSED const u32 p1, MAYBE_UNUSED u32 *buf0, MAYBE_UNUSED u32 *buf1, const u32 in_len) 2010{ 2011 if (p0 >= in_len) return in_len; 2012 2013 const u32 mr = 0xffu << ((p0 & 3) * 8); 2014 const u32 ml = ~mr; 2015 2016 u32 t[8]; 2017 2018 t[0] = buf0[0]; 2019 t[1] = buf0[1]; 2020 t[2] = buf0[2]; 2021 t[3] = buf0[3]; 2022 t[4] = buf1[0]; 2023 t[5] = buf1[1]; 2024 t[6] = buf1[2]; 2025 t[7] = buf1[3]; 2026 2027 const u32 tmp = t[p0 / 4]; 2028 2029 t[p0 / 4] = (tmp & ml) | (((tmp & mr) << 1) & mr); 2030 2031 buf0[0] = t[0]; 2032 buf0[1] = t[1]; 2033 buf0[2] = t[2]; 2034 buf0[3] = t[3]; 2035 buf1[0] = t[4]; 2036 buf1[1] = t[5]; 2037 buf1[2] = t[6]; 2038 buf1[3] = t[7]; 2039 2040 return in_len; 2041} 2042 2043DECLSPEC u32 rule_op_mangle_chr_shiftr (MAYBE_UNUSED const u32 p0, MAYBE_UNUSED const u32 p1, MAYBE_UNUSED u32 *buf0, MAYBE_UNUSED u32 *buf1, const u32 in_len) 2044{ 2045 if (p0 >= in_len) return in_len; 2046 2047 const u32 mr = 0xffu << ((p0 & 3) * 8); 2048 const u32 ml = ~mr; 2049 2050 u32 t[8]; 2051 2052 t[0] = buf0[0]; 2053 t[1] = buf0[1]; 2054 t[2] = buf0[2]; 2055 t[3] = buf0[3]; 2056 t[4] = buf1[0]; 2057 t[5] = buf1[1]; 2058 t[6] = buf1[2]; 2059 t[7] = buf1[3]; 2060 2061 const u32 tmp = t[p0 / 4]; 2062 2063 t[p0 / 4] = (tmp & ml) | (((tmp & mr) >> 1) & mr); 2064 2065 buf0[0] = t[0]; 2066 buf0[1] = t[1]; 2067 buf0[2] = t[2]; 2068 buf0[3] = t[3]; 2069 buf1[0] = t[4]; 2070 buf1[1] = t[5]; 2071 buf1[2] = t[6]; 2072 buf1[3] = t[7]; 2073 2074 return in_len; 2075} 2076 2077DECLSPEC u32 rule_op_mangle_chr_incr (MAYBE_UNUSED const u32 p0, MAYBE_UNUSED const u32 p1, MAYBE_UNUSED u32 *buf0, MAYBE_UNUSED u32 *buf1, const u32 in_len) 2078{ 2079 if (p0 >= in_len) return in_len; 2080 2081 const u32 mr = 0xffu << ((p0 & 3) * 8); 2082 const u32 ml = ~mr; 2083 2084 const u32 n = 0x01010101 & mr; 2085 2086 u32 t[8]; 2087 2088 t[0] = buf0[0]; 2089 t[1] = buf0[1]; 2090 t[2] = buf0[2]; 2091 t[3] = buf0[3]; 2092 t[4] = buf1[0]; 2093 t[5] = buf1[1]; 2094 t[6] = buf1[2]; 2095 t[7] = buf1[3]; 2096 2097 const u32 tmp = t[p0 / 4]; 2098 2099 t[p0 / 4] = (tmp & ml) | (((tmp & mr) + n) & mr); 2100 2101 buf0[0] = t[0]; 2102 buf0[1] = t[1]; 2103 buf0[2] = t[2]; 2104 buf0[3] = t[3]; 2105 buf1[0] = t[4]; 2106 buf1[1] = t[5]; 2107 buf1[2] = t[6]; 2108 buf1[3] = t[7]; 2109 2110 return in_len; 2111} 2112 2113DECLSPEC u32 rule_op_mangle_chr_decr (MAYBE_UNUSED const u32 p0, MAYBE_UNUSED const u32 p1, MAYBE_UNUSED u32 *buf0, MAYBE_UNUSED u32 *buf1, const u32 in_len) 2114{ 2115 if (p0 >= in_len) return in_len; 2116 2117 const u32 mr = 0xffu << ((p0 & 3) * 8); 2118 const u32 ml = ~mr; 2119 2120 const u32 n = 0x01010101 & mr; 2121 2122 u32 t[8]; 2123 2124 t[0] = buf0[0]; 2125 t[1] = buf0[1]; 2126 t[2] = buf0[2]; 2127 t[3] = buf0[3]; 2128 t[4] = buf1[0]; 2129 t[5] = buf1[1]; 2130 t[6] = buf1[2]; 2131 t[7] = buf1[3]; 2132 2133 const u32 tmp = t[p0 / 4]; 2134 2135 t[p0 / 4] = (tmp & ml) | (((tmp & mr) - n) & mr); 2136 2137 buf0[0] = t[0]; 2138 buf0[1] = t[1]; 2139 buf0[2] = t[2]; 2140 buf0[3] = t[3]; 2141 buf1[0] = t[4]; 2142 buf1[1] = t[5]; 2143 buf1[2] = t[6]; 2144 buf1[3] = t[7]; 2145 2146 return in_len; 2147} 2148 2149DECLSPEC u32 rule_op_mangle_replace_np1 (MAYBE_UNUSED const u32 p0, MAYBE_UNUSED const u32 p1, MAYBE_UNUSED u32 *buf0, MAYBE_UNUSED u32 *buf1, const u32 in_len) 2150{ 2151 if ((p0 + 1) >= in_len) return in_len; 2152 2153 u32 tib4x[8]; 2154 2155 lshift_block_optimized (buf0, buf1, tib4x + 0, tib4x + 4); 2156 2157 const u32 mr = 0xffu << ((p0 & 3) * 8); 2158 const u32 ml = ~mr; 2159 2160 u32 t[8]; 2161 2162 t[0] = buf0[0]; 2163 t[1] = buf0[1]; 2164 t[2] = buf0[2]; 2165 t[3] = buf0[3]; 2166 t[4] = buf1[0]; 2167 t[5] = buf1[1]; 2168 t[6] = buf1[2]; 2169 t[7] = buf1[3]; 2170 2171 const u32 tmp = t[p0 / 4]; 2172 2173 const u32 tmp2 = tib4x[p0 / 4]; 2174 2175 t[p0 / 4] = (tmp & ml) | (tmp2 & mr); 2176 2177 buf0[0] = t[0]; 2178 buf0[1] = t[1]; 2179 buf0[2] = t[2]; 2180 buf0[3] = t[3]; 2181 buf1[0] = t[4]; 2182 buf1[1] = t[5]; 2183 buf1[2] = t[6]; 2184 buf1[3] = t[7]; 2185 2186 return in_len; 2187} 2188 2189DECLSPEC u32 rule_op_mangle_replace_nm1 (MAYBE_UNUSED const u32 p0, MAYBE_UNUSED const u32 p1, MAYBE_UNUSED u32 *buf0, MAYBE_UNUSED u32 *buf1, const u32 in_len) 2190{ 2191 if (p0 == 0) return in_len; 2192 2193 if (p0 >= in_len) return in_len; 2194 2195 u32 tib4x[8]; 2196 2197 rshift_block_optimized (buf0, buf1, tib4x + 0, tib4x + 4); 2198 2199 const u32 mr = 0xffu << ((p0 & 3) * 8); 2200 const u32 ml = ~mr; 2201 2202 u32 t[8]; 2203 2204 t[0] = buf0[0]; 2205 t[1] = buf0[1]; 2206 t[2] = buf0[2]; 2207 t[3] = buf0[3]; 2208 t[4] = buf1[0]; 2209 t[5] = buf1[1]; 2210 t[6] = buf1[2]; 2211 t[7] = buf1[3]; 2212 2213 const u32 tmp = t[p0 / 4]; 2214 2215 const u32 tmp2 = tib4x[p0 / 4]; 2216 2217 t[p0 / 4] = (tmp & ml) | (tmp2 & mr); 2218 2219 buf0[0] = t[0]; 2220 buf0[1] = t[1]; 2221 buf0[2] = t[2]; 2222 buf0[3] = t[3]; 2223 buf1[0] = t[4]; 2224 buf1[1] = t[5]; 2225 buf1[2] = t[6]; 2226 buf1[3] = t[7]; 2227 2228 return in_len; 2229} 2230 2231DECLSPEC u32 rule_op_mangle_dupeblock_first (MAYBE_UNUSED const u32 p0, MAYBE_UNUSED const u32 p1, MAYBE_UNUSED u32 *buf0, MAYBE_UNUSED u32 *buf1, const u32 in_len) 2232{ 2233 if (p0 > in_len) return in_len; 2234 2235 if ((in_len + p0) >= 32) return in_len; 2236 2237 u32 out_len = in_len; 2238 2239 u32 tib40[4]; 2240 u32 tib41[4]; 2241 2242 tib40[0] = buf0[0]; 2243 tib40[1] = buf0[1]; 2244 tib40[2] = buf0[2]; 2245 tib40[3] = buf0[3]; 2246 tib41[0] = buf1[0]; 2247 tib41[1] = buf1[1]; 2248 tib41[2] = buf1[2]; 2249 tib41[3] = buf1[3]; 2250 2251 truncate_right_optimized (tib40, tib41, p0); 2252 2253 rshift_block_optimized_N (buf0, buf1, buf0, buf1, p0); 2254 2255 buf0[0] |= tib40[0]; 2256 buf0[1] |= tib40[1]; 2257 buf0[2] |= tib40[2]; 2258 buf0[3] |= tib40[3]; 2259 buf1[0] |= tib41[0]; 2260 buf1[1] |= tib41[1]; 2261 buf1[2] |= tib41[2]; 2262 buf1[3] |= tib41[3]; 2263 2264 out_len += p0; 2265 2266 return out_len; 2267} 2268 2269DECLSPEC u32 rule_op_mangle_dupeblock_last (MAYBE_UNUSED const u32 p0, MAYBE_UNUSED const u32 p1, MAYBE_UNUSED u32 *buf0, MAYBE_UNUSED u32 *buf1, const u32 in_len) 2270{ 2271 if (p0 > in_len) return in_len; 2272 2273 if ((in_len + p0) >= 32) return in_len; 2274 2275 u32 out_len = in_len; 2276 2277 u32 tib40[4]; 2278 u32 tib41[4]; 2279 2280 rshift_block_optimized_N (buf0, buf1, tib40, tib41, p0); 2281 2282 truncate_left_optimized (tib40, tib41, out_len); 2283 2284 buf0[0] |= tib40[0]; 2285 buf0[1] |= tib40[1]; 2286 buf0[2] |= tib40[2]; 2287 buf0[3] |= tib40[3]; 2288 buf1[0] |= tib41[0]; 2289 buf1[1] |= tib41[1]; 2290 buf1[2] |= tib41[2]; 2291 buf1[3] |= tib41[3]; 2292 2293 out_len += p0; 2294 2295 return out_len; 2296} 2297 2298DECLSPEC u32 toggle_on_register (const u32 in, const u32 r) 2299{ 2300 u32 out = in; 2301 2302 const u32 cmask = generate_cmask_optimized (out); 2303 2304 if (r & 1) out = out ^ (0x00000020 & cmask); 2305 if (r & 2) out = out ^ (0x00002000 & cmask); 2306 if (r & 4) out = out ^ (0x00200000 & cmask); 2307 if (r & 8) out = out ^ (0x20000000 & cmask); 2308 2309 return out; 2310} 2311 2312DECLSPEC u32 rule_op_mangle_title_sep (MAYBE_UNUSED const u32 p0, MAYBE_UNUSED const u32 p1, MAYBE_UNUSED u32 *buf0, MAYBE_UNUSED u32 *buf1, const u32 in_len) 2313{ 2314 if (in_len == 0) return in_len; 2315 2316 u32 r0 = search_on_register (buf0[0], p0); 2317 u32 r1 = search_on_register (buf0[1], p0); 2318 u32 r2 = search_on_register (buf0[2], p0); 2319 u32 r3 = search_on_register (buf0[3], p0); 2320 u32 r4 = search_on_register (buf1[0], p0); 2321 u32 r5 = search_on_register (buf1[1], p0); 2322 u32 r6 = search_on_register (buf1[2], p0); 2323 u32 r7 = search_on_register (buf1[3], p0); 2324 2325 rule_op_mangle_lrest_ufirst (p0, p1, buf0, buf1, in_len); 2326 2327 const u32 rn = r0 + r1 + r2 + r3 + r4 + r5 + r6 + r7; 2328 2329 if (rn == 0) return in_len; 2330 2331 r0 <<= 1; 2332 r1 <<= 1; r1 |= r0 >> 4; 2333 r2 <<= 1; r2 |= r1 >> 4; 2334 r3 <<= 1; r3 |= r2 >> 4; 2335 r4 <<= 1; r4 |= r3 >> 4; 2336 r5 <<= 1; r5 |= r4 >> 4; 2337 r6 <<= 1; r6 |= r5 >> 4; 2338 r7 <<= 1; r7 |= r6 >> 4; 2339 2340 buf0[0] = toggle_on_register (buf0[0], r0); 2341 buf0[1] = toggle_on_register (buf0[1], r1); 2342 buf0[2] = toggle_on_register (buf0[2], r2); 2343 buf0[3] = toggle_on_register (buf0[3], r3); 2344 buf1[0] = toggle_on_register (buf1[0], r4); 2345 buf1[1] = toggle_on_register (buf1[1], r5); 2346 buf1[2] = toggle_on_register (buf1[2], r6); 2347 buf1[3] = toggle_on_register (buf1[3], r7); 2348 2349 return in_len; 2350} 2351 2352DECLSPEC u32 apply_rule_optimized (const u32 name, const u32 p0, const u32 p1, u32 *buf0, u32 *buf1, const u32 in_len) 2353{ 2354 u32 out_len = in_len; 2355 2356 switch (name) 2357 { 2358 case RULE_OP_MANGLE_LREST: out_len = rule_op_mangle_lrest (p0, p1, buf0, buf1, out_len); break; 2359 case RULE_OP_MANGLE_UREST: out_len = rule_op_mangle_urest (p0, p1, buf0, buf1, out_len); break; 2360 case RULE_OP_MANGLE_LREST_UFIRST: out_len = rule_op_mangle_lrest_ufirst (p0, p1, buf0, buf1, out_len); break; 2361 case RULE_OP_MANGLE_UREST_LFIRST: out_len = rule_op_mangle_urest_lfirst (p0, p1, buf0, buf1, out_len); break; 2362 case RULE_OP_MANGLE_TREST: out_len = rule_op_mangle_trest (p0, p1, buf0, buf1, out_len); break; 2363 case RULE_OP_MANGLE_TOGGLE_AT: out_len = rule_op_mangle_toggle_at (p0, p1, buf0, buf1, out_len); break; 2364 case RULE_OP_MANGLE_TOGGLE_AT_SEP: out_len = rule_op_mangle_toggle_at_sep (p0, p1, buf0, buf1, out_len); break; 2365 case RULE_OP_MANGLE_REVERSE: out_len = rule_op_mangle_reverse (p0, p1, buf0, buf1, out_len); break; 2366 case RULE_OP_MANGLE_DUPEWORD: out_len = rule_op_mangle_dupeword (p0, p1, buf0, buf1, out_len); break; 2367 case RULE_OP_MANGLE_DUPEWORD_TIMES: out_len = rule_op_mangle_dupeword_times (p0, p1, buf0, buf1, out_len); break; 2368 case RULE_OP_MANGLE_REFLECT: out_len = rule_op_mangle_reflect (p0, p1, buf0, buf1, out_len); break; 2369 case RULE_OP_MANGLE_APPEND: out_len = rule_op_mangle_append (p0, p1, buf0, buf1, out_len); break; 2370 case RULE_OP_MANGLE_PREPEND: out_len = rule_op_mangle_prepend (p0, p1, buf0, buf1, out_len); break; 2371 case RULE_OP_MANGLE_ROTATE_LEFT: out_len = rule_op_mangle_rotate_left (p0, p1, buf0, buf1, out_len); break; 2372 case RULE_OP_MANGLE_ROTATE_RIGHT: out_len = rule_op_mangle_rotate_right (p0, p1, buf0, buf1, out_len); break; 2373 case RULE_OP_MANGLE_DELETE_FIRST: out_len = rule_op_mangle_delete_first (p0, p1, buf0, buf1, out_len); break; 2374 case RULE_OP_MANGLE_DELETE_LAST: out_len = rule_op_mangle_delete_last (p0, p1, buf0, buf1, out_len); break; 2375 case RULE_OP_MANGLE_DELETE_AT: out_len = rule_op_mangle_delete_at (p0, p1, buf0, buf1, out_len); break; 2376 case RULE_OP_MANGLE_EXTRACT: out_len = rule_op_mangle_extract (p0, p1, buf0, buf1, out_len); break; 2377 case RULE_OP_MANGLE_OMIT: out_len = rule_op_mangle_omit (p0, p1, buf0, buf1, out_len); break; 2378 case RULE_OP_MANGLE_INSERT: out_len = rule_op_mangle_insert (p0, p1, buf0, buf1, out_len); break; 2379 case RULE_OP_MANGLE_OVERSTRIKE: out_len = rule_op_mangle_overstrike (p0, p1, buf0, buf1, out_len); break; 2380 case RULE_OP_MANGLE_TRUNCATE_AT: out_len = rule_op_mangle_truncate_at (p0, p1, buf0, buf1, out_len); break; 2381 case RULE_OP_MANGLE_REPLACE: out_len = rule_op_mangle_replace (p0, p1, buf0, buf1, out_len); break; 2382 case RULE_OP_MANGLE_PURGECHAR: out_len = rule_op_mangle_purgechar (p0, p1, buf0, buf1, out_len); break; 2383 //case RULE_OP_MANGLE_TOGGLECASE_REC: out_len = rule_op_mangle_togglecase_rec (p0, p1, buf0, buf1, out_len); break; 2384 case RULE_OP_MANGLE_DUPECHAR_FIRST: out_len = rule_op_mangle_dupechar_first (p0, p1, buf0, buf1, out_len); break; 2385 case RULE_OP_MANGLE_DUPECHAR_LAST: out_len = rule_op_mangle_dupechar_last (p0, p1, buf0, buf1, out_len); break; 2386 case RULE_OP_MANGLE_DUPECHAR_ALL: out_len = rule_op_mangle_dupechar_all (p0, p1, buf0, buf1, out_len); break; 2387 case RULE_OP_MANGLE_SWITCH_FIRST: out_len = rule_op_mangle_switch_first (p0, p1, buf0, buf1, out_len); break; 2388 case RULE_OP_MANGLE_SWITCH_LAST: out_len = rule_op_mangle_switch_last (p0, p1, buf0, buf1, out_len); break; 2389 case RULE_OP_MANGLE_SWITCH_AT: out_len = rule_op_mangle_switch_at (p0, p1, buf0, buf1, out_len); break; 2390 case RULE_OP_MANGLE_CHR_SHIFTL: out_len = rule_op_mangle_chr_shiftl (p0, p1, buf0, buf1, out_len); break; 2391 case RULE_OP_MANGLE_CHR_SHIFTR: out_len = rule_op_mangle_chr_shiftr (p0, p1, buf0, buf1, out_len); break; 2392 case RULE_OP_MANGLE_CHR_INCR: out_len = rule_op_mangle_chr_incr (p0, p1, buf0, buf1, out_len); break; 2393 case RULE_OP_MANGLE_CHR_DECR: out_len = rule_op_mangle_chr_decr (p0, p1, buf0, buf1, out_len); break; 2394 case RULE_OP_MANGLE_REPLACE_NP1: out_len = rule_op_mangle_replace_np1 (p0, p1, buf0, buf1, out_len); break; 2395 case RULE_OP_MANGLE_REPLACE_NM1: out_len = rule_op_mangle_replace_nm1 (p0, p1, buf0, buf1, out_len); break; 2396 case RULE_OP_MANGLE_DUPEBLOCK_FIRST: out_len = rule_op_mangle_dupeblock_first (p0, p1, buf0, buf1, out_len); break; 2397 case RULE_OP_MANGLE_DUPEBLOCK_LAST: out_len = rule_op_mangle_dupeblock_last (p0, p1, buf0, buf1, out_len); break; 2398 case RULE_OP_MANGLE_TITLE_SEP: out_len = rule_op_mangle_title_sep (p0, p1, buf0, buf1, out_len); break; 2399 case RULE_OP_MANGLE_TITLE: out_len = rule_op_mangle_title_sep (' ', p1, buf0, buf1, out_len); break; 2400 } 2401 2402 return out_len; 2403} 2404 2405DECLSPEC u32 apply_rules_optimized (CONSTANT_AS const u32 *cmds, u32 *buf0, u32 *buf1, const u32 len) 2406{ 2407 u32 out_len = len; 2408 2409 for (u32 i = 0; cmds[i] != 0; i++) 2410 { 2411 const u32 cmd = cmds[i]; 2412 2413 const u32 name = (cmd >> 0) & 0xff; 2414 const u32 p0 = (cmd >> 8) & 0xff; 2415 const u32 p1 = (cmd >> 16) & 0xff; 2416 2417 // we need to guarantee input length < 32 otherwise functions like rule_op_mangle_switch_last() and others will read out of boundary 2418 out_len = apply_rule_optimized (name, p0, p1, buf0, buf1, out_len); 2419 } 2420 2421 return out_len; 2422} 2423 2424DECLSPEC u32x apply_rules_vect_optimized (const u32 *pw_buf0, const u32 *pw_buf1, const u32 pw_len, CONSTANT_AS const kernel_rule_t *kernel_rules, const u32 il_pos, u32x *buf0, u32x *buf1) 2425{ 2426 #if VECT_SIZE == 1 2427 2428 buf0[0] = pw_buf0[0]; 2429 buf0[1] = pw_buf0[1]; 2430 buf0[2] = pw_buf0[2]; 2431 buf0[3] = pw_buf0[3]; 2432 buf1[0] = pw_buf1[0]; 2433 buf1[1] = pw_buf1[1]; 2434 buf1[2] = pw_buf1[2]; 2435 buf1[3] = pw_buf1[3]; 2436 2437 return apply_rules_optimized (kernel_rules[il_pos].cmds, buf0, buf1, pw_len); 2438 2439 #else 2440 2441 u32x out_len = 0; 2442 2443 #ifdef _unroll 2444 #pragma unroll 2445 #endif 2446 for (int i = 0; i < VECT_SIZE; i++) 2447 { 2448 u32 tmp0[4]; 2449 u32 tmp1[4]; 2450 2451 tmp0[0] = pw_buf0[0]; 2452 tmp0[1] = pw_buf0[1]; 2453 tmp0[2] = pw_buf0[2]; 2454 tmp0[3] = pw_buf0[3]; 2455 tmp1[0] = pw_buf1[0]; 2456 tmp1[1] = pw_buf1[1]; 2457 tmp1[2] = pw_buf1[2]; 2458 tmp1[3] = pw_buf1[3]; 2459 2460 const u32 tmp_len = apply_rules_optimized (kernel_rules[il_pos + i].cmds, tmp0, tmp1, pw_len); 2461 2462 switch (i) 2463 { 2464 #if VECT_SIZE >= 2 2465 case 0: 2466 buf0[0].s0 = tmp0[0]; 2467 buf0[1].s0 = tmp0[1]; 2468 buf0[2].s0 = tmp0[2]; 2469 buf0[3].s0 = tmp0[3]; 2470 buf1[0].s0 = tmp1[0]; 2471 buf1[1].s0 = tmp1[1]; 2472 buf1[2].s0 = tmp1[2]; 2473 buf1[3].s0 = tmp1[3]; 2474 out_len.s0 = tmp_len; 2475 break; 2476 2477 case 1: 2478 buf0[0].s1 = tmp0[0]; 2479 buf0[1].s1 = tmp0[1]; 2480 buf0[2].s1 = tmp0[2]; 2481 buf0[3].s1 = tmp0[3]; 2482 buf1[0].s1 = tmp1[0]; 2483 buf1[1].s1 = tmp1[1]; 2484 buf1[2].s1 = tmp1[2]; 2485 buf1[3].s1 = tmp1[3]; 2486 out_len.s1 = tmp_len; 2487 break; 2488 #endif 2489 2490 #if VECT_SIZE >= 4 2491 case 2: 2492 buf0[0].s2 = tmp0[0]; 2493 buf0[1].s2 = tmp0[1]; 2494 buf0[2].s2 = tmp0[2]; 2495 buf0[3].s2 = tmp0[3]; 2496 buf1[0].s2 = tmp1[0]; 2497 buf1[1].s2 = tmp1[1]; 2498 buf1[2].s2 = tmp1[2]; 2499 buf1[3].s2 = tmp1[3]; 2500 out_len.s2 = tmp_len; 2501 break; 2502 2503 case 3: 2504 buf0[0].s3 = tmp0[0]; 2505 buf0[1].s3 = tmp0[1]; 2506 buf0[2].s3 = tmp0[2]; 2507 buf0[3].s3 = tmp0[3]; 2508 buf1[0].s3 = tmp1[0]; 2509 buf1[1].s3 = tmp1[1]; 2510 buf1[2].s3 = tmp1[2]; 2511 buf1[3].s3 = tmp1[3]; 2512 out_len.s3 = tmp_len; 2513 break; 2514 #endif 2515 2516 #if VECT_SIZE >= 8 2517 case 4: 2518 buf0[0].s4 = tmp0[0]; 2519 buf0[1].s4 = tmp0[1]; 2520 buf0[2].s4 = tmp0[2]; 2521 buf0[3].s4 = tmp0[3]; 2522 buf1[0].s4 = tmp1[0]; 2523 buf1[1].s4 = tmp1[1]; 2524 buf1[2].s4 = tmp1[2]; 2525 buf1[3].s4 = tmp1[3]; 2526 out_len.s4 = tmp_len; 2527 break; 2528 2529 case 5: 2530 buf0[0].s5 = tmp0[0]; 2531 buf0[1].s5 = tmp0[1]; 2532 buf0[2].s5 = tmp0[2]; 2533 buf0[3].s5 = tmp0[3]; 2534 buf1[0].s5 = tmp1[0]; 2535 buf1[1].s5 = tmp1[1]; 2536 buf1[2].s5 = tmp1[2]; 2537 buf1[3].s5 = tmp1[3]; 2538 out_len.s5 = tmp_len; 2539 break; 2540 2541 case 6: 2542 buf0[0].s6 = tmp0[0]; 2543 buf0[1].s6 = tmp0[1]; 2544 buf0[2].s6 = tmp0[2]; 2545 buf0[3].s6 = tmp0[3]; 2546 buf1[0].s6 = tmp1[0]; 2547 buf1[1].s6 = tmp1[1]; 2548 buf1[2].s6 = tmp1[2]; 2549 buf1[3].s6 = tmp1[3]; 2550 out_len.s6 = tmp_len; 2551 break; 2552 2553 case 7: 2554 buf0[0].s7 = tmp0[0]; 2555 buf0[1].s7 = tmp0[1]; 2556 buf0[2].s7 = tmp0[2]; 2557 buf0[3].s7 = tmp0[3]; 2558 buf1[0].s7 = tmp1[0]; 2559 buf1[1].s7 = tmp1[1]; 2560 buf1[2].s7 = tmp1[2]; 2561 buf1[3].s7 = tmp1[3]; 2562 out_len.s7 = tmp_len; 2563 break; 2564 #endif 2565 2566 #if VECT_SIZE >= 16 2567 case 8: 2568 buf0[0].s8 = tmp0[0]; 2569 buf0[1].s8 = tmp0[1]; 2570 buf0[2].s8 = tmp0[2]; 2571 buf0[3].s8 = tmp0[3]; 2572 buf1[0].s8 = tmp1[0]; 2573 buf1[1].s8 = tmp1[1]; 2574 buf1[2].s8 = tmp1[2]; 2575 buf1[3].s8 = tmp1[3]; 2576 out_len.s8 = tmp_len; 2577 break; 2578 2579 case 9: 2580 buf0[0].s9 = tmp0[0]; 2581 buf0[1].s9 = tmp0[1]; 2582 buf0[2].s9 = tmp0[2]; 2583 buf0[3].s9 = tmp0[3]; 2584 buf1[0].s9 = tmp1[0]; 2585 buf1[1].s9 = tmp1[1]; 2586 buf1[2].s9 = tmp1[2]; 2587 buf1[3].s9 = tmp1[3]; 2588 out_len.s9 = tmp_len; 2589 break; 2590 2591 case 10: 2592 buf0[0].sa = tmp0[0]; 2593 buf0[1].sa = tmp0[1]; 2594 buf0[2].sa = tmp0[2]; 2595 buf0[3].sa = tmp0[3]; 2596 buf1[0].sa = tmp1[0]; 2597 buf1[1].sa = tmp1[1]; 2598 buf1[2].sa = tmp1[2]; 2599 buf1[3].sa = tmp1[3]; 2600 out_len.sa = tmp_len; 2601 break; 2602 2603 case 11: 2604 buf0[0].sb = tmp0[0]; 2605 buf0[1].sb = tmp0[1]; 2606 buf0[2].sb = tmp0[2]; 2607 buf0[3].sb = tmp0[3]; 2608 buf1[0].sb = tmp1[0]; 2609 buf1[1].sb = tmp1[1]; 2610 buf1[2].sb = tmp1[2]; 2611 buf1[3].sb = tmp1[3]; 2612 out_len.sb = tmp_len; 2613 break; 2614 2615 case 12: 2616 buf0[0].sc = tmp0[0]; 2617 buf0[1].sc = tmp0[1]; 2618 buf0[2].sc = tmp0[2]; 2619 buf0[3].sc = tmp0[3]; 2620 buf1[0].sc = tmp1[0]; 2621 buf1[1].sc = tmp1[1]; 2622 buf1[2].sc = tmp1[2]; 2623 buf1[3].sc = tmp1[3]; 2624 out_len.sc = tmp_len; 2625 break; 2626 2627 case 13: 2628 buf0[0].sd = tmp0[0]; 2629 buf0[1].sd = tmp0[1]; 2630 buf0[2].sd = tmp0[2]; 2631 buf0[3].sd = tmp0[3]; 2632 buf1[0].sd = tmp1[0]; 2633 buf1[1].sd = tmp1[1]; 2634 buf1[2].sd = tmp1[2]; 2635 buf1[3].sd = tmp1[3]; 2636 out_len.sd = tmp_len; 2637 break; 2638 2639 case 14: 2640 buf0[0].se = tmp0[0]; 2641 buf0[1].se = tmp0[1]; 2642 buf0[2].se = tmp0[2]; 2643 buf0[3].se = tmp0[3]; 2644 buf1[0].se = tmp1[0]; 2645 buf1[1].se = tmp1[1]; 2646 buf1[2].se = tmp1[2]; 2647 buf1[3].se = tmp1[3]; 2648 out_len.se = tmp_len; 2649 break; 2650 2651 case 15: 2652 buf0[0].sf = tmp0[0]; 2653 buf0[1].sf = tmp0[1]; 2654 buf0[2].sf = tmp0[2]; 2655 buf0[3].sf = tmp0[3]; 2656 buf1[0].sf = tmp1[0]; 2657 buf1[1].sf = tmp1[1]; 2658 buf1[2].sf = tmp1[2]; 2659 buf1[3].sf = tmp1[3]; 2660 out_len.sf = tmp_len; 2661 break; 2662 #endif 2663 } 2664 } 2665 2666 return out_len; 2667 2668 #endif 2669} 2670