// VectorOperations.cs
//
// Author:
//   Rodrigo Kumpera (rkumpera@novell.com)
//
// (C) 2008 Novell, Inc. (http://www.novell.com)
//
// Permission is hereby granted, free of charge, to any person obtaining
// a copy of this software and associated documentation files (the
// "Software"), to deal in the Software without restriction, including
// without limitation the rights to use, copy, modify, merge, publish,
// distribute, sublicense, and/or sell copies of the Software, and to
// permit persons to whom the Software is furnished to do so, subject to
// the following conditions:
//
// The above copyright notice and this permission notice shall be
// included in all copies or substantial portions of the Software.
//
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
// EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
// MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
// NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
// LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
// OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
// WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
//

using System;
namespace Mono.Simd
{
	/*
	 * Managed (non-accelerated) implementations of the vector extension methods.
	 * Every method works lane by lane on copies of its arguments and returns a new vector.
	 * Comparison methods encode "true" as all bits set (-1) and "false" as 0 in each lane.
	 */
	public static class VectorOperations
	{
		/* ==== Bitwise operations ==== */

		/* Returns (~v1) & v2, computed on the raw bits of each 32-bit lane. */
		[Acceleration (AccelMode.SSE1)]
		public static unsafe Vector4f AndNot (this Vector4f v1, Vector4f v2)
		{
			Vector4f res = new Vector4f ();
			int *a = (int*)&v1;
			int *b = (int*)&v2;
			int *c = (int*)&res;
			*c++ = ~*a++ & *b++;
			*c++ = ~*a++ & *b++;
			*c++ = ~*a++ & *b++;
			*c = ~*a & *b;
			return res;
		}

		/* Returns (~v1) & v2 on the raw bits; the operands are walked as four 32-bit words. */
		[Acceleration (AccelMode.SSE2)]
		public static unsafe Vector2d AndNot (this Vector2d v1, Vector2d v2)
		{
			Vector2d res = new Vector2d ();
			int *a = (int*)&v1;
			int *b = (int*)&v2;
			int *c = (int*)&res;
			*c++ = ~*a++ & *b++;
			*c++ = ~*a++ & *b++;
			*c++ = ~*a++ & *b++;
			*c = ~*a & *b;
			return res;
		}

		/* Shifts each lane right by amount, treating the lane as signed (sign bit is replicated). */
		[Acceleration (AccelMode.SSE2)]
		[CLSCompliant (false)]
		public static unsafe Vector4ui ArithmeticRightShift (this Vector4ui v1, int amount)
		{
			Vector4ui res = new Vector4ui ();
			uint *a = &v1.x;
			uint *b = &res.x;
			for (int i = 0; i < 4; ++i)
				*b++ = (uint)((int)(*a++) >> amount);
			return res;
		}

		/* Shifts each 16-bit lane right by amount, treating the lane as signed (sign bit is replicated). */
		[Acceleration (AccelMode.SSE2)]
		[CLSCompliant (false)]
		public static unsafe Vector8us ArithmeticRightShift (this Vector8us va, int amount)
		{
			Vector8us res = new Vector8us ();
			ushort *a = &va.v0;
			ushort *b = &res.v0;
			for (int i = 0; i < 8; ++i)
				*b++ = (ushort)((short)(*a++) >> amount);
			return res;
		}

		/* Shifts each 64-bit lane right by amount, treating the lane as unsigned (zero fill). */
		[Acceleration (AccelMode.SSE2)]
		public static unsafe Vector2l LogicalRightShift (this Vector2l v1, int amount)
		{
			return new Vector2l ((long)((ulong)(v1.x) >> amount), (long)((ulong)(v1.y) >> amount));
		}

		/* Shifts each 32-bit lane right by amount, treating the lane as unsigned (zero fill). */
		[Acceleration (AccelMode.SSE2)]
		public static unsafe Vector4i LogicalRightShift (this Vector4i v1, int amount)
		{
			Vector4i res = new Vector4i ();
			int *a = &v1.x;
			int *b = &res.x;
			for (int i = 0; i < 4; ++i)
				*b++ = (int)((uint)(*a++) >> amount);
			return res;
		}

		/* Shifts each 16-bit lane right by amount, treating the lane as unsigned (zero fill). */
		[Acceleration (AccelMode.SSE2)]
		public static unsafe Vector8s LogicalRightShift (this Vector8s va, int amount)
		{
			Vector8s res = new Vector8s ();
			short *a = &va.v0;
			short *b = &res.v0;
			for (int i = 0; i < 8; ++i)
				*b++ = (short)((ushort)(*a++) >> amount);
			return res;
		}

		/* Builds a 16-bit mask: bit i of the result is the top bit (0x80) of byte lane i. */
		[Acceleration (AccelMode.SSE2)]
		[CLSCompliant (false)]
		public static unsafe int ExtractByteMask (this Vector16sb va) {
			int res = 0;
			sbyte *a = (sbyte*)&va;
			for (int i = 0; i < 16; ++i)
				res |= (*a++ & 0x80) >> 7 << i;
			return res;
		}

		/* Builds a 16-bit mask: bit i of the result is the top bit (0x80) of byte lane i. */
		[Acceleration (AccelMode.SSE2)]
		public static unsafe int ExtractByteMask (this Vector16b va) {
			int res = 0;
			byte *a = (byte*)&va;
			for (int i = 0; i < 16; ++i)
				res |= (*a++ & 0x80) >> 7 << i;
			return res;
		}


		/* ==== Math operations ==== */

		/* Per-lane sum clamped to [short.MinValue, short.MaxValue]. */
		[Acceleration (AccelMode.SSE2)]
		public static unsafe Vector8s AddWithSaturation (this Vector8s va, Vector8s vb) {
			Vector8s res = new Vector8s ();
			short *a = &va.v0;
			short *b = &vb.v0;
			short *c = &res.v0;
			for (int i = 0; i < 8; ++i)
				*c++ = (short) System.Math.Max (System.Math.Min (*a++ + *b++, short.MaxValue), short.MinValue);
			return res;
		}

		/* Per-lane sum clamped to ushort.MaxValue. */
		[Acceleration (AccelMode.SSE2)]
		[CLSCompliant (false)]
		public static unsafe Vector8us AddWithSaturation (this Vector8us va, Vector8us vb) {
			Vector8us res = new Vector8us ();
			ushort *a = &va.v0;
			ushort *b = &vb.v0;
			ushort *c = &res.v0;
			for (int i = 0; i < 8; ++i)
				*c++ = (ushort) System.Math.Min (*a++ + *b++, ushort.MaxValue);
			return res;
		}

		/* Per-lane sum clamped to [sbyte.MinValue, sbyte.MaxValue]. */
		[Acceleration (AccelMode.SSE2)]
		[CLSCompliant (false)]
		public static unsafe Vector16sb AddWithSaturation (this Vector16sb va, Vector16sb vb) {
			Vector16sb res = new Vector16sb ();
			sbyte *a = &va.v0;
			sbyte *b = &vb.v0;
			sbyte *c = &res.v0;
			for (int i = 0; i < 16; ++i)
				*c++ = (sbyte) System.Math.Max (System.Math.Min (*a++ + *b++, sbyte.MaxValue), sbyte.MinValue);
			return res;
		}

		/* Per-lane sum clamped to byte.MaxValue. */
		[Acceleration (AccelMode.SSE2)]
		public static unsafe Vector16b AddWithSaturation (this Vector16b va, Vector16b vb) {
			Vector16b res = new Vector16b ();
			byte *a = &va.v0;
			byte *b = &vb.v0;
			byte *c = &res.v0;
			for (int i = 0; i < 16; ++i)
				*c++ = (byte) System.Math.Min (*a++ + *b++, byte.MaxValue);
			return res;
		}

		/* Per-lane difference (va - vb) clamped to [short.MinValue, short.MaxValue]. */
		[Acceleration (AccelMode.SSE2)]
		public static unsafe Vector8s SubtractWithSaturation (this Vector8s va, Vector8s vb) {
			Vector8s res = new Vector8s ();
			short *a = &va.v0;
			short *b = &vb.v0;
			short *c = &res.v0;
			for (int i = 0; i < 8; ++i)
				*c++ = (short) System.Math.Max (System.Math.Min (*a++ - *b++, short.MaxValue), short.MinValue);
			return res;
		}

		/* Per-lane difference (va - vb) clamped at 0. */
		[Acceleration (AccelMode.SSE2)]
		[CLSCompliant (false)]
		public static unsafe Vector8us SubtractWithSaturation (this Vector8us va, Vector8us vb) {
			Vector8us res = new Vector8us ();
			ushort *a = &va.v0;
			ushort *b = &vb.v0;
			ushort *c = &res.v0;
			for (int i = 0; i < 8; ++i)
				*c++ = (ushort) System.Math.Max (*a++ - *b++, 0);
			return res;
		}

		/* Per-lane difference (va - vb) clamped to [sbyte.MinValue, sbyte.MaxValue]. */
		[Acceleration (AccelMode.SSE2)]
		[CLSCompliant (false)]
		public static unsafe Vector16sb SubtractWithSaturation (this Vector16sb va, Vector16sb vb) {
			Vector16sb res = new Vector16sb ();
			sbyte *a = &va.v0;
			sbyte *b = &vb.v0;
			sbyte *c = &res.v0;
			for (int i = 0; i < 16; ++i)
				*c++ = (sbyte) System.Math.Max (System.Math.Min (*a++ - *b++, sbyte.MaxValue), sbyte.MinValue);
			return res;
		}

		/* Per-lane difference (va - vb) clamped at 0. */
		[Acceleration (AccelMode.SSE2)]
		public static unsafe Vector16b SubtractWithSaturation (this Vector16b va, Vector16b vb) {
			Vector16b res = new Vector16b ();
			byte *a = &va.v0;
			byte *b = &vb.v0;
			byte *c = &res.v0;
			for (int i = 0; i < 16; ++i)
				*c++ = (byte) System.Math.Max (*a++ - *b++, 0);
			return res;
		}

		/* Per-lane signed 16x16 multiply; keeps the upper 16 bits of the 32-bit product. */
		[Acceleration (AccelMode.SSE2)]
		public static unsafe Vector8s MultiplyStoreHigh (this Vector8s va, Vector8s vb) {
			Vector8s res = new Vector8s ();
			short *a = &va.v0;
			short *b = &vb.v0;
			short *c = &res.v0;
			for (int i = 0; i < 8; ++i)
				*c++ = (short)((int)*a++ * (int)*b++ >> 16);
			return res;
		}

		/* Per-lane unsigned 16x16 multiply; keeps the upper 16 bits of the 32-bit product. */
		[Acceleration (AccelMode.SSE2)]
		[CLSCompliant (false)]
		public static unsafe Vector8us MultiplyStoreHigh (this Vector8us va, Vector8us vb) {
			Vector8us res = new Vector8us ();
			ushort *a = &va.v0;
			ushort *b = &vb.v0;
			ushort *c = &res.v0;
			for (int i = 0; i < 8; ++i)
				*c++ = (ushort)((uint)*a++ * (uint)*b++ >> 16);
			return res;
		}

		/*
		 * Sums |va[i] - vb[i]| over byte lanes 0-7 into V0 and over lanes 8-15 into V4
		 * of the result; the remaining lanes keep their default value.
		 * NOTE(review): vb is read as signed bytes here. If the accelerated path treats
		 * both operands as unsigned bytes, results differ for negative vb lanes - confirm.
		 */
		[CLSCompliant (false)]
		[Acceleration (AccelMode.SSE2)]
		public static unsafe Vector8us SumOfAbsoluteDifferences (this Vector16b va, Vector16sb vb) {
			Vector8us res = new Vector8us ();
			byte *a = &va.v0;
			sbyte *b = (sbyte*)&vb;

			int tmp = 0;
			for (int i = 0; i < 8; ++i)
				tmp += System.Math.Abs ((int)*a++ - (int)*b++);
			res.V0 = (ushort)tmp;

			tmp = 0;
			for (int i = 0; i < 8; ++i)
				tmp += System.Math.Abs ((int)*a++ - (int)*b++);
			res.V4 = (ushort)tmp;

			return res;
		}

		/* Per-lane square root. */
		[Acceleration (AccelMode.SSE1)]
		public static Vector4f Sqrt (this Vector4f v1)
		{
			return new Vector4f ((float)System.Math.Sqrt ((float)v1.x),
								(float)System.Math.Sqrt ((float)v1.y),
								(float)System.Math.Sqrt ((float)v1.z),
								(float)System.Math.Sqrt ((float)v1.w));
		}

		/* Per-lane 1 / sqrt (x), evaluated in double precision and narrowed to float. */
		[Acceleration (AccelMode.SSE1)]
		public static Vector4f InvSqrt (this Vector4f v1)
		{
			return new Vector4f ((float)(1.0 / System.Math.Sqrt ((float)v1.x)),
								(float)(1.0 / System.Math.Sqrt ((float)v1.y)),
								(float)(1.0 / System.Math.Sqrt ((float)v1.z)),
								(float)(1.0 / System.Math.Sqrt ((float)v1.w)));
		}

		/* Per-lane 1 / x. */
		[Acceleration (AccelMode.SSE1)]
		public static Vector4f Reciprocal (this Vector4f v1)
		{
			return new Vector4f (1.0f / v1.x, 1.0f / v1.y, 1.0f / v1.z, 1.0f / v1.w);
		}

		/* Per-lane square root. */
		[Acceleration (AccelMode.SSE2)]
		public static Vector2d Sqrt (this Vector2d v1)
		{
			return new Vector2d (System.Math.Sqrt (v1.x),
								System.Math.Sqrt (v1.y));
		}

		/* Per-lane average rounded up: (a + b + 1) >> 1. */
		[Acceleration (AccelMode.SSE2)]
		[CLSCompliant (false)]
		public static unsafe Vector8us Average (this Vector8us va, Vector8us vb) {
			Vector8us res = new Vector8us ();
			ushort *a = &va.v0;
			ushort *b = &vb.v0;
			ushort *c = &res.v0;
			for (int i = 0; i < 8; ++i)
				*c++ = (ushort) ((*a++ + *b++ + 1) >> 1);
			return res;
		}

		/* Per-lane average rounded up: (a + b + 1) >> 1. */
		[Acceleration (AccelMode.SSE2)]
		public static unsafe Vector16b Average (this Vector16b va, Vector16b vb) {
			Vector16b res = new Vector16b ();
			byte *a = &va.v0;
			byte *b = &vb.v0;
			byte *c = &res.v0;
			for (int i = 0; i < 16; ++i)
				*c++ = (byte) ((*a++ + *b++ + 1) >> 1);
			return res;
		}

		/* Per-lane maximum. */
		[Acceleration (AccelMode.SSE1)]
		public static Vector4f Max (this Vector4f v1, Vector4f v2)
		{
			return new Vector4f (System.Math.Max (v1.x, v2.x),
								System.Math.Max (v1.y, v2.y),
								System.Math.Max (v1.z, v2.z),
								System.Math.Max (v1.w, v2.w));
		}

		/* Per-lane maximum. */
		[Acceleration (AccelMode.SSE2)]
		public static Vector2d Max (this Vector2d v1, Vector2d v2)
		{
			return new Vector2d (System.Math.Max (v1.x, v2.x),
								System.Math.Max (v1.y, v2.y));
		}

		/* Per-lane maximum. */
		[Acceleration (AccelMode.SSE41)]
		public static Vector4i Max (this Vector4i v1, Vector4i v2)
		{
			return new Vector4i (System.Math.Max (v1.x, v2.x), System.Math.Max (v1.y, v2.y), System.Math.Max (v1.z, v2.z), System.Math.Max (v1.w, v2.w));
		}

		/* Per-lane maximum. */
		[Acceleration (AccelMode.SSE41)]
		[CLSCompliant (false)]
		public static Vector4ui Max (this Vector4ui v1, Vector4ui v2)
		{
			return new Vector4ui (System.Math.Max (v1.x, v2.x), System.Math.Max (v1.y, v2.y), System.Math.Max (v1.z, v2.z), System.Math.Max (v1.w, v2.w));
		}

		/* Per-lane maximum. */
		[Acceleration (AccelMode.SSE2)]
		public static unsafe Vector8s Max (this Vector8s va, Vector8s vb) {
			Vector8s res = new Vector8s ();
			short *a = &va.v0;
			short *b = &vb.v0;
			short *c = &res.v0;
			for (int i = 0; i < 8; ++i)
				*c++ = (short) System.Math.Max (*a++, *b++);
			return res;
		}

		/* Per-lane maximum. */
		[Acceleration (AccelMode.SSE41)]
		[CLSCompliant (false)]
		public static unsafe Vector8us Max (this Vector8us va, Vector8us vb) {
			Vector8us res = new Vector8us ();
			ushort *a = &va.v0;
			ushort *b = &vb.v0;
			ushort *c = &res.v0;
			for (int i = 0; i < 8; ++i)
				*c++ = (ushort) System.Math.Max (*a++, *b++);
			return res;
		}

		/* Per-lane minimum. */
		[Acceleration (AccelMode.SSE1)]
		public static Vector4f Min (this Vector4f v1, Vector4f v2)
		{
			return new Vector4f (System.Math.Min (v1.x, v2.x),
								System.Math.Min (v1.y, v2.y),
								System.Math.Min (v1.z, v2.z),
								System.Math.Min (v1.w, v2.w));
		}

		/* Per-lane maximum. */
		[Acceleration (AccelMode.SSE41)]
		[CLSCompliant (false)]
		public static unsafe Vector16sb Max (this Vector16sb va, Vector16sb vb) {
			Vector16sb res = new Vector16sb ();
			sbyte *a = &va.v0;
			sbyte *b = &vb.v0;
			sbyte *c = &res.v0;
			for (int i = 0; i < 16; ++i)
				*c++ = (sbyte) System.Math.Max (*a++, *b++);
			return res;
		}

		/* Per-lane maximum. */
		[Acceleration (AccelMode.SSE2)]
		public static unsafe Vector16b Max (this Vector16b va, Vector16b vb) {
			Vector16b res = new Vector16b ();
			byte *a = &va.v0;
			byte *b = &vb.v0;
			byte *c = &res.v0;
			for (int i = 0; i < 16; ++i)
				*c++ = (byte) System.Math.Max(*a++, *b++);
			return res;
		}

		/* Per-lane minimum. */
		[Acceleration (AccelMode.SSE2)]
		public static Vector2d Min (this Vector2d v1, Vector2d v2)
		{
			return new Vector2d (System.Math.Min (v1.x, v2.x),
								System.Math.Min (v1.y, v2.y));
		}

		/* Per-lane minimum. */
		[Acceleration (AccelMode.SSE41)]
		public static Vector4i Min (this Vector4i v1, Vector4i v2)
		{
			return new Vector4i (System.Math.Min (v1.x, v2.x), System.Math.Min (v1.y, v2.y), System.Math.Min (v1.z, v2.z), System.Math.Min (v1.w, v2.w));
		}

		/* Per-lane minimum. */
		[Acceleration (AccelMode.SSE41)]
		[CLSCompliant (false)]
		public static Vector4ui Min (this Vector4ui v1, Vector4ui v2)
		{
			return new Vector4ui (System.Math.Min (v1.x, v2.x), System.Math.Min (v1.y, v2.y), System.Math.Min (v1.z, v2.z), System.Math.Min (v1.w, v2.w));
		}

		/* Per-lane minimum. */
		[Acceleration (AccelMode.SSE2)]
		public static unsafe Vector8s Min (this Vector8s va, Vector8s vb) {
			Vector8s res = new Vector8s ();
			short *a = &va.v0;
			short *b = &vb.v0;
			short *c = &res.v0;
			for (int i = 0; i < 8; ++i)
				*c++ = (short) System.Math.Min (*a++, *b++);
			return res;
		}

		/* Per-lane minimum. */
		[Acceleration (AccelMode.SSE41)]
		[CLSCompliant (false)]
		public static unsafe Vector8us Min (this Vector8us va, Vector8us vb) {
			Vector8us res = new Vector8us ();
			ushort *a = &va.v0;
			ushort *b = &vb.v0;
			ushort *c = &res.v0;
			for (int i = 0; i < 8; ++i)
				*c++ = (ushort) System.Math.Min (*a++, *b++);
			return res;
		}

		/* Per-lane minimum. */
		[Acceleration (AccelMode.SSE41)]
		[CLSCompliant (false)]
		public static unsafe Vector16sb Min (this Vector16sb va, Vector16sb vb) {
			Vector16sb res = new Vector16sb ();
			sbyte *a = &va.v0;
			sbyte *b = &vb.v0;
			sbyte *c = &res.v0;
			for (int i = 0; i < 16; ++i)
				*c++ = (sbyte) System.Math.Min(*a++, *b++);
			return res;
		}

		/* Per-lane minimum. */
		[Acceleration (AccelMode.SSE2)]
		public static unsafe Vector16b Min (this Vector16b va, Vector16b vb) {
			Vector16b res = new Vector16b ();
			byte *a = &va.v0;
			byte *b = &vb.v0;
			byte *c = &res.v0;
			for (int i = 0; i < 16; ++i)
				*c++ = (byte) System.Math.Min(*a++, *b++);
			return res;
		}


		/* ==== Horizontal operations ==== */

		/* Adds adjacent lane pairs: (v1.x+v1.y, v1.z+v1.w, v2.x+v2.y, v2.z+v2.w). */
		[Acceleration (AccelMode.SSE3)]
		public static Vector4f HorizontalAdd (this Vector4f v1, Vector4f v2)
		{
			return new Vector4f (v1.x + v1.y, v1.z + v1.w, v2.x + v2.y, v2.z + v2.w);
		}

		/* Adds the two lanes of each operand: (v1.x+v1.y, v2.x+v2.y). */
		[Acceleration (AccelMode.SSE3)]
		public static Vector2d HorizontalAdd (this Vector2d v1, Vector2d v2)
		{
			return new Vector2d (v1.x + v1.y, v2.x + v2.y);
		}

		/* Subtracts adjacent lane pairs: (v1.x-v1.y, v1.z-v1.w, v2.x-v2.y, v2.z-v2.w). */
		[Acceleration (AccelMode.SSE3)]
		public static Vector4f HorizontalSub (this Vector4f v1, Vector4f v2)
		{
			return new Vector4f (v1.x - v1.y, v1.z - v1.w, v2.x - v2.y, v2.z - v2.w);
		}

		/* Subtracts the two lanes of each operand: (v1.x-v1.y, v2.x-v2.y). */
		[Acceleration (AccelMode.SSE3)]
		public static Vector2d HorizontalSub (this Vector2d v1, Vector2d v2)
		{
			return new Vector2d (v1.x - v1.y, v2.x - v2.y);
		}

		/* Subtracts in lanes x and z, adds in lanes y and w. */
		[Acceleration (AccelMode.SSE3)]
		public static Vector4f AddSub (this Vector4f v1, Vector4f v2)
		{
			return new Vector4f (v1.x - v2.x, v1.y + v2.y, v1.z - v2.z, v1.w + v2.w);
		}

		/* Subtracts in lane x, adds in lane y. */
		[Acceleration (AccelMode.SSE3)]
		public static Vector2d AddSub (this Vector2d v1, Vector2d v2)
		{
			return new Vector2d (v1.x - v2.x, v1.y + v2.y);
		}

		/* ==== Compare methods ==== */

		/* Same as a == b. The mask is written into the raw bits of each float lane. */
		[Acceleration (AccelMode.SSE1)]
		public static unsafe Vector4f CompareEqual (this Vector4f v1, Vector4f v2)
		{
			Vector4f res = new Vector4f ();
			int *c = (int*)&res;
			*c++ = v1.x == v2.x ? -1 : 0;
			*c++ = v1.y == v2.y ? -1 : 0;
			*c++ = v1.z == v2.z ? -1 : 0;
			*c = v1.w == v2.w ? -1 : 0;
			return res;
		}

		/* Same as a == b. The mask is written into the raw bits of each double lane. */
		[Acceleration (AccelMode.SSE2)]
		public static unsafe Vector2d CompareEqual (this Vector2d v1, Vector2d v2)
		{
			Vector2d res = new Vector2d ();
			long *c = (long*)&res;
			*c++ = v1.x == v2.x ? -1 : 0;
			*c = v1.y == v2.y ? -1 : 0;
			return res;
		}

		/* Same as a == b. */
		[Acceleration (AccelMode.SSE41)]
		public static Vector2l CompareEqual (this Vector2l v1, Vector2l v2)
		{
			return new Vector2l ((long)(v1.x == v2.x ? -1 : 0), (long)(v1.y == v2.y ? -1 : 0));
		}

		/* Same as a == b. The cast of -1 to the unsigned lane type yields all bits set. */
		[Acceleration (AccelMode.SSE41)]
		[CLSCompliant (false)]
		public static Vector2ul CompareEqual (this Vector2ul v1, Vector2ul v2)
		{
			return new Vector2ul ((ulong)(v1.x == v2.x ? -1 : 0), (ulong)(v1.y == v2.y ? -1 : 0));
		}

		/* Same as a == b. */
		[Acceleration (AccelMode.SSE2)]
		public static Vector4i CompareEqual (this Vector4i v1, Vector4i v2)
		{
			return new Vector4i ((int)(v1.x == v2.x ? -1 : 0), (int)(v1.y == v2.y ? -1 : 0), (int)(v1.z == v2.z ? -1 : 0), (int)(v1.w == v2.w ? -1 : 0));
		}

		/* Same as a == b. The cast of -1 to the unsigned lane type yields all bits set. */
		[Acceleration (AccelMode.SSE2)]
		[CLSCompliant (false)]
		public static Vector4ui CompareEqual (this Vector4ui v1, Vector4ui v2)
		{
			return new Vector4ui ((uint)(v1.x == v2.x ? -1 : 0), (uint)(v1.y == v2.y ? -1 : 0), (uint)(v1.z == v2.z ? -1 : 0), (uint)(v1.w == v2.w ? -1 : 0));
		}

		/* Same as a == b. */
		[Acceleration (AccelMode.SSE2)]
		public static unsafe Vector8s CompareEqual (this Vector8s va, Vector8s vb) {
			Vector8s res = new Vector8s ();
			short *a = &va.v0;
			short *b = &vb.v0;
			short *c = &res.v0;
			for (int i = 0; i < 8; ++i)
				*c++ = (short) (*a++ == *b++ ? -1 : 0);
			return res;
		}

		/* Same as a == b. The cast of -1 to the unsigned lane type yields all bits set. */
		[Acceleration (AccelMode.SSE2)]
		[CLSCompliant (false)]
		public static unsafe Vector8us CompareEqual (this Vector8us va, Vector8us vb) {
			Vector8us res = new Vector8us ();
			ushort *a = &va.v0;
			ushort *b = &vb.v0;
			ushort *c = &res.v0;
			for (int i = 0; i < 8; ++i)
				*c++ = (ushort) (*a++ == *b++ ? -1 : 0);
			return res;
		}

		/* Same as a == b. */
		[Acceleration (AccelMode.SSE2)]
		[CLSCompliant (false)]
		public static unsafe Vector16sb CompareEqual (this Vector16sb va, Vector16sb vb) {
			Vector16sb res = new Vector16sb ();
			sbyte *a = &va.v0;
			sbyte *b = &vb.v0;
			sbyte *c = &res.v0;
			for (int i = 0; i < 16; ++i)
				*c++ = (sbyte) (*a++ == *b++ ? -1 : 0);
			return res;
		}

		/* Same as a == b. The cast of -1 to the unsigned lane type yields all bits set. */
		[Acceleration (AccelMode.SSE2)]
		public static unsafe Vector16b CompareEqual (this Vector16b va, Vector16b vb) {
			Vector16b res = new Vector16b ();
			byte *a = &va.v0;
			byte *b = &vb.v0;
			byte *c = &res.v0;
			for (int i = 0; i < 16; ++i)
				*c++ = (byte) (*a++ == *b++ ? -1 : 0);
			return res;
		}

		/* Same as a < b. */
		[Acceleration (AccelMode.SSE1)]
		public static unsafe Vector4f CompareLessThan (this Vector4f v1, Vector4f v2)
		{
			Vector4f res = new Vector4f ();
			int *c = (int*)&res;
			*c++ = v1.x < v2.x ? -1 : 0;
			*c++ = v1.y < v2.y ? -1 : 0;
			*c++ = v1.z < v2.z ? -1 : 0;
			*c = v1.w < v2.w ? -1 : 0;
			return res;
		}

		/* Same as a < b. */
		[Acceleration (AccelMode.SSE2)]
		public static unsafe Vector2d CompareLessThan (this Vector2d v1, Vector2d v2)
		{
			Vector2d res = new Vector2d ();
			long *c = (long*)&res;
			*c++ = v1.x < v2.x ? -1 : 0;
			*c = v1.y < v2.y ? -1 : 0;
			return res;
		}

		/* Same as a <= b. */
		[Acceleration (AccelMode.SSE1)]
		public static unsafe Vector4f CompareLessEqual (this Vector4f v1, Vector4f v2)
		{
			Vector4f res = new Vector4f ();
			int *c = (int*)&res;
			*c++ = v1.x <= v2.x ? -1 : 0;
			*c++ = v1.y <= v2.y ? -1 : 0;
			*c++ = v1.z <= v2.z ? -1 : 0;
			*c = v1.w <= v2.w ? -1 : 0;
			return res;
		}

		/* Same as a <= b. */
		[Acceleration (AccelMode.SSE2)]
		public static unsafe Vector2d CompareLessEqual (this Vector2d v1, Vector2d v2)
		{
			Vector2d res = new Vector2d ();
			long *c = (long*)&res;
			*c++ = v1.x <= v2.x ? -1 : 0;
			*c = v1.y <= v2.y ? -1 : 0;
			return res;
		}

		/* Same as a > b. */
		[Acceleration (AccelMode.SSE42)]
		public static Vector2l CompareGreaterThan (this Vector2l v1, Vector2l v2)
		{
			return new Vector2l ((long)(v1.x > v2.x ? -1 : 0), (long)(v1.y > v2.y ? -1 : 0));
		}

		/* Same as a > b. */
		[Acceleration (AccelMode.SSE2)]
		public static Vector4i CompareGreaterThan (this Vector4i v1, Vector4i v2)
		{
			return new Vector4i ((int)(v1.x > v2.x ? -1 : 0), (int)(v1.y > v2.y ? -1 : 0), (int)(v1.z > v2.z ? -1 : 0), (int)(v1.w > v2.w ? -1 : 0));
		}

		/* Same as a > b. */
		[Acceleration (AccelMode.SSE2)]
		public static unsafe Vector8s CompareGreaterThan (this Vector8s va, Vector8s vb) {
			Vector8s res = new Vector8s ();
			short *a = &va.v0;
			short *b = &vb.v0;
			short *c = &res.v0;
			for (int i = 0; i < 8; ++i)
				*c++ = (short) (*a++ > *b++ ? -1 : 0);
			return res;
		}

		/* Same as a > b. */
		[Acceleration (AccelMode.SSE2)]
		[CLSCompliant (false)]
		public static unsafe Vector16sb CompareGreaterThan (this Vector16sb va, Vector16sb vb) {
			Vector16sb res = new Vector16sb ();
			sbyte *a = &va.v0;
			sbyte *b = &vb.v0;
			sbyte *c = &res.v0;
			for (int i = 0; i < 16; ++i)
				*c++ = (sbyte) (*a++ > *b++ ? -1 : 0);
			return res;
		}

		/* Same as float.IsNaN (a) || float.IsNaN (b). */
		[Acceleration (AccelMode.SSE1)]
		public static unsafe Vector4f CompareUnordered (this Vector4f v1, Vector4f v2)
		{
			Vector4f res = new Vector4f ();
			int *c = (int*)&res;
			*c++ = float.IsNaN (v1.x) || float.IsNaN (v2.x) ? -1 : 0;
			*c++ = float.IsNaN (v1.y) || float.IsNaN (v2.y) ? -1 : 0;
			*c++ = float.IsNaN (v1.z) || float.IsNaN (v2.z) ? -1 : 0;
			*c = float.IsNaN (v1.w) || float.IsNaN (v2.w) ? -1 : 0;
			return res;
		}

		/* Same as double.IsNaN (a) || double.IsNaN (b). */
		[Acceleration (AccelMode.SSE2)]
		public static unsafe Vector2d CompareUnordered (this Vector2d v1, Vector2d v2)
		{
			Vector2d res = new Vector2d ();
			long *c = (long*)&res;
			*c++ = double.IsNaN (v1.x) || double.IsNaN (v2.x) ? -1 : 0;
			*c = double.IsNaN (v1.y) || double.IsNaN (v2.y) ? -1 : 0;
			return res;
		}

		/* Same as a != b. */
		[Acceleration (AccelMode.SSE1)]
		public static unsafe Vector4f CompareNotEqual (this Vector4f v1, Vector4f v2)
		{
			Vector4f res = new Vector4f ();
			int *c = (int*)&res;
			*c++ = v1.x != v2.x ? -1 : 0;
			*c++ = v1.y != v2.y ? -1 : 0;
			*c++ = v1.z != v2.z ? -1 : 0;
			*c = v1.w != v2.w ? -1 : 0;
			return res;
		}

		/* Same as a != b. */
		[Acceleration (AccelMode.SSE2)]
		public static unsafe Vector2d CompareNotEqual (this Vector2d v1, Vector2d v2)
		{
			Vector2d res = new Vector2d ();
			long *c = (long*)&res;
			*c++ = v1.x != v2.x ? -1 : 0;
			*c = v1.y != v2.y ? -1 : 0;
			return res;
		}

		/* Same as !(a < b). */
		[Acceleration (AccelMode.SSE1)]
		public static unsafe Vector4f CompareNotLessThan (this Vector4f v1, Vector4f v2)
		{
			Vector4f res = new Vector4f ();
			int *c = (int*)&res;
			*c++ = v1.x < v2.x ? 0 : -1;
			*c++ = v1.y < v2.y ? 0 : -1;
			*c++ = v1.z < v2.z ? 0 : -1;
			*c = v1.w < v2.w ? 0 : -1;
			return res;
		}

		/* Same as !(a < b). */
		[Acceleration (AccelMode.SSE2)]
		public static unsafe Vector2d CompareNotLessThan (this Vector2d v1, Vector2d v2)
		{
			Vector2d res = new Vector2d ();
			long *c = (long*)&res;
			*c++ = v1.x < v2.x ? 0 : -1;
			*c = v1.y < v2.y ? 0 : -1;
			return res;
		}

		/* Same as !(a <= b). */
		[Acceleration (AccelMode.SSE1)]
		public static unsafe Vector4f CompareNotLessEqual (this Vector4f v1, Vector4f v2)
		{
			Vector4f res = new Vector4f ();
			int *c = (int*)&res;
			*c++ = v1.x <= v2.x ? 0 : -1;
			*c++ = v1.y <= v2.y ? 0 : -1;
			*c++ = v1.z <= v2.z ? 0 : -1;
			*c = v1.w <= v2.w ? 0 : -1;
			return res;
		}

		/* Same as !(a <= b). */
		[Acceleration (AccelMode.SSE2)]
		public static unsafe Vector2d CompareNotLessEqual (this Vector2d v1, Vector2d v2)
		{
			Vector2d res = new Vector2d ();
			long *c = (long*)&res;
			*c++ = v1.x <= v2.x ? 0 : -1;
			*c = v1.y <= v2.y ? 0 : -1;
			return res;
		}

		/* Same as !float.IsNaN (a) && !float.IsNaN (b). */
		[Acceleration (AccelMode.SSE1)]
		public static unsafe Vector4f CompareOrdered (this Vector4f v1, Vector4f v2)
		{
			Vector4f res = new Vector4f ();
			int *c = (int*)&res;
			*c++ = !float.IsNaN (v1.x) && !float.IsNaN (v2.x) ? -1 : 0;
			*c++ = !float.IsNaN (v1.y) && !float.IsNaN (v2.y) ? -1 : 0;
			*c++ = !float.IsNaN (v1.z) && !float.IsNaN (v2.z) ? -1 : 0;
			*c = !float.IsNaN (v1.w) && !float.IsNaN (v2.w) ? -1 : 0;
			return res;
		}

		/* Same as !double.IsNaN (a) && !double.IsNaN (b). */
		[Acceleration (AccelMode.SSE2)]
		public static unsafe Vector2d CompareOrdered (this Vector2d v1, Vector2d v2)
		{
			Vector2d res = new Vector2d ();
			long *c = (long*)&res;
			*c++ = !double.IsNaN (v1.x) && !double.IsNaN (v2.x) ? -1 : 0;
			*c = !double.IsNaN (v1.y) && !double.IsNaN (v2.y) ? -1 : 0;
			return res;
		}


		/* ==== Data shuffling ==== */

		/* Interleaves the upper halves: (v1.z, v2.z, v1.w, v2.w). */
		[Acceleration (AccelMode.SSE1)]
		public static Vector4f InterleaveHigh (this Vector4f v1, Vector4f v2)
		{
			return new Vector4f (v1.z, v2.z, v1.w, v2.w);
		}

		/* Pairs the upper lanes: (v1.y, v2.y). */
		[Acceleration (AccelMode.SSE2)]
		public static Vector2d InterleaveHigh (this Vector2d v1, Vector2d v2)
		{
			return new Vector2d (v1.y, v2.y);
		}

		/* Interleaves the lower halves: (v1.x, v2.x, v1.y, v2.y). */
		[Acceleration (AccelMode.SSE1)]
		public static Vector4f InterleaveLow (this Vector4f v1, Vector4f v2)
		{
			return new Vector4f (v1.x, v2.x, v1.y, v2.y);
		}

		/* Pairs the lower lanes: (v1.x, v2.x). */
		[Acceleration (AccelMode.SSE2)]
		public static Vector2d InterleaveLow (this Vector2d v1, Vector2d v2)
		{
			return new Vector2d (v1.x, v2.x);
		}

		/* Broadcasts the low lane: (v1.x, v1.x). */
		[Acceleration (AccelMode.SSE3)]
		public static Vector2d Duplicate (this Vector2d v1)
		{
			return new Vector2d (v1.x, v1.x);
		}

		/* Duplicates the even lanes: (v1.x, v1.x, v1.z, v1.z). */
		[Acceleration (AccelMode.SSE3)]
		public static Vector4f DuplicateLow (this Vector4f v1)
		{
			return new Vector4f (v1.x, v1.x, v1.z, v1.z);
		}

		/* Duplicates the odd lanes: (v1.y, v1.y, v1.w, v1.w). */
		[Acceleration (AccelMode.SSE3)]
		public static Vector4f DuplicateHigh (this Vector4f v1)
		{
			return new Vector4f (v1.y, v1.y, v1.w, v1.w);
		}

		/* Pairs the low lanes: (v1.x, v2.x). */
		[Acceleration (AccelMode.SSE2)]
		public static Vector2l UnpackLow (this Vector2l v1, Vector2l v2)
		{
			return new Vector2l (v1.x, v2.x);
		}

		/* Pairs the low lanes: (v1.x, v2.x). */
		[Acceleration (AccelMode.SSE2)]
		[CLSCompliant (false)]
		public static Vector2ul UnpackLow (this Vector2ul v1, Vector2ul v2)
		{
			return new Vector2ul (v1.x, v2.x);
		}

		/* Interleaves the lower halves: (v1.x, v2.x, v1.y, v2.y). */
		[Acceleration (AccelMode.SSE2)]
		public static Vector4i UnpackLow (this Vector4i v1, Vector4i v2)
		{
			return new Vector4i (v1.x, v2.x, v1.y, v2.y);
		}

		/* Interleaves the lower halves: (v1.x, v2.x, v1.y, v2.y). */
		[Acceleration (AccelMode.SSE2)]
		[CLSCompliant (false)]
		public static Vector4ui UnpackLow (this Vector4ui v1, Vector4ui v2)
		{
			return new Vector4ui (v1.x, v2.x, v1.y, v2.y);
		}

		/* Interleaves lanes 0-3 of va and vb. */
		[Acceleration (AccelMode.SSE2)]
		public static unsafe Vector8s UnpackLow (this Vector8s va, Vector8s vb)
		{
			return new Vector8s (va.v0, vb.v0, va.v1, vb.v1, va.v2, vb.v2, va.v3, vb.v3);
		}

		/* Interleaves lanes 0-3 of va and vb. */
		[Acceleration (AccelMode.SSE2)]
		[CLSCompliant (false)]
		public static unsafe Vector8us UnpackLow (this Vector8us va, Vector8us vb)
		{
			return new Vector8us (va.v0, vb.v0, va.v1, vb.v1, va.v2, vb.v2, va.v3, vb.v3);
		}

		/* Interleaves lanes 0-7 of va and vb. */
		[Acceleration (AccelMode.SSE2)]
		[CLSCompliant (false)]
		public static unsafe Vector16sb UnpackLow (this Vector16sb va, Vector16sb vb)
		{
			return new Vector16sb (va.v0, vb.v0, va.v1, vb.v1, va.v2, vb.v2, va.v3, vb.v3, va.v4, vb.v4, va.v5, vb.v5, va.v6, vb.v6, va.v7, vb.v7);
		}

		/* Interleaves lanes 0-7 of va and vb. */
		[Acceleration (AccelMode.SSE2)]
		public static unsafe Vector16b UnpackLow (this Vector16b va, Vector16b vb)
		{
			return new Vector16b (va.v0, vb.v0, va.v1, vb.v1, va.v2, vb.v2, va.v3, vb.v3, va.v4, vb.v4, va.v5, vb.v5, va.v6, vb.v6, va.v7, vb.v7);
		}

		/* Pairs the high lanes: (v1.y, v2.y). */
		[Acceleration (AccelMode.SSE2)]
		public static Vector2l UnpackHigh (this Vector2l v1, Vector2l v2)
		{
			return new Vector2l (v1.y, v2.y);
		}

		/* Pairs the high lanes: (v1.y, v2.y). */
		[Acceleration (AccelMode.SSE2)]
		[CLSCompliant (false)]
		public static Vector2ul UnpackHigh (this Vector2ul v1, Vector2ul v2)
		{
			return new Vector2ul (v1.y, v2.y);
		}

		/* Interleaves the upper halves: (v1.z, v2.z, v1.w, v2.w). */
		[Acceleration (AccelMode.SSE2)]
		public static Vector4i UnpackHigh (this Vector4i v1, Vector4i v2)
		{
			return new Vector4i (v1.z, v2.z, v1.w, v2.w);
		}

		/* Interleaves the upper halves: (v1.z, v2.z, v1.w, v2.w). */
		[Acceleration (AccelMode.SSE2)]
		[CLSCompliant (false)]
		public static Vector4ui UnpackHigh (this Vector4ui v1, Vector4ui v2)
		{
			return new Vector4ui (v1.z, v2.z, v1.w, v2.w);
		}

		/* Interleaves lanes 4-7 of va and vb. */
		[Acceleration (AccelMode.SSE2)]
		public static unsafe Vector8s UnpackHigh (this Vector8s va, Vector8s vb)
		{
			return new Vector8s (va.v4, vb.v4, va.v5, vb.v5, va.v6, vb.v6, va.v7, vb.v7);
		}

		/* Interleaves lanes 4-7 of va and vb. */
		[Acceleration (AccelMode.SSE2)]
		[CLSCompliant (false)]
		public static unsafe Vector8us UnpackHigh (this Vector8us va, Vector8us vb)
		{
			return new Vector8us (va.v4, vb.v4, va.v5, vb.v5, va.v6, vb.v6, va.v7, vb.v7);
		}

		/* Interleaves lanes 8-15 of va and vb. */
		[Acceleration (AccelMode.SSE2)]
		[CLSCompliant (false)]
		public static unsafe Vector16sb UnpackHigh (this Vector16sb va, Vector16sb vb)
		{
			return new Vector16sb (va.v8, vb.v8, va.v9, vb.v9, va.v10, vb.v10, va.v11, vb.v11, va.v12, vb.v12, va.v13, vb.v13, va.v14, vb.v14, va.v15, vb.v15);
		}

		/* Interleaves lanes 8-15 of va and vb. */
		[Acceleration (AccelMode.SSE2)]
		public static unsafe Vector16b UnpackHigh (this Vector16b va, Vector16b vb)
		{
			return new Vector16b (va.v8, vb.v8, va.v9, vb.v9, va.v10, vb.v10, va.v11, vb.v11, va.v12, vb.v12, va.v13, vb.v13, va.v14, vb.v14, va.v15, vb.v15);
		}

		/*
		 * Result lanes 0-1 are picked from v1 and lanes 2-3 from v2. Each 2-bit field
		 * of sel (bits 0-1, 2-3, 4-5, 6-7) is the source lane index for one result lane.
		 */
		[Acceleration (AccelMode.SSE2)]
		public static unsafe Vector4f Shuffle (this Vector4f v1, Vector4f v2, ShuffleSel sel)
		{
			float *p1 = (float*)&v1;
			float *p2 = (float*)&v2;
			int idx = (int)sel;
			return new Vector4f (*(p1 + ((idx >> 0) & 0x3)), *(p1 + ((idx >> 2) & 0x3)), *(p2 + ((idx >> 4) & 0x3)), *(p2 + ((idx >> 6) & 0x3)));
		}

		/*
		 * Result lanes 0-1 are picked from v1 and lanes 2-3 from v2. Each 2-bit field
		 * of sel (bits 0-1, 2-3, 4-5, 6-7) is the source lane index for one result lane.
		 */
		[Acceleration (AccelMode.SSE2)]
		public static unsafe Vector4i Shuffle (this Vector4i v1, Vector4i v2, ShuffleSel sel)
		{
			int *p1 = (int*)&v1;
			int *p2 = (int*)&v2;
			int idx = (int)sel;
			return new Vector4i (*(p1 + ((idx >> 0) & 0x3)), *(p1 + ((idx >> 2) & 0x3)), *(p2 + ((idx >> 4) & 0x3)), *(p2 + ((idx >> 6) & 0x3)));
		}

		/*
		 * Result lanes 0-1 are picked from v1 and lanes 2-3 from v2. Each 2-bit field
		 * of sel (bits 0-1, 2-3, 4-5, 6-7) is the source lane index for one result lane.
		 * Marked non-CLS-compliant like every other Vector4ui method in this class.
		 */
		[Acceleration (AccelMode.SSE2)]
		[CLSCompliant (false)]
		public static unsafe Vector4ui Shuffle (this Vector4ui v1, Vector4ui v2, ShuffleSel sel)
		{
			uint *p1 = (uint*)&v1;
			uint *p2 = (uint*)&v2;
			int idx = (int)sel;
			return new Vector4ui (*(p1 + ((idx >> 0) & 0x3)), *(p1 + ((idx >> 2) & 0x3)), *(p2 + ((idx >> 4) & 0x3)), *(p2 + ((idx >> 6) & 0x3)));
		}

		/* Result lane 0 is the lane of v1 selected by bit 0 of sel; lane 1 is the lane of v2 selected by bit 1. */
		[Acceleration (AccelMode.SSE2)]
		public static unsafe Vector2d Shuffle (this Vector2d v1, Vector2d v2, int sel)
		{
			double *p1 = (double*)&v1;
			double *p2 = (double*)&v2;
			return new Vector2d (*(p1 + ((sel >> 0) & 0x1)), *(p2 + ((sel >> 1) & 0x1)));
		}

		[Acceleration (AccelMode.SSE2)]
		public static unsafe Vector2l Shuffle (this Vector2l v1, Vector2l v2, int sel)
{ 1002 long *p1 = (long*)&v1; 1003 long *p2 = (long*)&v2; 1004 return new Vector2l (*(p1 + ((sel >> 0) & 0x1)), *(p2 + ((sel >> 1) & 0x1))); 1005 } 1006 1007 [Acceleration (AccelMode.SSE2)] Shuffle(this Vector2ul v1, Vector2ul v2, int sel)1008 public static unsafe Vector2ul Shuffle (this Vector2ul v1, Vector2ul v2, int sel) 1009 { 1010 ulong *p1 = (ulong*)&v1; 1011 ulong *p2 = (ulong*)&v2; 1012 return new Vector2ul (*(p1 + ((sel >> 0) & 0x1)), *(p2 + ((sel >> 1) & 0x1))); 1013 } 1014 1015 [Acceleration (AccelMode.SSE2)] Shuffle(this Vector4f v1, ShuffleSel sel)1016 public static unsafe Vector4f Shuffle (this Vector4f v1, ShuffleSel sel) 1017 { 1018 float *ptr = (float*)&v1; 1019 int idx = (int)sel; 1020 return new Vector4f (*(ptr + ((idx >> 0) & 0x3)),*(ptr + ((idx >> 2) & 0x3)),*(ptr + ((idx >> 4) & 0x3)),*(ptr + ((idx >> 6) & 0x3))); 1021 } 1022 1023 [Acceleration (AccelMode.SSE2)] Shuffle(this Vector4i v1, ShuffleSel sel)1024 public static unsafe Vector4i Shuffle (this Vector4i v1, ShuffleSel sel) 1025 { 1026 int *ptr = (int*)&v1; 1027 int idx = (int)sel; 1028 return new Vector4i (*(ptr + ((idx >> 0) & 0x3)),*(ptr + ((idx >> 2) & 0x3)),*(ptr + ((idx >> 4) & 0x3)),*(ptr + ((idx >> 6) & 0x3))); 1029 } 1030 1031 [Acceleration (AccelMode.SSE2)] 1032 [CLSCompliant (false)] Shuffle(this Vector4ui v1, ShuffleSel sel)1033 public static unsafe Vector4ui Shuffle (this Vector4ui v1, ShuffleSel sel) 1034 { 1035 uint *ptr = (uint*)&v1; 1036 int idx = (int)sel; 1037 return new Vector4ui (*(ptr + ((idx >> 0) & 0x3)),*(ptr + ((idx >> 2) & 0x3)),*(ptr + ((idx >> 4) & 0x3)),*(ptr + ((idx >> 6) & 0x3))); 1038 } 1039 1040 [Acceleration (AccelMode.SSE2)] ShuffleHigh(this Vector8s va, ShuffleSel sel)1041 public static unsafe Vector8s ShuffleHigh (this Vector8s va, ShuffleSel sel) 1042 { 1043 short *ptr = ((short*)&va) + 4; 1044 int idx = (int)sel; 1045 return new Vector8s (va.v0, va.v1, va.v2, va.v3, *(ptr + ((idx >> 0) & 0x3)), *(ptr + ((idx >> 2) & 0x3)), *(ptr + ((idx >> 4) & 
0x3)), *(ptr + ((idx >> 6) & 0x3))); 1046 } 1047 1048 [Acceleration (AccelMode.SSE2)] 1049 [CLSCompliant (false)] ShuffleHigh(this Vector8us va, ShuffleSel sel)1050 public static unsafe Vector8us ShuffleHigh (this Vector8us va, ShuffleSel sel) 1051 { 1052 ushort *ptr = ((ushort*)&va) + 4; 1053 int idx = (int)sel; 1054 return new Vector8us (va.v0, va.v1, va.v2, va.v3, *(ptr + ((idx >> 0) & 0x3)), *(ptr + ((idx >> 2) & 0x3)), *(ptr + ((idx >> 4) & 0x3)), *(ptr + ((idx >> 6) & 0x3))); 1055 } 1056 1057 [Acceleration (AccelMode.SSE2)] ShuffleLow(this Vector8s va, ShuffleSel sel)1058 public static unsafe Vector8s ShuffleLow (this Vector8s va, ShuffleSel sel) 1059 { 1060 short *ptr = ((short*)&va); 1061 int idx = (int)sel; 1062 return new Vector8s (*(ptr + ((idx >> 0) & 0x3)), *(ptr + ((idx >> 2) & 0x3)), *(ptr + ((idx >> 4) & 0x3)), *(ptr + ((idx >> 6) & 0x3)), va.v4, va.v5, va.v6, va.v7); 1063 } 1064 1065 [Acceleration (AccelMode.SSE2)] 1066 [CLSCompliant (false)] ShuffleLow(this Vector8us va, ShuffleSel sel)1067 public static unsafe Vector8us ShuffleLow (this Vector8us va, ShuffleSel sel) 1068 { 1069 ushort *ptr = ((ushort*)&va); 1070 int idx = (int)sel; 1071 return new Vector8us (*(ptr + ((idx >> 0) & 0x3)), *(ptr + ((idx >> 2) & 0x3)), *(ptr + ((idx >> 4) & 0x3)), *(ptr + ((idx >> 6) & 0x3)), va.v4, va.v5, va.v6, va.v7); 1072 } 1073 1074 [CLSCompliant(false)] 1075 [Acceleration (AccelMode.SSE41)] PackWithUnsignedSaturation(this Vector4i va, Vector4i vb)1076 public static unsafe Vector8us PackWithUnsignedSaturation (this Vector4i va, Vector4i vb) { 1077 Vector8us res = new Vector8us (); 1078 int *a = (int*)&va; 1079 int *b = (int*)&vb; 1080 ushort *c = (ushort*)&res; 1081 for (int i = 0; i < 4; ++i) 1082 *c++ = (ushort)System.Math.Max (0, System.Math.Min (*a++, ushort.MaxValue)); 1083 for (int i = 0; i < 4; ++i) 1084 *c++ = (ushort)System.Math.Max (0, System.Math.Min (*b++, ushort.MaxValue)); 1085 return res; 1086 } 1087 1088 1089 [Acceleration (AccelMode.SSE2)] 
PackWithUnsignedSaturation(this Vector8s va, Vector8s vb)1090 public static unsafe Vector16b PackWithUnsignedSaturation (this Vector8s va, Vector8s vb) { 1091 Vector16b res = new Vector16b (); 1092 short *a = (short*)&va; 1093 short *b = (short*)&vb; 1094 byte *c = (byte*)&res; 1095 for (int i = 0; i < 8; ++i) 1096 *c++ = (byte)System.Math.Max (0, System.Math.Min ((int)*a++, byte.MaxValue)); 1097 for (int i = 0; i < 8; ++i) 1098 *c++ = (byte)System.Math.Max (0, System.Math.Min ((int)*b++, byte.MaxValue)); 1099 return res; 1100 } 1101 1102 [Acceleration (AccelMode.SSE2)] PackWithSignedSaturation(this Vector4i va, Vector4i vb)1103 public static unsafe Vector8s PackWithSignedSaturation (this Vector4i va, Vector4i vb) { 1104 Vector8s res = new Vector8s (); 1105 int *a = (int*)&va; 1106 int *b = (int*)&vb; 1107 short *c = (short*)&res; 1108 for (int i = 0; i < 4; ++i) 1109 *c++ = (short)System.Math.Max (System.Math.Min ((int)*a++, short.MaxValue), short.MinValue); 1110 for (int i = 0; i < 4; ++i) 1111 *c++ = (short)System.Math.Max (System.Math.Min ((int)*b++, short.MaxValue), short.MinValue); 1112 return res; 1113 } 1114 1115 [CLSCompliant(false)] 1116 [Acceleration (AccelMode.SSE2)] PackWithSignedSaturation(this Vector8s va, Vector8s vb)1117 public static unsafe Vector16sb PackWithSignedSaturation (this Vector8s va, Vector8s vb) { 1118 Vector16sb res = new Vector16sb (); 1119 short *a = (short*)&va; 1120 short *b = (short*)&vb; 1121 sbyte *c = (sbyte*)&res; 1122 for (int i = 0; i < 8; ++i) 1123 *c++ = (sbyte)System.Math.Max (System.Math.Min ((int)*a++, sbyte.MaxValue), sbyte.MinValue); 1124 for (int i = 0; i < 8; ++i) 1125 *c++ = (sbyte)System.Math.Max (System.Math.Min ((int)*b++, sbyte.MaxValue), sbyte.MinValue); 1126 return res; 1127 } 1128 1129 /* This function performs a packusdw, which treats the source as a signed value */ 1130 [Acceleration (AccelMode.SSE41)] 1131 [CLSCompliant (false)] SignedPackWithUnsignedSaturation(this Vector4ui va, Vector4ui vb)1132 public 
static unsafe Vector8us SignedPackWithUnsignedSaturation (this Vector4ui va, Vector4ui vb) { 1133 Vector8us res = new Vector8us (); 1134 int *a = (int*)&va; 1135 int *b = (int*)&vb; 1136 ushort *c = (ushort*)&res; 1137 for (int i = 0; i < 4; ++i) 1138 *c++ = (ushort)System.Math.Max (0, System.Math.Min (*a++, ushort.MaxValue)); 1139 for (int i = 0; i < 4; ++i) 1140 *c++ = (ushort)System.Math.Max (0, System.Math.Min (*b++, ushort.MaxValue)); 1141 return res; 1142 } 1143 1144 /*This function performs a packuswb, which treats the source as a signed value */ 1145 [Acceleration (AccelMode.SSE2)] 1146 [CLSCompliant (false)] SignedPackWithUnsignedSaturation(this Vector8us va, Vector8us vb)1147 public static unsafe Vector16b SignedPackWithUnsignedSaturation (this Vector8us va, Vector8us vb) { 1148 Vector16b res = new Vector16b (); 1149 short *a = (short*)&va; 1150 short *b = (short*)&vb; 1151 byte *c = (byte*)&res; 1152 for (int i = 0; i < 8; ++i) 1153 *c++ = (byte)System.Math.Max (0, System.Math.Min ((int)*a++, byte.MaxValue)); 1154 for (int i = 0; i < 8; ++i) 1155 *c++ = (byte)System.Math.Max (0, System.Math.Min ((int)*b++, byte.MaxValue)); 1156 return res; 1157 } 1158 1159 /* This function performs a packssdw, which treats the source as a signed value*/ 1160 [Acceleration (AccelMode.SSE2)] 1161 [CLSCompliant (false)] SignedPackWithSignedSaturation(this Vector4ui va, Vector4ui vb)1162 public static unsafe Vector8s SignedPackWithSignedSaturation (this Vector4ui va, Vector4ui vb) { 1163 Vector8s res = new Vector8s (); 1164 int *a = (int*)&va; 1165 int *b = (int*)&vb; 1166 short *c = (short*)&res; 1167 for (int i = 0; i < 4; ++i) 1168 *c++ = (short)System.Math.Max (System.Math.Min ((int)*a++, short.MaxValue), short.MinValue); 1169 for (int i = 0; i < 4; ++i) 1170 *c++ = (short)System.Math.Max (System.Math.Min ((int)*b++, short.MaxValue), short.MinValue); 1171 return res; 1172 } 1173 1174 /*This function performs a packsswb, which treats the source as a signed value */ 1175 
[Acceleration (AccelMode.SSE2)]
[CLSCompliant (false)]
public static unsafe Vector16sb SignedPackWithSignedSaturation (this Vector8us va, Vector8us vb)
{
	/* The unsigned 16-bit inputs are deliberately read back as signed shorts
	 * before being clamped to the sbyte range. */
	short *first = (short*)&va;
	short *second = (short*)&vb;
	Vector16sb packed = new Vector16sb ();
	sbyte *dst = (sbyte*)&packed;
	for (int lane = 0; lane < 8; lane++) {
		int fromFirst = first [lane];
		int fromSecond = second [lane];
		/* va lands in result elements 0..7, vb in elements 8..15. */
		dst [lane] = (sbyte)System.Math.Max (System.Math.Min (fromFirst, sbyte.MaxValue), sbyte.MinValue);
		dst [lane + 8] = (sbyte)System.Math.Max (System.Math.Min (fromSecond, sbyte.MaxValue), sbyte.MinValue);
	}
	return packed;
}

/* Converts the four 32-bit integers of v0 to single precision floats. */
[Acceleration (AccelMode.SSE2)]
public static unsafe Vector4f ConvertToFloat (this Vector4i v0)
{
	float x = v0.X;
	float y = v0.Y;
	float z = v0.Z;
	float w = v0.W;
	return new Vector4f (x, y, z, w);
}

/* Converts X and Y of v0 to doubles; Z and W are not read. */
[Acceleration (AccelMode.SSE2)]
public static unsafe Vector2d ConvertToDouble (this Vector4i v0)
{
	double first = v0.X;
	double second = v0.Y;
	return new Vector2d (first, second);
}

/* Rounds both doubles with System.Math.Round and stores them in the first
 * two elements of the result; the last two elements are zero. */
[Acceleration (AccelMode.SSE2)]
public static unsafe Vector4i ConvertToInt (this Vector2d v0)
{
	int first = (int)System.Math.Round (v0.X);
	int second = (int)System.Math.Round (v0.Y);
	return new Vector4i (first, second, 0, 0);
}

/* Casts both doubles straight to int (no rounding call) and stores them in
 * the first two elements of the result; the last two elements are zero. */
[Acceleration (AccelMode.SSE2)]
public static unsafe Vector4i ConvertToIntTruncated (this Vector2d v0)
{
	int first = (int) (v0.X);
	int second = (int) (v0.Y);
	return new Vector4i (first, second, 0, 0);
}

/* Narrows both doubles to floats in the first two elements of the result;
 * the last two elements are zero. */
[Acceleration (AccelMode.SSE2)]
public static unsafe Vector4f ConvertToFloat (this Vector2d v0)
{
	float first = (float)v0.X;
	float second = (float)v0.Y;
	return new Vector4f (first, second, 0, 0);
}

/* Rounds each of the four floats with System.Math.Round and casts it to int. */
[Acceleration (AccelMode.SSE2)]
public static unsafe Vector4i ConvertToInt (this Vector4f v0)
{
	int x = (int)System.Math.Round (v0.X);
	int y = (int)System.Math.Round (v0.Y);
	int z = (int)System.Math.Round (v0.Z);
	int w = (int)System.Math.Round (v0.W);
	return new Vector4i (x, y, z, w);
}

/* Casts each of the four floats straight to int (no rounding call). */
[Acceleration (AccelMode.SSE2)]
public static unsafe Vector4i ConvertToIntTruncated (this Vector4f v0)
{
	int x = (int)v0.X;
	int y = (int)v0.Y;
	int z = (int)v0.Z;
	int w = (int)v0.W;
	return new Vector4i (x, y, z, w);
}

/* Widens X and Y of v0 to doubles; Z and W are not read. */
[Acceleration (AccelMode.SSE2)]
public static unsafe Vector2d ConvertToDouble (this Vector4f v0)
{
	double first = v0.X;
	double second = v0.Y;
	return new Vector2d (first, second);
}
}
}