1 // VectorOperations.cs
2 //
3 // Author:
4 //   Rodrigo Kumpera (rkumpera@novell.com)
5 //
6 // (C) 2008 Novell, Inc. (http://www.novell.com)
7 //
8 // Permission is hereby granted, free of charge, to any person obtaining
9 // a copy of this software and associated documentation files (the
10 // "Software"), to deal in the Software without restriction, including
11 // without limitation the rights to use, copy, modify, merge, publish,
12 // distribute, sublicense, and/or sell copies of the Software, and to
13 // permit persons to whom the Software is furnished to do so, subject to
14 // the following conditions:
15 //
16 // The above copyright notice and this permission notice shall be
17 // included in all copies or substantial portions of the Software.
18 //
19 // THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
20 // EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
21 // MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
22 // NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
23 // LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
24 // OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
25 // WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
26 //
27 
28 using System;
29 namespace Mono.Simd
30 {
31 	public static class VectorOperations
32 	{
33 		/* ==== Bitwise operations ==== */
34 
35 		[Acceleration (AccelMode.SSE1)]
AndNot(this Vector4f v1, Vector4f v2)36 		public static unsafe Vector4f AndNot (this Vector4f v1, Vector4f v2)
37 		{
38 			Vector4f res = new Vector4f ();
39 			int *a = (int*)&v1;
40 			int *b = (int*)&v2;
41 			int *c = (int*)&res;
42 			*c++ = ~*a++ & *b++;
43 			*c++ = ~*a++ & *b++;
44 			*c++ = ~*a++ & *b++;
45 			*c = ~*a & *b;
46 			return res;
47 		}
48 
49 		[Acceleration (AccelMode.SSE2)]
AndNot(this Vector2d v1, Vector2d v2)50 		public static unsafe Vector2d AndNot (this Vector2d v1, Vector2d v2)
51 		{
52 			Vector2d res = new Vector2d ();
53 			int *a = (int*)&v1;
54 			int *b = (int*)&v2;
55 			int *c = (int*)&res;
56 			*c++ = ~*a++ & *b++;
57 			*c++ = ~*a++ & *b++;
58 			*c++ = ~*a++ & *b++;
59 			*c = ~*a & *b;
60 			return res;
61 		}
62 
63 		[Acceleration (AccelMode.SSE2)]
64 		[CLSCompliant (false)]
ArithmeticRightShift(this Vector4ui v1, int amount)65 		public static unsafe Vector4ui ArithmeticRightShift (this Vector4ui v1, int amount)
66 		{
67 			Vector4ui res = new Vector4ui ();
68 			uint *a = &v1.x;
69 			uint *b = &res.x;
70 			for (int i = 0; i < 4; ++i)
71 				*b++ = (uint)((int)(*a++) >> amount);
72 			return res;
73 		}
74 
75 		[Acceleration (AccelMode.SSE2)]
76 		[CLSCompliant (false)]
ArithmeticRightShift(this Vector8us va, int amount)77 		public static unsafe Vector8us ArithmeticRightShift (this Vector8us va, int amount)
78 		{
79 			Vector8us res = new Vector8us ();
80 			ushort *a = &va.v0;
81 			ushort *b = &res.v0;
82 			for (int i = 0; i < 8; ++i)
83 				*b++ = (ushort)((short)(*a++) >> amount);
84 			return res;
85 		}
86 
87 		[Acceleration (AccelMode.SSE2)]
LogicalRightShift(this Vector2l v1, int amount)88 		public static unsafe Vector2l LogicalRightShift (this Vector2l v1, int amount)
89 		{
90 			return new Vector2l ((long)((ulong)(v1.x) >> amount), (long)((ulong)(v1.y) >> amount));
91 		}
92 
93 		[Acceleration (AccelMode.SSE2)]
LogicalRightShift(this Vector4i v1, int amount)94 		public static unsafe Vector4i LogicalRightShift (this Vector4i v1, int amount)
95 		{
96 			Vector4i res = new Vector4i ();
97 			int *a = &v1.x;
98 			int *b = &res.x;
99 			for (int i = 0; i < 4; ++i)
100 				*b++ = (int)((uint)(*a++) >> amount);
101 			return res;
102 		}
103 
104 		[Acceleration (AccelMode.SSE2)]
LogicalRightShift(this Vector8s va, int amount)105 		public static unsafe Vector8s LogicalRightShift (this Vector8s va, int amount)
106 		{
107 			Vector8s res = new Vector8s ();
108 			short *a = &va.v0;
109 			short *b = &res.v0;
110 			for (int i = 0; i < 8; ++i)
111 				*b++ = (short)((ushort)(*a++) >> amount);
112 			return res;
113 		}
114 
115 		[Acceleration (AccelMode.SSE2)]
116 		[CLSCompliant (false)]
ExtractByteMask(this Vector16sb va)117 		public static unsafe int ExtractByteMask (this Vector16sb va) {
118 			int res = 0;
119 			sbyte *a = (sbyte*)&va;
120 			for (int i = 0; i < 16; ++i)
121 				res |= (*a++ & 0x80) >> 7 << i;
122 			return res;
123 		}
124 
125 		[Acceleration (AccelMode.SSE2)]
ExtractByteMask(this Vector16b va)126 		public static unsafe int ExtractByteMask (this Vector16b va) {
127 			int res = 0;
128 			byte *a = (byte*)&va;
129 			for (int i = 0; i < 16; ++i)
130 				res |= (*a++ & 0x80) >> 7 << i;
131 			return res;
132 		}
133 
134 
135 		/* ==== Math operations ==== */
136 
137 		[Acceleration (AccelMode.SSE2)]
AddWithSaturation(this Vector8s va, Vector8s vb)138 		public static unsafe Vector8s AddWithSaturation (this Vector8s va, Vector8s vb) {
139 			Vector8s res = new Vector8s ();
140 			short *a = &va.v0;
141 			short *b = &vb.v0;
142 			short *c = &res.v0;
143 			for (int i = 0; i < 8; ++i)
144 				*c++ = (short) System.Math.Max (System.Math.Min (*a++ + *b++, short.MaxValue), short.MinValue);
145 			return res;
146 		}
147 
148 		[Acceleration (AccelMode.SSE2)]
149 		[CLSCompliant (false)]
AddWithSaturation(this Vector8us va, Vector8us vb)150 		public static unsafe Vector8us AddWithSaturation (this Vector8us va, Vector8us vb) {
151 			Vector8us res = new Vector8us ();
152 			ushort *a = &va.v0;
153 			ushort *b = &vb.v0;
154 			ushort *c = &res.v0;
155 			for (int i = 0; i < 8; ++i)
156 				*c++ = (ushort) System.Math.Min (*a++ + *b++, ushort.MaxValue);
157 			return res;
158 		}
159 
160 		[Acceleration (AccelMode.SSE2)]
161 		[CLSCompliant (false)]
AddWithSaturation(this Vector16sb va, Vector16sb vb)162 		public static unsafe Vector16sb AddWithSaturation (this Vector16sb va, Vector16sb vb) {
163 			Vector16sb res = new Vector16sb ();
164 			sbyte *a = &va.v0;
165 			sbyte *b = &vb.v0;
166 			sbyte *c = &res.v0;
167 			for (int i = 0; i < 16; ++i)
168 				*c++ = (sbyte) System.Math.Max (System.Math.Min (*a++ + *b++, sbyte.MaxValue), sbyte.MinValue);
169 			return res;
170 		}
171 
172 		[Acceleration (AccelMode.SSE2)]
AddWithSaturation(this Vector16b va, Vector16b vb)173 		public static unsafe Vector16b AddWithSaturation (this Vector16b va, Vector16b vb) {
174 			Vector16b res = new Vector16b ();
175 			byte *a = &va.v0;
176 			byte *b = &vb.v0;
177 			byte *c = &res.v0;
178 			for (int i = 0; i < 16; ++i)
179 				*c++ = (byte) System.Math.Min (*a++ + *b++, byte.MaxValue);
180 			return res;
181 		}
182 
183 		[Acceleration (AccelMode.SSE2)]
SubtractWithSaturation(this Vector8s va, Vector8s vb)184 		public static unsafe Vector8s SubtractWithSaturation (this Vector8s va, Vector8s vb) {
185 			Vector8s res = new Vector8s ();
186 			short *a = &va.v0;
187 			short *b = &vb.v0;
188 			short *c = &res.v0;
189 			for (int i = 0; i < 8; ++i)
190 				*c++ = (short) System.Math.Max (System.Math.Min (*a++ - *b++, short.MaxValue), short.MinValue); ;
191 			return res;
192 		}
193 
194 		[Acceleration (AccelMode.SSE2)]
195 		[CLSCompliant (false)]
SubtractWithSaturation(this Vector8us va, Vector8us vb)196 		public static unsafe Vector8us SubtractWithSaturation (this Vector8us va, Vector8us vb) {
197 			Vector8us res = new Vector8us ();
198 			ushort *a = &va.v0;
199 			ushort *b = &vb.v0;
200 			ushort *c = &res.v0;
201 			for (int i = 0; i < 8; ++i)
202 				*c++ = (ushort) System.Math.Max (*a++ - *b++, 0);
203 			return res;
204 		}
205 
206 		[Acceleration (AccelMode.SSE2)]
207 		[CLSCompliant (false)]
SubtractWithSaturation(this Vector16sb va, Vector16sb vb)208 		public static unsafe Vector16sb SubtractWithSaturation (this Vector16sb va, Vector16sb vb) {
209 			Vector16sb res = new Vector16sb ();
210 			sbyte *a = &va.v0;
211 			sbyte *b = &vb.v0;
212 			sbyte *c = &res.v0;
213 			for (int i = 0; i < 16; ++i)
214 				*c++ = (sbyte) System.Math.Max (System.Math.Min (*a++ - *b++, sbyte.MaxValue), sbyte.MinValue);
215 			return res;
216 		}
217 
218 		[Acceleration (AccelMode.SSE2)]
SubtractWithSaturation(this Vector16b va, Vector16b vb)219 		public static unsafe Vector16b SubtractWithSaturation (this Vector16b va, Vector16b vb) {
220 			Vector16b res = new Vector16b ();
221 			byte *a = &va.v0;
222 			byte *b = &vb.v0;
223 			byte *c = &res.v0;
224 			for (int i = 0; i < 16; ++i)
225 				*c++ = (byte) System.Math.Max (*a++ - *b++, 0);
226 			return res;
227 		}
228 
229 		[Acceleration (AccelMode.SSE2)]
MultiplyStoreHigh(this Vector8s va, Vector8s vb)230 		public static unsafe Vector8s MultiplyStoreHigh (this Vector8s va, Vector8s vb) {
231 			Vector8s res = new Vector8s ();
232 			short *a = &va.v0;
233 			short *b = &vb.v0;
234 			short *c = &res.v0;
235 			for (int i = 0; i < 8; ++i)
236 				*c++ = (short)((int)*a++ * (int)*b++ >> 16);
237 			return res;
238 		}
239 
240 		[Acceleration (AccelMode.SSE2)]
241 		[CLSCompliant (false)]
MultiplyStoreHigh(this Vector8us va, Vector8us vb)242 		public static unsafe Vector8us MultiplyStoreHigh (this Vector8us va, Vector8us vb) {
243 			Vector8us res = new Vector8us ();
244 			ushort *a = &va.v0;
245 			ushort *b = &vb.v0;
246 			ushort *c = &res.v0;
247 			for (int i = 0; i < 8; ++i)
248 				*c++ = (ushort)((uint)*a++ * (uint)*b++ >> 16);
249 			return res;
250 		}
251 
252 		[CLSCompliant(false)]
253 		[Acceleration (AccelMode.SSE2)]
SumOfAbsoluteDifferences(this Vector16b va, Vector16sb vb)254 		public static unsafe Vector8us SumOfAbsoluteDifferences (this Vector16b va, Vector16sb vb) {
255 			Vector8us res = new Vector8us ();
256 			byte *a = &va.v0;
257 			sbyte *b = (sbyte*)&vb;
258 
259 			int tmp = 0;
260 			for (int i = 0; i < 8; ++i)
261 				tmp += System.Math.Abs ((int)*a++ - (int)*b++);
262 			res.V0 = (ushort)tmp;
263 
264 			tmp = 0;
265 			for (int i = 0; i < 8; ++i)
266 				tmp += System.Math.Abs ((int)*a++ - (int)*b++);
267 			res.V4 = (ushort)tmp;
268 
269 			return res;
270 		}
271 
272 		[Acceleration (AccelMode.SSE1)]
Sqrt(this Vector4f v1)273 		public static Vector4f Sqrt (this Vector4f v1)
274 		{
275 			return new Vector4f ((float)System.Math.Sqrt ((float)v1.x),
276 								(float)System.Math.Sqrt ((float)v1.y),
277 								(float)System.Math.Sqrt ((float)v1.z),
278 								(float)System.Math.Sqrt ((float)v1.w));
279 		}
280 
281 		[Acceleration (AccelMode.SSE1)]
InvSqrt(this Vector4f v1)282 		public static Vector4f InvSqrt (this Vector4f v1)
283 		{
284 			return new Vector4f ((float)(1.0 / System.Math.Sqrt ((float)v1.x)),
285 								(float)(1.0 / System.Math.Sqrt ((float)v1.y)),
286 								(float)(1.0 / System.Math.Sqrt ((float)v1.z)),
287 								(float)(1.0 / System.Math.Sqrt ((float)v1.w)));
288 		}
289 
290 		[Acceleration (AccelMode.SSE1)]
Reciprocal(this Vector4f v1)291 		public static Vector4f Reciprocal (this Vector4f v1)
292 		{
293 			return new Vector4f (1.0f / v1.x, 1.0f / v1.y, 1.0f / v1.z, 1.0f / v1.w);
294 		}
295 
296 		[Acceleration (AccelMode.SSE2)]
Sqrt(this Vector2d v1)297 		public static Vector2d Sqrt (this Vector2d v1)
298 		{
299 			return new Vector2d (System.Math.Sqrt (v1.x),
300 								System.Math.Sqrt (v1.y));
301 		}
302 
303 		[Acceleration (AccelMode.SSE2)]
304 		[CLSCompliant (false)]
Average(this Vector8us va, Vector8us vb)305 		public static unsafe Vector8us Average (this Vector8us va, Vector8us vb) {
306 			Vector8us res = new Vector8us ();
307 			ushort *a = &va.v0;
308 			ushort *b = &vb.v0;
309 			ushort *c = &res.v0;
310 			for (int i = 0; i < 8; ++i)
311 				*c++ = (ushort) ((*a++ + *b++ + 1) >> 1);
312 			return res;
313 		}
314 
315 		[Acceleration (AccelMode.SSE2)]
Average(this Vector16b va, Vector16b vb)316 		public static unsafe Vector16b Average (this Vector16b va, Vector16b vb) {
317 			Vector16b res = new Vector16b ();
318 			byte *a = &va.v0;
319 			byte *b = &vb.v0;
320 			byte *c = &res.v0;
321 			for (int i = 0; i < 16; ++i)
322 				*c++ = (byte) ((*a++ + *b++ + 1) >> 1);
323 			return res;
324 		}
325 
326 		[Acceleration (AccelMode.SSE1)]
Max(this Vector4f v1, Vector4f v2)327 		public static Vector4f Max (this Vector4f v1, Vector4f v2)
328 		{
329 			return new Vector4f (System.Math.Max (v1.x, v2.x),
330 								System.Math.Max (v1.y, v2.y),
331 								System.Math.Max (v1.z, v2.z),
332 								System.Math.Max (v1.w, v2.w));
333 		}
334 
335 		[Acceleration (AccelMode.SSE2)]
Max(this Vector2d v1, Vector2d v2)336 		public static Vector2d Max (this Vector2d v1, Vector2d v2)
337 		{
338 			return new Vector2d (System.Math.Max (v1.x, v2.x),
339 								System.Math.Max (v1.y, v2.y));
340 		}
341 
342 		[Acceleration (AccelMode.SSE41)]
Max(this Vector4i v1, Vector4i v2)343 		public static Vector4i Max (this Vector4i v1, Vector4i v2)
344 		{
345 			return new Vector4i (System.Math.Max (v1.x, v2.x), System.Math.Max (v1.y, v2.y), System.Math.Max (v1.z, v2.z), System.Math.Max (v1.w, v2.w));
346 		}
347 
348 		[Acceleration (AccelMode.SSE41)]
349 		[CLSCompliant (false)]
Max(this Vector4ui v1, Vector4ui v2)350 		public static Vector4ui Max (this Vector4ui v1, Vector4ui v2)
351 		{
352 			return new Vector4ui (System.Math.Max (v1.x, v2.x), System.Math.Max (v1.y, v2.y), System.Math.Max (v1.z, v2.z), System.Math.Max (v1.w, v2.w));
353 		}
354 
355 		[Acceleration (AccelMode.SSE2)]
Max(this Vector8s va, Vector8s vb)356 		public static unsafe Vector8s Max (this Vector8s va, Vector8s vb) {
357 			Vector8s res = new Vector8s ();
358 			short *a = &va.v0;
359 			short *b = &vb.v0;
360 			short *c = &res.v0;
361 			for (int i = 0; i < 8; ++i)
362 				*c++ = (short) System.Math.Max (*a++, *b++);
363 			return res;
364 		}
365 
366 		[Acceleration (AccelMode.SSE41)]
367 		[CLSCompliant (false)]
Max(this Vector8us va, Vector8us vb)368 		public static unsafe Vector8us Max (this Vector8us va, Vector8us vb) {
369 			Vector8us res = new Vector8us ();
370 			ushort *a = &va.v0;
371 			ushort *b = &vb.v0;
372 			ushort *c = &res.v0;
373 			for (int i = 0; i < 8; ++i)
374 				*c++ = (ushort) System.Math.Max (*a++, *b++);
375 			return res;
376 		}
377 
378 		[Acceleration (AccelMode.SSE1)]
Min(this Vector4f v1, Vector4f v2)379 		public static Vector4f Min (this Vector4f v1, Vector4f v2)
380 		{
381 			return new Vector4f (System.Math.Min (v1.x, v2.x),
382 								System.Math.Min (v1.y, v2.y),
383 								System.Math.Min (v1.z, v2.z),
384 								System.Math.Min (v1.w, v2.w));
385 		}
386 
387 		[Acceleration (AccelMode.SSE41)]
388 		[CLSCompliant (false)]
Max(this Vector16sb va, Vector16sb vb)389 		public static unsafe Vector16sb Max (this Vector16sb va, Vector16sb vb) {
390 			Vector16sb res = new Vector16sb ();
391 			sbyte *a = &va.v0;
392 			sbyte *b = &vb.v0;
393 			sbyte *c = &res.v0;
394 			for (int i = 0; i < 16; ++i)
395 				*c++ = (sbyte) System.Math.Max (*a++, *b++);
396 			return res;
397 		}
398 
399 		[Acceleration (AccelMode.SSE2)]
Max(this Vector16b va, Vector16b vb)400 		public static unsafe Vector16b Max (this Vector16b va, Vector16b vb) {
401 			Vector16b res = new Vector16b ();
402 			byte *a = &va.v0;
403 			byte *b = &vb.v0;
404 			byte *c = &res.v0;
405 			for (int i = 0; i < 16; ++i)
406 				*c++ = (byte) System.Math.Max(*a++, *b++);
407 			return res;
408 		}
409 
410 		[Acceleration (AccelMode.SSE2)]
Min(this Vector2d v1, Vector2d v2)411 		public static Vector2d Min (this Vector2d v1, Vector2d v2)
412 		{
413 			return new Vector2d (System.Math.Min (v1.x, v2.x),
414 								System.Math.Min (v1.y, v2.y));
415 		}
416 
417 		[Acceleration (AccelMode.SSE41)]
Min(this Vector4i v1, Vector4i v2)418 		public static Vector4i Min (this Vector4i v1, Vector4i v2)
419 		{
420 			return new Vector4i (System.Math.Min (v1.x, v2.x), System.Math.Min (v1.y, v2.y), System.Math.Min (v1.z, v2.z), System.Math.Min (v1.w, v2.w));
421 		}
422 
423 		[Acceleration (AccelMode.SSE41)]
424 		[CLSCompliant (false)]
Min(this Vector4ui v1, Vector4ui v2)425 		public static Vector4ui Min (this Vector4ui v1, Vector4ui v2)
426 		{
427 			return new Vector4ui (System.Math.Min (v1.x, v2.x), System.Math.Min (v1.y, v2.y), System.Math.Min (v1.z, v2.z), System.Math.Min (v1.w, v2.w));
428 		}
429 
430 		[Acceleration (AccelMode.SSE2)]
Min(this Vector8s va, Vector8s vb)431 		public static unsafe Vector8s Min (this Vector8s va, Vector8s vb) {
432 			Vector8s res = new Vector8s ();
433 			short *a = &va.v0;
434 			short *b = &vb.v0;
435 			short *c = &res.v0;
436 			for (int i = 0; i < 8; ++i)
437 				*c++ = (short) System.Math.Min (*a++, *b++);
438 			return res;
439 		}
440 
441 		[Acceleration (AccelMode.SSE41)]
442 		[CLSCompliant (false)]
Min(this Vector8us va, Vector8us vb)443 		public static unsafe Vector8us Min (this Vector8us va, Vector8us vb) {
444 			Vector8us res = new Vector8us ();
445 			ushort *a = &va.v0;
446 			ushort *b = &vb.v0;
447 			ushort *c = &res.v0;
448 			for (int i = 0; i < 8; ++i)
449 				*c++ = (ushort) System.Math.Min (*a++, *b++);
450 			return res;
451 		}
452 
453 		[Acceleration (AccelMode.SSE41)]
454 		[CLSCompliant (false)]
Min(this Vector16sb va, Vector16sb vb)455 		public static unsafe Vector16sb Min (this Vector16sb va, Vector16sb vb) {
456 			Vector16sb res = new Vector16sb ();
457 			sbyte *a = &va.v0;
458 			sbyte *b = &vb.v0;
459 			sbyte *c = &res.v0;
460 			for (int i = 0; i < 16; ++i)
461 				*c++ = (sbyte) System.Math.Min(*a++, *b++);
462 			return res;
463 		}
464 
465 		[Acceleration (AccelMode.SSE2)]
Min(this Vector16b va, Vector16b vb)466 		public static unsafe Vector16b Min (this Vector16b va, Vector16b vb) {
467 			Vector16b res = new Vector16b ();
468 			byte *a = &va.v0;
469 			byte *b = &vb.v0;
470 			byte *c = &res.v0;
471 			for (int i = 0; i < 16; ++i)
472 				*c++ = (byte) System.Math.Min(*a++, *b++);
473 			return res;
474 		}
475 
476 
477 		/* ==== Horizontal operations ==== */
478 
479 		[Acceleration (AccelMode.SSE3)]
HorizontalAdd(this Vector4f v1, Vector4f v2)480 		public static Vector4f HorizontalAdd (this Vector4f v1, Vector4f v2)
481 		{
482 			return new Vector4f (v1.x + v1.y, v1.z + v1.w, v2.x + v2.y, v2.z + v2.w);
483 		}
484 		[Acceleration (AccelMode.SSE3)]
HorizontalAdd(this Vector2d v1, Vector2d v2)485 		public static Vector2d HorizontalAdd (this Vector2d v1, Vector2d v2)
486 		{
487 			return new Vector2d (v1.x + v1.y, v2.x + v2.y);
488 		}
489 
490 		[Acceleration (AccelMode.SSE3)]
HorizontalSub(this Vector4f v1, Vector4f v2)491 		public static Vector4f HorizontalSub (this Vector4f v1, Vector4f v2)
492 		{
493 			return new Vector4f (v1.x - v1.y, v1.z - v1.w, v2.x - v2.y, v2.z - v2.w);
494 		}
495 
496 		[Acceleration (AccelMode.SSE3)]
HorizontalSub(this Vector2d v1, Vector2d v2)497 		public static Vector2d HorizontalSub (this Vector2d v1, Vector2d v2)
498 		{
499 			return new Vector2d (v1.x - v1.y, v2.x - v2.y);
500 		}
501 
502 		[Acceleration (AccelMode.SSE3)]
AddSub(this Vector4f v1, Vector4f v2)503 		public static Vector4f AddSub (this Vector4f v1, Vector4f v2)
504 		{
505 			return new Vector4f (v1.x - v2.x, v1.y + v2.y, v1.z - v2.z, v1.w + v2.w);
506 		}
507 
508 		[Acceleration (AccelMode.SSE3)]
AddSub(this Vector2d v1, Vector2d v2)509 		public static Vector2d AddSub (this Vector2d v1, Vector2d v2)
510 		{
511 			return new Vector2d (v1.x - v2.x, v1.y + v2.y);
512 		}
513 
514 		/* ==== Compare methods ==== */
515 
516 		/*Same as a == b. */
517 		[Acceleration (AccelMode.SSE1)]
CompareEqual(this Vector4f v1, Vector4f v2)518 		public unsafe static Vector4f CompareEqual (this Vector4f v1, Vector4f v2)
519 		{
520 			Vector4f res = new Vector4f ();
521 			int *c = (int*)&res;
522 			*c++ = v1.x == v2.x ? -1 : 0;
523 			*c++ = v1.y == v2.y ? -1 : 0;
524 			*c++ = v1.z == v2.z ? -1 : 0;
525 			*c = v1.w == v2.w ? -1 : 0;
526 			return res;
527 		}
528 
529 		/*Same as a == b. */
530 		[Acceleration (AccelMode.SSE2)]
CompareEqual(this Vector2d v1, Vector2d v2)531 		public unsafe static Vector2d CompareEqual (this Vector2d v1, Vector2d v2)
532 		{
533 			Vector2d res = new Vector2d ();
534 			long *c = (long*)&res;
535 			*c++ = v1.x == v2.x ? -1 : 0;
536 			*c = v1.y == v2.y ? -1 : 0;
537 			return res;
538 		}
539 
540 		[Acceleration (AccelMode.SSE41)]
CompareEqual(this Vector2l v1, Vector2l v2)541 		public static Vector2l CompareEqual (this Vector2l v1, Vector2l v2)
542 		{
543 			return new Vector2l ((long)(v1.x ==  v2.x ? -1 : 0), (long)(v1.y ==  v2.y ? -1 : 0));
544 		}
545 
546 		[Acceleration (AccelMode.SSE41)]
547 		[CLSCompliant (false)]
CompareEqual(this Vector2ul v1, Vector2ul v2)548 		public static Vector2ul CompareEqual (this Vector2ul v1, Vector2ul v2)
549 		{
550 			return new Vector2ul ((ulong)(v1.x ==  v2.x ? -1 : 0), (ulong)(v1.y ==  v2.y ? -1 : 0));
551 		}
552 
553 		[Acceleration (AccelMode.SSE2)]
CompareEqual(this Vector4i v1, Vector4i v2)554 		public static Vector4i CompareEqual (this Vector4i v1, Vector4i v2)
555 		{
556 			return new Vector4i ((int)(v1.x ==  v2.x ? -1 : 0), (int)(v1.y ==  v2.y ? -1 : 0), (int)(v1.z ==  v2.z ? -1 : 0), (int)(v1.w ==  v2.w ? -1 : 0));
557 		}
558 
559 		[Acceleration (AccelMode.SSE2)]
560 		[CLSCompliant (false)]
CompareEqual(this Vector4ui v1, Vector4ui v2)561 		public static Vector4ui CompareEqual (this Vector4ui v1, Vector4ui v2)
562 		{
563 			return new Vector4ui ((uint)(v1.x ==  v2.x ? -1 : 0), (uint)(v1.y ==  v2.y ? -1 : 0), (uint)(v1.z ==  v2.z ? -1 : 0), (uint)(v1.w ==  v2.w ? -1 : 0));
564 		}
565 
566 		[Acceleration (AccelMode.SSE2)]
CompareEqual(this Vector8s va, Vector8s vb)567 		public static unsafe Vector8s CompareEqual (this Vector8s va, Vector8s vb) {
568 			Vector8s res = new Vector8s ();
569 			short *a = &va.v0;
570 			short *b = &vb.v0;
571 			short *c = &res.v0;
572 			for (int i = 0; i < 8; ++i)
573 				*c++ = (short) (*a++ == *b++ ? -1 : 0);
574 			return res;
575 		}
576 
577 		[Acceleration (AccelMode.SSE2)]
578 		[CLSCompliant (false)]
CompareEqual(this Vector8us va, Vector8us vb)579 		public static unsafe Vector8us CompareEqual (this Vector8us va, Vector8us vb) {
580 			Vector8us res = new Vector8us ();
581 			ushort *a = &va.v0;
582 			ushort *b = &vb.v0;
583 			ushort *c = &res.v0;
584 			for (int i = 0; i < 8; ++i)
585 				*c++ = (ushort) (*a++ == *b++ ? -1 : 0);
586 			return res;
587 		}
588 
589 		[Acceleration (AccelMode.SSE2)]
590 		[CLSCompliant (false)]
CompareEqual(this Vector16sb va, Vector16sb vb)591 		public static unsafe Vector16sb CompareEqual (this Vector16sb va, Vector16sb vb) {
592 			Vector16sb res = new Vector16sb ();
593 			sbyte *a = &va.v0;
594 			sbyte *b = &vb.v0;
595 			sbyte *c = &res.v0;
596 			for (int i = 0; i < 16; ++i)
597 				*c++ = (sbyte) (*a++ == *b++ ? -1 : 0);
598 			return res;
599 		}
600 
601 		[Acceleration (AccelMode.SSE2)]
CompareEqual(this Vector16b va, Vector16b vb)602 		public static unsafe Vector16b CompareEqual (this Vector16b va, Vector16b vb) {
603 			Vector16b res = new Vector16b ();
604 			byte *a = &va.v0;
605 			byte *b = &vb.v0;
606 			byte *c = &res.v0;
607 			for (int i = 0; i < 16; ++i)
608 				*c++ = (byte) (*a++ == *b++ ? -1 : 0);
609 			return res;
610 		}
611 
612 		/*Same as a < b. */
613 		[Acceleration (AccelMode.SSE1)]
CompareLessThan(this Vector4f v1, Vector4f v2)614 		public unsafe static Vector4f CompareLessThan (this Vector4f v1, Vector4f v2)
615 		{
616 			Vector4f res = new Vector4f ();
617 			int *c = (int*)&res;
618 			*c++ = v1.x < v2.x ? -1 : 0;
619 			*c++ = v1.y < v2.y ? -1 : 0;
620 			*c++ = v1.z < v2.z ? -1 : 0;
621 			*c = v1.w < v2.w ? -1 : 0;
622 			return res;
623 		}
624 
625 		/*Same as a < b. */
626 		[Acceleration (AccelMode.SSE2)]
CompareLessThan(this Vector2d v1, Vector2d v2)627 		public unsafe static Vector2d CompareLessThan (this Vector2d v1, Vector2d v2)
628 		{
629 			Vector2d res = new Vector2d ();
630 			long *c = (long*)&res;
631 			*c++ = v1.x < v2.x ? -1 : 0;
632 			*c = v1.y < v2.y ? -1 : 0;
633 			return res;
634 		}
635 
636 		/*Same as a <= b. */
637 		[Acceleration (AccelMode.SSE1)]
CompareLessEqual(this Vector4f v1, Vector4f v2)638 		public unsafe static Vector4f CompareLessEqual (this Vector4f v1, Vector4f v2)
639 		{
640 			Vector4f res = new Vector4f ();
641 			int *c = (int*)&res;
642 			*c++ = v1.x <= v2.x ? -1 : 0;
643 			*c++ = v1.y <= v2.y ? -1 : 0;
644 			*c++ = v1.z <= v2.z ? -1 : 0;
645 			*c = v1.w <= v2.w ? -1 : 0;
646 			return res;
647 		}
648 
649 		/*Same as a <= b. */
650 		[Acceleration (AccelMode.SSE2)]
CompareLessEqual(this Vector2d v1, Vector2d v2)651 		public unsafe static Vector2d CompareLessEqual (this Vector2d v1, Vector2d v2)
652 		{
653 			Vector2d res = new Vector2d ();
654 			long *c = (long*)&res;
655 			*c++ = v1.x <= v2.x ? -1 : 0;
656 			*c = v1.y <= v2.y ? -1 : 0;
657 			return res;
658 		}
659 
660 		[Acceleration (AccelMode.SSE42)]
CompareGreaterThan(this Vector2l v1, Vector2l v2)661 		public static Vector2l CompareGreaterThan (this Vector2l v1, Vector2l v2)
662 		{
663 			return new Vector2l ((long)(v1.x > v2.x ? -1 : 0), (long)(v1.y >  v2.y ? -1 : 0));
664 		}
665 
666 		[Acceleration (AccelMode.SSE2)]
CompareGreaterThan(this Vector4i v1, Vector4i v2)667 		public static Vector4i CompareGreaterThan (this Vector4i v1, Vector4i v2)
668 		{
669 			return new Vector4i ((int)(v1.x > v2.x ? -1 : 0), (int)(v1.y >  v2.y ? -1 : 0), (int)(v1.z >  v2.z ? -1 : 0), (int)(v1.w >  v2.w ? -1 : 0));
670 		}
671 
672 		[Acceleration (AccelMode.SSE2)]
CompareGreaterThan(this Vector8s va, Vector8s vb)673 		public static unsafe Vector8s CompareGreaterThan (this Vector8s va, Vector8s vb) {
674 			Vector8s res = new Vector8s ();
675 			short *a = &va.v0;
676 			short *b = &vb.v0;
677 			short *c = &res.v0;
678 			for (int i = 0; i < 8; ++i)
679 				*c++ = (short) (*a++ > *b++ ? -1 : 0);
680 			return res;
681 		}
682 
683 		[Acceleration (AccelMode.SSE2)]
684 		[CLSCompliant (false)]
CompareGreaterThan(this Vector16sb va, Vector16sb vb)685 		public static unsafe Vector16sb CompareGreaterThan (this Vector16sb va, Vector16sb vb) {
686 			Vector16sb res = new Vector16sb ();
687 			sbyte *a = &va.v0;
688 			sbyte *b = &vb.v0;
689 			sbyte *c = &res.v0;
690 			for (int i = 0; i < 16; ++i)
691 				*c++ = (sbyte) (*a++ > *b++ ? -1 : 0);
692 			return res;
693 		}
694 
695 		/*Same float.IsNaN (a) || float.IsNaN (b). */
696 		[Acceleration (AccelMode.SSE1)]
CompareUnordered(this Vector4f v1, Vector4f v2)697 		public unsafe static Vector4f CompareUnordered (this Vector4f v1, Vector4f v2)
698 		{
699 			Vector4f res = new Vector4f ();
700 			int *c = (int*)&res;
701 			*c++ = float.IsNaN (v1.x) || float.IsNaN (v2.x) ? -1 : 0;
702 			*c++ = float.IsNaN (v1.y) || float.IsNaN (v2.y) ? -1 : 0;
703 			*c++ = float.IsNaN (v1.z) || float.IsNaN (v2.z) ? -1 : 0;
704 			*c = float.IsNaN (v1.w) || float.IsNaN (v2.w) ? -1 : 0;
705 			return res;
706 		}
707 
708 		/*Same double.IsNaN (a) || double.IsNaN (b). */
709 		[Acceleration (AccelMode.SSE2)]
CompareUnordered(this Vector2d v1, Vector2d v2)710 		public unsafe static Vector2d CompareUnordered (this Vector2d v1, Vector2d v2)
711 		{
712 			Vector2d res = new Vector2d ();
713 			long *c = (long*)&res;
714 			*c++ = double.IsNaN (v1.x) || double.IsNaN (v2.x) ? -1 : 0;
715 			*c = double.IsNaN (v1.y) || double.IsNaN (v2.y) ? -1 : 0;
716 			return res;
717 		}
718 
719 		/*Same as a != b. */
720 		[Acceleration (AccelMode.SSE1)]
CompareNotEqual(this Vector4f v1, Vector4f v2)721 		public unsafe static Vector4f CompareNotEqual (this Vector4f v1, Vector4f v2)
722 		{
723 			Vector4f res = new Vector4f ();
724 			int *c = (int*)&res;
725 			*c++ = v1.x != v2.x ? -1 : 0;
726 			*c++ = v1.y != v2.y ? -1 : 0;
727 			*c++ = v1.z != v2.z ? -1 : 0;
728 			*c = v1.w != v2.w ? -1 : 0;
729 			return res;
730 		}
731 
732 		/*Same as a != b. */
733 		[Acceleration (AccelMode.SSE2)]
CompareNotEqual(this Vector2d v1, Vector2d v2)734 		public unsafe static Vector2d CompareNotEqual (this Vector2d v1, Vector2d v2)
735 		{
736 			Vector2d res = new Vector2d ();
737 			long *c = (long*)&res;
738 			*c++ = v1.x != v2.x ? -1 : 0;
739 			*c = v1.y != v2.y ? -1 : 0;
740 			return res;
741 		}
742 
743 		/*Same as !(a < b). */
744 		[Acceleration (AccelMode.SSE1)]
CompareNotLessThan(this Vector4f v1, Vector4f v2)745 		public unsafe static Vector4f CompareNotLessThan (this Vector4f v1, Vector4f v2)
746 		{
747 			Vector4f res = new Vector4f ();
748 			int *c = (int*)&res;
749 			*c++ = v1.x < v2.x ? 0 : -1;
750 			*c++ = v1.y < v2.y ? 0 : -1;
751 			*c++ = v1.z < v2.z ? 0 : -1;
752 			*c = v1.w < v2.w ? 0 : -1;
753 			return res;
754 		}
755 
756 		/*Same as !(a < b). */
757 		[Acceleration (AccelMode.SSE2)]
CompareNotLessThan(this Vector2d v1, Vector2d v2)758 		public unsafe static Vector2d CompareNotLessThan (this Vector2d v1, Vector2d v2)
759 		{
760 			Vector2d res = new Vector2d ();
761 			long *c = (long*)&res;
762 			*c++ = v1.x < v2.x ? 0 : -1;
763 			*c = v1.y < v2.y ? 0 : -1;
764 			return res;
765 		}
766 
767 		/*Same as !(a <= b). */
768 		[Acceleration (AccelMode.SSE1)]
CompareNotLessEqual(this Vector4f v1, Vector4f v2)769 		public unsafe static Vector4f CompareNotLessEqual (this Vector4f v1, Vector4f v2)
770 		{
771 			Vector4f res = new Vector4f ();
772 			int *c = (int*)&res;
773 			*c++ = v1.x <= v2.x ? 0 : -1;
774 			*c++ = v1.y <= v2.y ? 0 : -1;
775 			*c++ = v1.z <= v2.z ? 0 : -1;
776 			*c = v1.w <= v2.w ? 0 : -1;
777 			return res;
778 		}
779 
780 		/*Same as !(a <= b). */
781 		[Acceleration (AccelMode.SSE2)]
CompareNotLessEqual(this Vector2d v1, Vector2d v2)782 		public unsafe static Vector2d CompareNotLessEqual (this Vector2d v1, Vector2d v2)
783 		{
784 			Vector2d res = new Vector2d ();
785 			long *c = (long*)&res;
786 			*c++ = v1.x <= v2.x ? 0 : -1;
787 			*c = v1.y <= v2.y ? 0 : -1;
788 			return res;
789 		}
790 
791 		/*Same !float.IsNaN (a) && !float.IsNaN (b). */
792 		[Acceleration (AccelMode.SSE1)]
CompareOrdered(this Vector4f v1, Vector4f v2)793 		public unsafe static Vector4f CompareOrdered (this Vector4f v1, Vector4f v2)
794 		{
795 			Vector4f res = new Vector4f ();
796 			int *c = (int*)&res;
797 			*c++ = !float.IsNaN (v1.x) && !float.IsNaN (v2.x) ? -1 : 0;
798 			*c++ = !float.IsNaN (v1.y) && !float.IsNaN (v2.y) ? -1 : 0;
799 			*c++ = !float.IsNaN (v1.z) && !float.IsNaN (v2.z) ? -1 : 0;
800 			*c = !float.IsNaN (v1.w) && !float.IsNaN (v2.w) ? -1 : 0;
801 			return res;
802 		}
803 
804 		/*Same !double.IsNaN (a) && !double.IsNaN (b). */
805 		[Acceleration (AccelMode.SSE2)]
CompareOrdered(this Vector2d v1, Vector2d v2)806 		public unsafe static Vector2d CompareOrdered (this Vector2d v1, Vector2d v2)
807 		{
808 			Vector2d res = new Vector2d ();
809 			long *c = (long*)&res;
810 			*c++ = !double.IsNaN (v1.x) && !double.IsNaN (v2.x) ? -1 : 0;
811 			*c = !double.IsNaN (v1.y) && !double.IsNaN (v2.y) ? -1 : 0;
812 			return res;
813 		}
814 
815 
816 		/* ==== Data shuffling ==== */
817 
818 		[Acceleration (AccelMode.SSE1)]
InterleaveHigh(this Vector4f v1, Vector4f v2)819 		public static Vector4f InterleaveHigh (this Vector4f v1, Vector4f v2)
820 		{
821 			return new Vector4f (v1.z, v2.z, v1.w, v2.w);
822 		}
823 
824 		[Acceleration (AccelMode.SSE2)]
InterleaveHigh(this Vector2d v1, Vector2d v2)825 		public static Vector2d InterleaveHigh (this Vector2d v1, Vector2d v2)
826 		{
827 			return new Vector2d (v1.y, v2.y);
828 		}
829 
830 		[Acceleration (AccelMode.SSE1)]
InterleaveLow(this Vector4f v1, Vector4f v2)831 		public static Vector4f InterleaveLow (this Vector4f v1, Vector4f v2)
832 		{
833 			return new Vector4f (v1.x, v2.x, v1.y, v2.y);
834 		}
835 
836 		[Acceleration (AccelMode.SSE2)]
InterleaveLow(this Vector2d v1, Vector2d v2)837 		public static Vector2d InterleaveLow (this Vector2d v1, Vector2d v2)
838 		{
839 			return new Vector2d (v1.x, v2.x);
840 		}
841 
842 		[Acceleration (AccelMode.SSE3)]
Duplicate(this Vector2d v1)843 		public static Vector2d Duplicate (this Vector2d v1)
844 		{
845 			return new Vector2d (v1.x, v1.x);
846 		}
847 
848 		[Acceleration (AccelMode.SSE3)]
DuplicateLow(this Vector4f v1)849 		public static Vector4f DuplicateLow (this Vector4f v1)
850 		{
851 			return new Vector4f (v1.x, v1.x, v1.z, v1.z);
852 		}
853 
854 		[Acceleration (AccelMode.SSE3)]
DuplicateHigh(this Vector4f v1)855 		public static Vector4f DuplicateHigh (this Vector4f v1)
856 		{
857 			return new Vector4f (v1.y, v1.y, v1.w, v1.w);
858 		}
859 
860 		[Acceleration (AccelMode.SSE2)]
UnpackLow(this Vector2l v1, Vector2l v2)861 		public static Vector2l UnpackLow (this Vector2l v1, Vector2l v2)
862 		{
863 			return new Vector2l (v1.x, v2.x);
864 		}
865 
866 		[Acceleration (AccelMode.SSE2)]
867 		[CLSCompliant (false)]
UnpackLow(this Vector2ul v1, Vector2ul v2)868 		public static Vector2ul UnpackLow (this Vector2ul v1, Vector2ul v2)
869 		{
870 			return new Vector2ul (v1.x, v2.x);
871 		}
872 
873 		[Acceleration (AccelMode.SSE2)]
UnpackLow(this Vector4i v1, Vector4i v2)874 		public static Vector4i UnpackLow (this Vector4i v1, Vector4i v2)
875 		{
876 			return new Vector4i (v1.x, v2.x, v1.y, v2.y);
877 		}
878 
879 		[Acceleration (AccelMode.SSE2)]
880 		[CLSCompliant (false)]
UnpackLow(this Vector4ui v1, Vector4ui v2)881 		public static Vector4ui UnpackLow (this Vector4ui v1, Vector4ui v2)
882 		{
883 			return new Vector4ui (v1.x, v2.x, v1.y, v2.y);
884 		}
885 
886 		[Acceleration (AccelMode.SSE2)]
UnpackLow(this Vector8s va, Vector8s vb)887 		public static unsafe Vector8s UnpackLow (this Vector8s va, Vector8s vb)
888 		{
889 			return new Vector8s (va.v0, vb.v0, va.v1, vb.v1, va.v2, vb.v2, va.v3, vb.v3);
890 		}
891 
892 		[Acceleration (AccelMode.SSE2)]
893 		[CLSCompliant (false)]
UnpackLow(this Vector8us va, Vector8us vb)894 		public static unsafe Vector8us UnpackLow (this Vector8us va, Vector8us vb)
895 		{
896 			return new Vector8us (va.v0, vb.v0, va.v1, vb.v1, va.v2, vb.v2, va.v3, vb.v3);
897 		}
898 
899 		[Acceleration (AccelMode.SSE2)]
900 		[CLSCompliant (false)]
UnpackLow(this Vector16sb va, Vector16sb vb)901 		public static unsafe Vector16sb UnpackLow (this Vector16sb va, Vector16sb vb)
902 		{
903 			return new Vector16sb (va.v0, vb.v0, va.v1, vb.v1, va.v2, vb.v2, va.v3, vb.v3, va.v4, vb.v4, va.v5, vb.v5, va.v6, vb.v6, va.v7, vb.v7);
904 		}
905 
906 		[Acceleration (AccelMode.SSE2)]
UnpackLow(this Vector16b va, Vector16b vb)907 		public static unsafe Vector16b UnpackLow (this Vector16b va, Vector16b vb)
908 		{
909 			return new Vector16b (va.v0, vb.v0, va.v1, vb.v1, va.v2, vb.v2, va.v3, vb.v3, va.v4, vb.v4, va.v5, vb.v5, va.v6, vb.v6, va.v7, vb.v7);
910 		}
911 
912 		[Acceleration (AccelMode.SSE2)]
UnpackHigh(this Vector2l v1, Vector2l v2)913 		public static Vector2l UnpackHigh (this Vector2l v1, Vector2l v2)
914 		{
915 			return new Vector2l (v1.y, v2.y);
916 		}
917 
918 		[Acceleration (AccelMode.SSE2)]
919 		[CLSCompliant (false)]
UnpackHigh(this Vector2ul v1, Vector2ul v2)920 		public static Vector2ul UnpackHigh (this Vector2ul v1, Vector2ul v2)
921 		{
922 			return new Vector2ul (v1.y, v2.y);
923 		}
924 
925 		[Acceleration (AccelMode.SSE2)]
UnpackHigh(this Vector4i v1, Vector4i v2)926 		public static Vector4i UnpackHigh (this Vector4i v1, Vector4i v2)
927 		{
928 			return new Vector4i (v1.z, v2.z, v1.w, v2.w);
929 		}
930 
931 		[Acceleration (AccelMode.SSE2)]
932 		[CLSCompliant (false)]
UnpackHigh(this Vector4ui v1, Vector4ui v2)933 		public static Vector4ui UnpackHigh (this Vector4ui v1, Vector4ui v2)
934 		{
935 			return new Vector4ui (v1.z, v2.z, v1.w, v2.w);
936 		}
937 
938 		[Acceleration (AccelMode.SSE2)]
UnpackHigh(this Vector8s va, Vector8s vb)939 		public static unsafe Vector8s UnpackHigh (this Vector8s va, Vector8s vb)
940 		{
941 			return new Vector8s (va.v4, vb.v4, va.v5, vb.v5, va.v6, vb.v6, va.v7, vb.v7);
942 		}
943 
944 		[Acceleration (AccelMode.SSE2)]
945 		[CLSCompliant (false)]
UnpackHigh(this Vector8us va, Vector8us vb)946 		public static unsafe Vector8us UnpackHigh (this Vector8us va, Vector8us vb)
947 		{
948 			return new Vector8us (va.v4, vb.v4, va.v5, vb.v5, va.v6, vb.v6, va.v7, vb.v7);
949 		}
950 
951 		[Acceleration (AccelMode.SSE2)]
952 		[CLSCompliant (false)]
UnpackHigh(this Vector16sb va, Vector16sb vb)953 		public static unsafe Vector16sb UnpackHigh (this Vector16sb va, Vector16sb vb)
954 		{
955 			return new Vector16sb (va.v8, vb.v8, va.v9, vb.v9, va.v10, vb.v10, va.v11, vb.v11, va.v12, vb.v12, va.v13, vb.v13, va.v14, vb.v14, va.v15, vb.v15);
956 		}
957 
958 		[Acceleration (AccelMode.SSE2)]
UnpackHigh(this Vector16b va, Vector16b vb)959 		public static unsafe Vector16b UnpackHigh (this Vector16b va, Vector16b vb)
960 		{
961 			return new Vector16b (va.v8, vb.v8, va.v9, vb.v9, va.v10, vb.v10, va.v11, vb.v11, va.v12, vb.v12, va.v13, vb.v13, va.v14, vb.v14, va.v15, vb.v15);
962 		}
963 
964 		[Acceleration (AccelMode.SSE2)]
Shuffle(this Vector4f v1, Vector4f v2, ShuffleSel sel)965 		public static unsafe Vector4f Shuffle (this Vector4f v1, Vector4f v2, ShuffleSel sel)
966 		{
967 			float *p1 = (float*)&v1;
968 			float *p2 = (float*)&v2;
969 			int idx = (int)sel;
970 			return new Vector4f (*(p1 + ((idx >> 0) & 0x3)), *(p1 + ((idx >> 2) & 0x3)), *(p2 + ((idx >> 4) & 0x3)), *(p2 + ((idx >> 6) & 0x3)));
971 		}
972 
973 		[Acceleration (AccelMode.SSE2)]
Shuffle(this Vector4i v1, Vector4i v2, ShuffleSel sel)974 		public static unsafe Vector4i Shuffle (this Vector4i v1, Vector4i v2, ShuffleSel sel)
975 		{
976 			int *p1 = (int*)&v1;
977 			int *p2 = (int*)&v2;
978 			int idx = (int)sel;
979 			return new Vector4i (*(p1 + ((idx >> 0) & 0x3)), *(p1 + ((idx >> 2) & 0x3)), *(p2 + ((idx >> 4) & 0x3)), *(p2 + ((idx >> 6) & 0x3)));
980 		}
981 
982 		[Acceleration (AccelMode.SSE2)]
Shuffle(this Vector4ui v1, Vector4ui v2, ShuffleSel sel)983 		public static unsafe Vector4ui Shuffle (this Vector4ui v1, Vector4ui v2, ShuffleSel sel)
984 		{
985 			uint *p1 = (uint*)&v1;
986 			uint *p2 = (uint*)&v2;
987 			int idx = (int)sel;
988 			return new Vector4ui (*(p1 + ((idx >> 0) & 0x3)), *(p1 + ((idx >> 2) & 0x3)), *(p2 + ((idx >> 4) & 0x3)), *(p2 + ((idx >> 6) & 0x3)));
989 		}
990 
991 		[Acceleration (AccelMode.SSE2)]
Shuffle(this Vector2d v1, Vector2d v2, int sel)992 		public static unsafe Vector2d Shuffle (this Vector2d v1, Vector2d v2, int sel)
993 		{
994 			double *p1 = (double*)&v1;
995 			double *p2 = (double*)&v2;
996 			return new Vector2d (*(p1 + ((sel >> 0) & 0x1)), *(p2 + ((sel >> 1) & 0x1)));
997 		}
998 
999 		[Acceleration (AccelMode.SSE2)]
Shuffle(this Vector2l v1, Vector2l v2, int sel)1000 		public static unsafe Vector2l Shuffle (this Vector2l v1, Vector2l v2, int sel)
1001 		{
1002 			long *p1 = (long*)&v1;
1003 			long *p2 = (long*)&v2;
1004 			return new Vector2l (*(p1 + ((sel >> 0) & 0x1)), *(p2 + ((sel >> 1) & 0x1)));
1005 		}
1006 
1007 		[Acceleration (AccelMode.SSE2)]
Shuffle(this Vector2ul v1, Vector2ul v2, int sel)1008 		public static unsafe Vector2ul Shuffle (this Vector2ul v1, Vector2ul v2, int sel)
1009 		{
1010 			ulong *p1 = (ulong*)&v1;
1011 			ulong *p2 = (ulong*)&v2;
1012 			return new Vector2ul (*(p1 + ((sel >> 0) & 0x1)), *(p2 + ((sel >> 1) & 0x1)));
1013 		}
1014 
1015 		[Acceleration (AccelMode.SSE2)]
Shuffle(this Vector4f v1, ShuffleSel sel)1016 		public static unsafe Vector4f Shuffle (this Vector4f v1, ShuffleSel sel)
1017 		{
1018 			float *ptr = (float*)&v1;
1019 			int idx = (int)sel;
1020 			return new Vector4f (*(ptr + ((idx >> 0) & 0x3)),*(ptr + ((idx >> 2) & 0x3)),*(ptr + ((idx >> 4) & 0x3)),*(ptr + ((idx >> 6) & 0x3)));
1021 		}
1022 
1023 		[Acceleration (AccelMode.SSE2)]
Shuffle(this Vector4i v1, ShuffleSel sel)1024 		public static unsafe Vector4i Shuffle (this Vector4i v1, ShuffleSel sel)
1025 		{
1026 			int *ptr = (int*)&v1;
1027 			int idx = (int)sel;
1028 			return new Vector4i (*(ptr + ((idx >> 0) & 0x3)),*(ptr + ((idx >> 2) & 0x3)),*(ptr + ((idx >> 4) & 0x3)),*(ptr + ((idx >> 6) & 0x3)));
1029 		}
1030 
1031 		[Acceleration (AccelMode.SSE2)]
1032 		[CLSCompliant (false)]
Shuffle(this Vector4ui v1, ShuffleSel sel)1033 		public static unsafe Vector4ui Shuffle (this Vector4ui v1, ShuffleSel sel)
1034 		{
1035 			uint *ptr = (uint*)&v1;
1036 			int idx = (int)sel;
1037 			return new Vector4ui (*(ptr + ((idx >> 0) & 0x3)),*(ptr + ((idx >> 2) & 0x3)),*(ptr + ((idx >> 4) & 0x3)),*(ptr + ((idx >> 6) & 0x3)));
1038 		}
1039 
1040 		[Acceleration (AccelMode.SSE2)]
ShuffleHigh(this Vector8s va, ShuffleSel sel)1041 		public static unsafe Vector8s ShuffleHigh (this Vector8s va, ShuffleSel sel)
1042 		{
1043 			short *ptr = ((short*)&va) + 4;
1044 			int idx = (int)sel;
1045 			return new Vector8s (va.v0, va.v1, va.v2, va.v3, *(ptr + ((idx >> 0) & 0x3)), *(ptr + ((idx >> 2) & 0x3)), *(ptr + ((idx >> 4) & 0x3)), *(ptr + ((idx >> 6) & 0x3)));
1046 		}
1047 
1048 		[Acceleration (AccelMode.SSE2)]
1049 		[CLSCompliant (false)]
ShuffleHigh(this Vector8us va, ShuffleSel sel)1050 		public static unsafe Vector8us ShuffleHigh (this Vector8us va, ShuffleSel sel)
1051 		{
1052 			ushort *ptr = ((ushort*)&va) + 4;
1053 			int idx = (int)sel;
1054 			return new Vector8us (va.v0, va.v1, va.v2, va.v3, *(ptr + ((idx >> 0) & 0x3)), *(ptr + ((idx >> 2) & 0x3)), *(ptr + ((idx >> 4) & 0x3)), *(ptr + ((idx >> 6) & 0x3)));
1055 		}
1056 
1057 		[Acceleration (AccelMode.SSE2)]
ShuffleLow(this Vector8s va, ShuffleSel sel)1058 		public static unsafe Vector8s ShuffleLow (this Vector8s va, ShuffleSel sel)
1059 		{
1060 			short *ptr = ((short*)&va);
1061 			int idx = (int)sel;
1062 			return new Vector8s (*(ptr + ((idx >> 0) & 0x3)), *(ptr + ((idx >> 2) & 0x3)), *(ptr + ((idx >> 4) & 0x3)), *(ptr + ((idx >> 6) & 0x3)), va.v4, va.v5, va.v6, va.v7);
1063 		}
1064 
1065 		[Acceleration (AccelMode.SSE2)]
1066 		[CLSCompliant (false)]
ShuffleLow(this Vector8us va, ShuffleSel sel)1067 		public static unsafe Vector8us ShuffleLow (this Vector8us va, ShuffleSel sel)
1068 		{
1069 			ushort *ptr = ((ushort*)&va);
1070 			int idx = (int)sel;
1071 			return new Vector8us (*(ptr + ((idx >> 0) & 0x3)), *(ptr + ((idx >> 2) & 0x3)), *(ptr + ((idx >> 4) & 0x3)), *(ptr + ((idx >> 6) & 0x3)), va.v4, va.v5, va.v6, va.v7);
1072 		}
1073 
1074 		[CLSCompliant(false)]
1075 		[Acceleration (AccelMode.SSE41)]
PackWithUnsignedSaturation(this Vector4i va, Vector4i vb)1076 		public static unsafe Vector8us PackWithUnsignedSaturation (this Vector4i va, Vector4i vb) {
1077 			Vector8us res = new Vector8us ();
1078 			int *a = (int*)&va;
1079 			int *b = (int*)&vb;
1080 			ushort *c = (ushort*)&res;
1081 			for (int i = 0; i < 4; ++i)
1082 				*c++ = (ushort)System.Math.Max (0, System.Math.Min (*a++, ushort.MaxValue));
1083 			for (int i = 0; i < 4; ++i)
1084 				*c++ = (ushort)System.Math.Max (0, System.Math.Min (*b++, ushort.MaxValue));
1085 			return res;
1086 		}
1087 
1088 
1089 		[Acceleration (AccelMode.SSE2)]
PackWithUnsignedSaturation(this Vector8s va, Vector8s vb)1090 		public static unsafe Vector16b PackWithUnsignedSaturation (this Vector8s va, Vector8s vb) {
1091 			Vector16b res = new Vector16b ();
1092 			short *a = (short*)&va;
1093 			short *b = (short*)&vb;
1094 			byte *c = (byte*)&res;
1095 			for (int i = 0; i < 8; ++i)
1096 				*c++ = (byte)System.Math.Max (0, System.Math.Min ((int)*a++, byte.MaxValue));
1097 			for (int i = 0; i < 8; ++i)
1098 				*c++ = (byte)System.Math.Max (0, System.Math.Min ((int)*b++, byte.MaxValue));
1099 			return res;
1100 		}
1101 
1102 		[Acceleration (AccelMode.SSE2)]
PackWithSignedSaturation(this Vector4i va, Vector4i vb)1103 		public static unsafe Vector8s PackWithSignedSaturation (this Vector4i va, Vector4i vb) {
1104 			Vector8s res = new Vector8s ();
1105 			int *a = (int*)&va;
1106 			int *b = (int*)&vb;
1107 			short *c = (short*)&res;
1108 			for (int i = 0; i < 4; ++i)
1109 				*c++ = (short)System.Math.Max (System.Math.Min ((int)*a++, short.MaxValue), short.MinValue);
1110 			for (int i = 0; i < 4; ++i)
1111 				*c++ = (short)System.Math.Max (System.Math.Min ((int)*b++, short.MaxValue), short.MinValue);
1112 			return res;
1113 		}
1114 
1115 		[CLSCompliant(false)]
1116 		[Acceleration (AccelMode.SSE2)]
PackWithSignedSaturation(this Vector8s va, Vector8s vb)1117 		public static unsafe Vector16sb PackWithSignedSaturation (this Vector8s va, Vector8s vb) {
1118 			Vector16sb res = new Vector16sb ();
1119 			short *a = (short*)&va;
1120 			short *b = (short*)&vb;
1121 			sbyte *c = (sbyte*)&res;
1122 			for (int i = 0; i < 8; ++i)
1123 				*c++ = (sbyte)System.Math.Max (System.Math.Min ((int)*a++, sbyte.MaxValue), sbyte.MinValue);
1124 			for (int i = 0; i < 8; ++i)
1125 				*c++ = (sbyte)System.Math.Max (System.Math.Min ((int)*b++, sbyte.MaxValue), sbyte.MinValue);
1126 			return res;
1127 		}
1128 
1129 		/* This function performs a packusdw, which treats the source as a signed value */
1130 		[Acceleration (AccelMode.SSE41)]
1131 		[CLSCompliant (false)]
SignedPackWithUnsignedSaturation(this Vector4ui va, Vector4ui vb)1132 		public static unsafe Vector8us SignedPackWithUnsignedSaturation (this Vector4ui va, Vector4ui vb) {
1133 			Vector8us res = new Vector8us ();
1134 			int *a = (int*)&va;
1135 			int *b = (int*)&vb;
1136 			ushort *c = (ushort*)&res;
1137 			for (int i = 0; i < 4; ++i)
1138 				*c++ = (ushort)System.Math.Max (0, System.Math.Min (*a++, ushort.MaxValue));
1139 			for (int i = 0; i < 4; ++i)
1140 				*c++ = (ushort)System.Math.Max (0, System.Math.Min (*b++, ushort.MaxValue));
1141 			return res;
1142 		}
1143 
1144 		/*This function performs a packuswb, which treats the source as a signed value */
1145 		[Acceleration (AccelMode.SSE2)]
1146 		[CLSCompliant (false)]
SignedPackWithUnsignedSaturation(this Vector8us va, Vector8us vb)1147 		public static unsafe Vector16b SignedPackWithUnsignedSaturation (this Vector8us va, Vector8us vb) {
1148 			Vector16b res = new Vector16b ();
1149 			short *a = (short*)&va;
1150 			short *b = (short*)&vb;
1151 			byte *c = (byte*)&res;
1152 			for (int i = 0; i < 8; ++i)
1153 				*c++ = (byte)System.Math.Max (0, System.Math.Min ((int)*a++, byte.MaxValue));
1154 			for (int i = 0; i < 8; ++i)
1155 				*c++ = (byte)System.Math.Max (0, System.Math.Min ((int)*b++, byte.MaxValue));
1156 			return res;
1157 		}
1158 
1159 		/* This function performs a packssdw, which treats the source as a signed value*/
1160 		[Acceleration (AccelMode.SSE2)]
1161 		[CLSCompliant (false)]
SignedPackWithSignedSaturation(this Vector4ui va, Vector4ui vb)1162 		public static unsafe Vector8s SignedPackWithSignedSaturation (this Vector4ui va, Vector4ui vb) {
1163 			Vector8s res = new Vector8s ();
1164 			int *a = (int*)&va;
1165 			int *b = (int*)&vb;
1166 			short *c = (short*)&res;
1167 			for (int i = 0; i < 4; ++i)
1168 				*c++ = (short)System.Math.Max (System.Math.Min ((int)*a++, short.MaxValue), short.MinValue);
1169 			for (int i = 0; i < 4; ++i)
1170 				*c++ = (short)System.Math.Max (System.Math.Min ((int)*b++, short.MaxValue), short.MinValue);
1171 			return res;
1172 		}
1173 
1174 		/*This function performs a packsswb, which treats the source as a signed value */
1175 		[Acceleration (AccelMode.SSE2)]
1176 		[CLSCompliant (false)]
SignedPackWithSignedSaturation(this Vector8us va, Vector8us vb)1177 		public static unsafe Vector16sb SignedPackWithSignedSaturation (this Vector8us va, Vector8us vb) {
1178 			Vector16sb res = new Vector16sb ();
1179 			short *a = (short*)&va;
1180 			short *b = (short*)&vb;
1181 			sbyte *c = (sbyte*)&res;
1182 			for (int i = 0; i < 8; ++i)
1183 				*c++ = (sbyte)System.Math.Max (System.Math.Min ((int)*a++, sbyte.MaxValue), sbyte.MinValue);
1184 			for (int i = 0; i < 8; ++i)
1185 				*c++ = (sbyte)System.Math.Max (System.Math.Min ((int)*b++, sbyte.MaxValue), sbyte.MinValue);
1186 			return res;
1187 		}
1188 
1189 		[Acceleration (AccelMode.SSE2)]
ConvertToFloat(this Vector4i v0)1190 		public static unsafe Vector4f ConvertToFloat (this Vector4i v0) {
1191 			return new Vector4f (v0.X, v0.Y, v0.Z, v0.W);
1192 		}
1193 
1194 		[Acceleration (AccelMode.SSE2)]
ConvertToDouble(this Vector4i v0)1195 		public static unsafe Vector2d ConvertToDouble (this Vector4i v0) {
1196 			return new Vector2d (v0.X, v0.Y);
1197 		}
1198 
1199 		[Acceleration (AccelMode.SSE2)]
ConvertToInt(this Vector2d v0)1200 		public static unsafe Vector4i ConvertToInt (this Vector2d v0) {
1201 			return new Vector4i ((int)System.Math.Round (v0.X), (int)System.Math.Round (v0.Y), 0, 0);
1202 		}
1203 
1204 		[Acceleration (AccelMode.SSE2)]
ConvertToIntTruncated(this Vector2d v0)1205 		public static unsafe Vector4i ConvertToIntTruncated (this Vector2d v0) {
1206 			return new Vector4i ((int) (v0.X), (int) (v0.Y), 0, 0);
1207 		}
1208 
1209 		[Acceleration (AccelMode.SSE2)]
ConvertToFloat(this Vector2d v0)1210 		public static unsafe Vector4f ConvertToFloat (this Vector2d v0) {
1211 			return new Vector4f ((float)v0.X, (float)v0.Y, 0, 0);
1212 		}
1213 
1214 		[Acceleration (AccelMode.SSE2)]
ConvertToInt(this Vector4f v0)1215 		public static unsafe Vector4i ConvertToInt (this Vector4f v0) {
1216 			return new Vector4i ((int)System.Math.Round (v0.X), (int)System.Math.Round (v0.Y), (int)System.Math.Round (v0.Z), (int)System.Math.Round (v0.W));
1217 		}
1218 
1219 		[Acceleration (AccelMode.SSE2)]
ConvertToIntTruncated(this Vector4f v0)1220 		public static unsafe Vector4i ConvertToIntTruncated (this Vector4f v0) {
1221 			return new Vector4i ((int)v0.X, (int)v0.Y, (int)v0.Z, (int)v0.W);
1222 		}
1223 
1224 		[Acceleration (AccelMode.SSE2)]
ConvertToDouble(this Vector4f v0)1225 		public static unsafe Vector2d ConvertToDouble (this Vector4f v0) {
1226 			return new Vector2d (v0.X, v0.Y);
1227 		}
1228 	}
1229 }
1230