1 /////////////////////////////////////////////////////////////////////////
2 // $Id: sse_string.cc 14086 2021-01-30 08:35:35Z sshwarts $
3 /////////////////////////////////////////////////////////////////////////
4 //
5 //   Copyright (c) 2007-2018 Stanislav Shwartsman
6 //          Written by Stanislav Shwartsman [sshwarts at sourceforge net]
7 //
8 //  This library is free software; you can redistribute it and/or
9 //  modify it under the terms of the GNU Lesser General Public
10 //  License as published by the Free Software Foundation; either
11 //  version 2 of the License, or (at your option) any later version.
12 //
13 //  This library is distributed in the hope that it will be useful,
14 //  but WITHOUT ANY WARRANTY; without even the implied warranty of
15 //  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
16 //  Lesser General Public License for more details.
17 //
18 //  You should have received a copy of the GNU Lesser General Public
19 //  License along with this library; if not, write to the Free Software
20 //  Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
21 //
22 /////////////////////////////////////////////////////////////////////////
23 
24 #define NEED_CPU_REG_SHORTCUTS 1
25 #include "bochs.h"
26 #include "cpu.h"
27 #define LOG_THIS BX_CPU_THIS_PTR
28 
29 #if BX_CPU_LEVEL >= 6
30 
31 // Compare all pairs of Ai, Bj according to imm8 control
compare_strings(Bit8u BoolRes[16][16],const BxPackedXmmRegister & op1,const BxPackedXmmRegister & op2,Bit8u imm)32 static void compare_strings(Bit8u BoolRes[16][16], const BxPackedXmmRegister &op1, const BxPackedXmmRegister &op2, Bit8u imm)
33 {
34   unsigned i, j;
35   unsigned aggregation_operation = (imm >> 2) & 3;
36 
37   // All possible comparisons are performed, the individual boolean
38   // results of those comparisons are referred by
39   //        BoolRes[op2 element index, op1 element index]
40 
41   switch (imm & 3) {
42   case 0: /* unsigned bytes compare */
43     for (i=0;i<16;i++) {
44       for (j=0;j<16;j++) {
45         switch (aggregation_operation) {
46         case 0: /* 'equal' comparison */
47         case 2:
48         case 3:
49           BoolRes[j][i] = (op1.xmmubyte(i) == op2.xmmubyte(j));
50           break;
51         case 1: /* 'ranges' comparison */
52           if ((i % 2) == 0)
53             BoolRes[j][i] = (op1.xmmubyte(i) <= op2.xmmubyte(j));
54           else
55             BoolRes[j][i] = (op1.xmmubyte(i) >= op2.xmmubyte(j));
56           break;
57         }
58       }
59     }
60     break;
61 
62   case 1: /* unsigned words compare */
63     for (i=0;i<8;i++) {
64       for (j=0;j<8;j++) {
65         switch (aggregation_operation) {
66         case 0: /* 'equal' comparison */
67         case 2:
68         case 3:
69           BoolRes[j][i] = (op1.xmm16u(i) == op2.xmm16u(j));
70           break;
71         case 1: /* 'ranges' comparison */
72           if ((i % 2) == 0)
73             BoolRes[j][i] = (op1.xmm16u(i) <= op2.xmm16u(j));
74           else
75             BoolRes[j][i] = (op1.xmm16u(i) >= op2.xmm16u(j));
76           break;
77         }
78       }
79     }
80     break;
81 
82   case 2: /*   signed bytes compare */
83     for (i=0;i<16;i++) {
84       for (j=0;j<16;j++) {
85         switch (aggregation_operation) {
86         case 0: /* 'equal' comparison */
87         case 2:
88         case 3:
89           BoolRes[j][i] = (op1.xmmsbyte(i) == op2.xmmsbyte(j));
90           break;
91         case 1: /* 'ranges' comparison */
92           if ((i % 2) == 0)
93             BoolRes[j][i] = (op1.xmmsbyte(i) <= op2.xmmsbyte(j));
94           else
95             BoolRes[j][i] = (op1.xmmsbyte(i) >= op2.xmmsbyte(j));
96           break;
97         }
98       }
99     }
100     break;
101 
102   case 3: /*   signed words compare */
103     for (i=0;i<8;i++) {
104       for (j=0;j<8;j++) {
105         switch (aggregation_operation) {
106         case 0: /* 'equal' comparison */
107         case 2:
108         case 3:
109           BoolRes[j][i] = (op1.xmm16s(i) == op2.xmm16s(j));
110           break;
111         case 1: /* 'ranges' comparison */
112           if ((i % 2) == 0)
113             BoolRes[j][i] = (op1.xmm16s(i) <= op2.xmm16s(j));
114           else
115             BoolRes[j][i] = (op1.xmm16s(i) >= op2.xmm16s(j));
116           break;
117         }
118       }
119     }
120     break;
121   }
122 }
123 
find_eos32(Bit32s reg32,Bit8u imm)124 static unsigned find_eos32(Bit32s reg32, Bit8u imm)
125 {
126   if (imm & 0x1) {  // 8  elements
127     if (reg32 > 8 || reg32 < -8) return 8;
128     else return abs(reg32);
129   }
130   else {            // 16 elements
131     if (reg32 > 16 || reg32 < -16) return 16;
132     else return abs(reg32);
133   }
134 }
135 
136 #if BX_SUPPORT_X86_64
find_eos64(Bit64s reg64,Bit8u imm)137 static unsigned find_eos64(Bit64s reg64, Bit8u imm)
138 {
139   if (imm & 0x1) {  // 8  elements
140     if (reg64 > 8 || reg64 < -8) return 8;
141     else return (unsigned) abs(reg64);
142   }
143   else {            // 16 elements
144     if (reg64 > 16 || reg64 < -16) return 16;
145     else return (unsigned) abs(reg64);
146   }
147 }
148 #endif
149 
find_eos(const BxPackedXmmRegister & op,Bit8u imm)150 static unsigned find_eos(const BxPackedXmmRegister &op, Bit8u imm)
151 {
152   unsigned i = 0;
153 
154   if (imm & 0x1) {  // 8  elements
155     for(i=0;i<8;i++)
156       if (op.xmm16u(i) == 0) break;
157   }
158   else {            // 16 elements
159     for(i=0;i<16;i++)
160       if (op.xmmubyte(i) == 0) break;
161   }
162 
163   return i;
164 }
165 
// Apply the invalid-element override rules to one comparison result.
//
//   val     - raw comparison result for the (i, j) element pair
//   i_valid - element i lies inside its string
//   j_valid - element j lies inside its string
//   imm     - control byte; imm[3:2] selects the aggregation operation
//
// When both elements are valid, val is returned unchanged. Otherwise:
//   'equal any' (0), 'ranges' (1) : result is forced to false
//   'equal each' (2)              : true only if both elements are invalid
//   'equal ordered' (3)           : true if element i is invalid,
//                                   false if only element j is invalid
static bool override_if_data_invalid(bool val, bool i_valid, bool j_valid, Bit8u imm)
{
  if (i_valid && j_valid)
    return val;

  // from here on at least one of the two elements is invalid
  switch ((imm >> 2) & 3) {
  case 2: // 'equal each'
    return (! i_valid) && (! j_valid);

  case 3: // 'equal ordered'
    return ! i_valid;

  default: // 'equal any' and 'ranges'
    return 0;
  }
}
201 
aggregate(Bit8u BoolRes[16][16],unsigned len1,unsigned len2,Bit8u imm)202 static Bit16u aggregate(Bit8u BoolRes[16][16], unsigned len1, unsigned len2, Bit8u imm)
203 {
204   unsigned aggregation_operation = (imm >> 2) & 3;
205   unsigned num_elements = (imm & 0x1) ? 8 : 16;
206   unsigned polarity = (imm >> 4) & 3;
207   unsigned i,j,k;
208 
209   Bit16u result = 0;
210 
211   switch(aggregation_operation) {
212   case 0: // 'equal any'
213     for(j=0; j<num_elements; j++) {
214       bool res = 0;
215       for(i=0; i<num_elements; i++) {
216         if (override_if_data_invalid(BoolRes[j][i], (i < len1), (j < len2), imm)) {
217           res = 1;
218           break;
219         }
220       }
221 
222       if (res) result |= (1<<j);
223     }
224     break;
225 
226   case 1: // 'ranges'
227     for(j=0; j<num_elements; j++) {
228       bool res = 0;
229       for(i=0; i<num_elements; i+=2) {
230         if (override_if_data_invalid(BoolRes[j][i],     (i < len1), (j < len2), imm) &&
231             override_if_data_invalid(BoolRes[j][i+1], (i+1 < len1), (j < len2), imm)) {
232           res = 1;
233           break;
234         }
235       }
236 
237       if (res) result |= (1<<j);
238     }
239     break;
240 
241   case 2: // 'equal each'
242     for(j=0; j<num_elements; j++) {
243       if (override_if_data_invalid(BoolRes[j][j], (j < len1), (j < len2), imm))
244         result |= (1<<j);
245     }
246     break;
247 
248   case 3: // 'equal ordered'
249     for(j=0; j<num_elements; j++) {
250       bool res = 1;
251       for (i=0, k=j; (i < num_elements-j) && (k < num_elements); i++, k++) {
252         if (! override_if_data_invalid(BoolRes[k][i], (i < len1), (k < len2), imm)) {
253           res = 0;
254           break;
255         }
256       }
257 
258       if (res) result |= (1<<j);
259     }
260     break;
261   }
262 
263   switch(polarity) {
264   case 0:
265   case 2:
266     break; // do nothing
267 
268   case 1:
269     result ^= (num_elements == 8) ? 0xFF : 0xFFFF;
270     break;
271 
272   case 3:
273     for (j=0;j<num_elements;j++)
274       if (j < len2) result ^= (1<<j);    // flip the bit
275     break;
276   }
277 
278   return result;
279 }
280 
281 /* 66 0F 3A 60 */
PCMPESTRM_VdqWdqIbR(bxInstruction_c * i)282 void BX_CPP_AttrRegparmN(1) BX_CPU_C::PCMPESTRM_VdqWdqIbR(bxInstruction_c *i)
283 {
284   BxPackedXmmRegister op1 = BX_READ_XMM_REG(i->dst());
285   BxPackedXmmRegister op2 = BX_READ_XMM_REG(i->src()), result;
286   Bit8u imm8 = i->Ib();
287 
288   // compare all pairs of Ai, Bj
289   Bit8u BoolRes[16][16];
290   compare_strings(BoolRes, op1, op2, imm8);
291   unsigned len1, len2, num_elements = (imm8 & 0x1) ? 8 : 16;
292 
293 #if BX_SUPPORT_X86_64
294   if (i->os64L()) {
295     len1 = find_eos64(RAX, imm8);
296     len2 = find_eos64(RDX, imm8);
297   }
298   else
299 #endif
300   {
301     len1 = find_eos32(EAX, imm8);
302     len2 = find_eos32(EDX, imm8);
303   }
304   Bit16u result2 = aggregate(BoolRes, len1, len2, imm8);
305 
306   // As defined by imm8[6], result2 is then either stored to the least
307   // significant bits of XMM0 (zero extended to 128 bits) or expanded
308   // into a byte/word-mask and then stored to XMM0
309   if (imm8 & 0x40) {
310      if (num_elements == 8) {
311        for (int index = 0; index < 8; index++)
312          result.xmm16u(index) = (result2 & (1<<index)) ? 0xffff : 0;
313      }
314      else {  // num_elements = 16
315        for (int index = 0; index < 16; index++)
316          result.xmmubyte(index) = (result2 & (1<<index)) ? 0xff : 0;
317      }
318   }
319   else {
320      result.xmm64u(1) = 0;
321      result.xmm64u(0) = (Bit64u) result2;
322   }
323 
324   Bit32u flags = 0;
325   if (result2 != 0) flags |= EFlagsCFMask;
326   if (len1 < num_elements) flags |= EFlagsSFMask;
327   if (len2 < num_elements) flags |= EFlagsZFMask;
328   if (result2 & 0x1)
329     flags |= EFlagsOFMask;
330   setEFlagsOSZAPC(flags);
331 
332   BX_WRITE_XMM_REGZ(0, result, i->getVL()); /* store result XMM0 */
333 
334   BX_NEXT_INSTR(i);
335 }
336 
337 /* 66 0F 3A 61 */
PCMPESTRI_VdqWdqIbR(bxInstruction_c * i)338 void BX_CPP_AttrRegparmN(1) BX_CPU_C::PCMPESTRI_VdqWdqIbR(bxInstruction_c *i)
339 {
340   BxPackedXmmRegister op1 = BX_READ_XMM_REG(i->dst()), op2 = BX_READ_XMM_REG(i->src());
341   Bit8u imm8 = i->Ib();
342 
343   // compare all pairs of Ai, Bj
344   Bit8u BoolRes[16][16];
345   compare_strings(BoolRes, op1, op2, imm8);
346   unsigned len1, len2, num_elements = (imm8 & 0x1) ? 8 : 16;
347   int index;
348 
349 #if BX_SUPPORT_X86_64
350   if (i->os64L()) {
351     len1 = find_eos64(RAX, imm8);
352     len2 = find_eos64(RDX, imm8);
353   }
354   else
355 #endif
356   {
357     len1 = find_eos32(EAX, imm8);
358     len2 = find_eos32(EDX, imm8);
359   }
360   Bit16u result2 = aggregate(BoolRes, len1, len2, imm8);
361 
362   // The index of the first (or last, according to imm8[6]) set bit of result2
363   // is returned to ECX. If no bits are set in IntRes2, ECX is set to 16 (8)
364   if (imm8 & 0x40) {
365      // The index returned to ECX is of the MSB in result2
366      for (index=num_elements-1; index>=0; index--)
367        if (result2 & (1<<index)) break;
368      if (index < 0) index = num_elements;
369   }
370   else {
371      // The index returned to ECX is of the LSB in result2
372      for (index=0; index<(int)num_elements; index++)
373        if (result2 & (1<<index)) break;
374   }
375   RCX = index;
376 
377   Bit32u flags = 0;
378   if (result2 != 0) flags |= EFlagsCFMask;
379   if (len1 < num_elements) flags |= EFlagsSFMask;
380   if (len2 < num_elements) flags |= EFlagsZFMask;
381   if (result2 & 0x1)
382     flags |= EFlagsOFMask;
383   setEFlagsOSZAPC(flags);
384 
385   BX_NEXT_INSTR(i);
386 }
387 
388 /* 66 0F 3A 62 */
PCMPISTRM_VdqWdqIbR(bxInstruction_c * i)389 void BX_CPP_AttrRegparmN(1) BX_CPU_C::PCMPISTRM_VdqWdqIbR(bxInstruction_c *i)
390 {
391   BxPackedXmmRegister op1 = BX_READ_XMM_REG(i->dst());
392   BxPackedXmmRegister op2 = BX_READ_XMM_REG(i->src()), result;
393   Bit8u imm8 = i->Ib();
394 
395   // compare all pairs of Ai, Bj
396   Bit8u BoolRes[16][16];
397   compare_strings(BoolRes, op1, op2, imm8);
398 
399   unsigned num_elements = (imm8 & 0x1) ? 8 : 16;
400   unsigned len1 = find_eos(op1, imm8);
401   unsigned len2 = find_eos(op2, imm8);
402   Bit16u result2 = aggregate(BoolRes, len1, len2, imm8);
403 
404   // As defined by imm8[6], result2 is then either stored to the least
405   // significant bits of XMM0 (zero extended to 128 bits) or expanded
406   // into a byte/word-mask and then stored to XMM0
407   if (imm8 & 0x40) {
408      if (num_elements == 8) {
409        for (int index = 0; index < 8; index++)
410          result.xmm16u(index) = (result2 & (1<<index)) ? 0xffff : 0;
411      }
412      else {  // num_elements = 16
413        for (int index = 0; index < 16; index++)
414          result.xmmubyte(index) = (result2 & (1<<index)) ? 0xff : 0;
415      }
416   }
417   else {
418      result.xmm64u(1) = 0;
419      result.xmm64u(0) = (Bit64u) result2;
420   }
421 
422   Bit32u flags = 0;
423   if (result2 != 0) flags |= EFlagsCFMask;
424   if (len1 < num_elements) flags |= EFlagsSFMask;
425   if (len2 < num_elements) flags |= EFlagsZFMask;
426   if (result2 & 0x1)
427     flags |= EFlagsOFMask;
428   setEFlagsOSZAPC(flags);
429 
430   BX_WRITE_XMM_REGZ(0, result, i->getVL()); /* store result XMM0 */
431 
432   BX_NEXT_INSTR(i);
433 }
434 
435 /* 66 0F 3A 63 */
PCMPISTRI_VdqWdqIbR(bxInstruction_c * i)436 void BX_CPP_AttrRegparmN(1) BX_CPU_C::PCMPISTRI_VdqWdqIbR(bxInstruction_c *i)
437 {
438   BxPackedXmmRegister op1 = BX_READ_XMM_REG(i->dst()), op2 = BX_READ_XMM_REG(i->src());
439   Bit8u imm8 = i->Ib();
440 
441   // compare all pairs of Ai, Bj
442   Bit8u BoolRes[16][16];
443   compare_strings(BoolRes, op1, op2, imm8);
444   unsigned num_elements = (imm8 & 0x1) ? 8 : 16;
445   int index;
446 
447   unsigned len1 = find_eos(op1, imm8);
448   unsigned len2 = find_eos(op2, imm8);
449   Bit16u result2 = aggregate(BoolRes, len1, len2, imm8);
450 
451   // The index of the first (or last, according to imm8[6]) set bit of result2
452   // is returned to ECX. If no bits are set in IntRes2, ECX is set to 16 (8)
453   if (imm8 & 0x40) {
454      // The index returned to ECX is of the MSB in result2
455      for (index=num_elements-1; index>=0; index--)
456        if (result2 & (1<<index)) break;
457      if (index < 0) index = num_elements;
458   }
459   else {
460      // The index returned to ECX is of the LSB in result2
461      for (index=0; index<(int)num_elements; index++)
462        if (result2 & (1<<index)) break;
463   }
464   RCX = index;
465 
466   Bit32u flags = 0;
467   if (result2 != 0) flags |= EFlagsCFMask;
468   if (len1 < num_elements) flags |= EFlagsSFMask;
469   if (len2 < num_elements) flags |= EFlagsZFMask;
470   if (result2 & 0x1)
471     flags |= EFlagsOFMask;
472   setEFlagsOSZAPC(flags);
473 
474   BX_NEXT_INSTR(i);
475 }
476 
477 #endif // BX_CPU_LEVEL >= 6
478