16ca2c52aSchristos# Altivec instruction set, for PSIM, the PowerPC simulator. 26ca2c52aSchristos 3*184b2d41Schristos# Copyright 2003-2020 Free Software Foundation, Inc. 46ca2c52aSchristos 56ca2c52aSchristos# Contributed by Red Hat Inc; developed under contract from Motorola. 66ca2c52aSchristos# Written by matthew green <mrg@redhat.com>. 76ca2c52aSchristos 86ca2c52aSchristos# This file is part of GDB. 96ca2c52aSchristos 106ca2c52aSchristos# This program is free software; you can redistribute it and/or modify 116ca2c52aSchristos# it under the terms of the GNU General Public License as published by 126ca2c52aSchristos# the Free Software Foundation; either version 3 of the License, or 136ca2c52aSchristos# (at your option) any later version. 146ca2c52aSchristos 156ca2c52aSchristos# This program is distributed in the hope that it will be useful, 166ca2c52aSchristos# but WITHOUT ANY WARRANTY; without even the implied warranty of 176ca2c52aSchristos# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 186ca2c52aSchristos# GNU General Public License for more details. 196ca2c52aSchristos 206ca2c52aSchristos# You should have received a copy of the GNU General Public License 216ca2c52aSchristos# along with this program. If not, see <http://www.gnu.org/licenses/>. */ 226ca2c52aSchristos 236ca2c52aSchristos 246ca2c52aSchristos# 256ca2c52aSchristos# Motorola AltiVec instructions. 
266ca2c52aSchristos# 276ca2c52aSchristos 286ca2c52aSchristos:cache:av:::VS:VS: 296ca2c52aSchristos:cache:av::vreg *:vS:VS:(cpu_registers(processor)->altivec.vr + VS) 306ca2c52aSchristos:cache:av::unsigned32:VS_BITMASK:VS:(1 << VS) 316ca2c52aSchristos:cache:av:::VA:VA: 326ca2c52aSchristos:cache:av::vreg *:vA:VA:(cpu_registers(processor)->altivec.vr + VA) 336ca2c52aSchristos:cache:av::unsigned32:VA_BITMASK:VA:(1 << VA) 346ca2c52aSchristos:cache:av:::VB:VB: 356ca2c52aSchristos:cache:av::vreg *:vB:VB:(cpu_registers(processor)->altivec.vr + VB) 366ca2c52aSchristos:cache:av::unsigned32:VB_BITMASK:VB:(1 << VB) 376ca2c52aSchristos:cache:av:::VC:VC: 386ca2c52aSchristos:cache:av::vreg *:vC:VC:(cpu_registers(processor)->altivec.vr + VC) 396ca2c52aSchristos:cache:av::unsigned32:VC_BITMASK:VC:(1 << VC) 406ca2c52aSchristos 416ca2c52aSchristos# Flags for model.h 426ca2c52aSchristos::model-macro::: 436ca2c52aSchristos #define PPC_INSN_INT_VR(OUT_MASK, IN_MASK, OUT_VMASK, IN_VMASK) \ 446ca2c52aSchristos do { \ 456ca2c52aSchristos if (CURRENT_MODEL_ISSUE > 0) \ 466ca2c52aSchristos ppc_insn_int_vr(MY_INDEX, cpu_model(processor), OUT_MASK, IN_MASK, OUT_VMASK, IN_VMASK); \ 476ca2c52aSchristos } while (0) 486ca2c52aSchristos 496ca2c52aSchristos #define PPC_INSN_VR(OUT_VMASK, IN_VMASK) \ 506ca2c52aSchristos do { \ 516ca2c52aSchristos if (CURRENT_MODEL_ISSUE > 0) \ 526ca2c52aSchristos ppc_insn_vr(MY_INDEX, cpu_model(processor), OUT_VMASK, IN_VMASK); \ 536ca2c52aSchristos } while (0) 546ca2c52aSchristos 556ca2c52aSchristos #define PPC_INSN_VR_CR(OUT_VMASK, IN_VMASK, CR_MASK) \ 566ca2c52aSchristos do { \ 576ca2c52aSchristos if (CURRENT_MODEL_ISSUE > 0) \ 586ca2c52aSchristos ppc_insn_vr_cr(MY_INDEX, cpu_model(processor), OUT_VMASK, IN_VMASK, CR_MASK); \ 596ca2c52aSchristos } while (0) 606ca2c52aSchristos 616ca2c52aSchristos #define PPC_INSN_VR_VSCR(OUT_VMASK, IN_VMASK) \ 626ca2c52aSchristos do { \ 636ca2c52aSchristos if (CURRENT_MODEL_ISSUE > 0) \ 646ca2c52aSchristos 
ppc_insn_vr_vscr(MY_INDEX, cpu_model(processor), OUT_VMASK, IN_VMASK); \ 656ca2c52aSchristos } while (0) 666ca2c52aSchristos 676ca2c52aSchristos #define PPC_INSN_FROM_VSCR(VR_MASK) \ 686ca2c52aSchristos do { \ 696ca2c52aSchristos if (CURRENT_MODEL_ISSUE > 0) \ 706ca2c52aSchristos ppc_insn_from_vscr(MY_INDEX, cpu_model(processor), VR_MASK); \ 716ca2c52aSchristos } while (0) 726ca2c52aSchristos 736ca2c52aSchristos #define PPC_INSN_TO_VSCR(VR_MASK) \ 746ca2c52aSchristos do { \ 756ca2c52aSchristos if (CURRENT_MODEL_ISSUE > 0) \ 766ca2c52aSchristos ppc_insn_to_vscr(MY_INDEX, cpu_model(processor), VR_MASK); \ 776ca2c52aSchristos } while (0) 786ca2c52aSchristos 796ca2c52aSchristos# Trace waiting for AltiVec registers to become available 806ca2c52aSchristosvoid::model-static::model_trace_altivec_busy_p:model_data *model_ptr, unsigned32 vr_busy 816ca2c52aSchristos int i; 826ca2c52aSchristos if (vr_busy) { 836ca2c52aSchristos vr_busy &= model_ptr->vr_busy; 846ca2c52aSchristos for(i = 0; i < 32; i++) { 856ca2c52aSchristos if (((1 << i) & vr_busy) != 0) { 866ca2c52aSchristos TRACE(trace_model, ("Waiting for register v%d.\n", i)); 876ca2c52aSchristos } 886ca2c52aSchristos } 896ca2c52aSchristos } 906ca2c52aSchristos if (model_ptr->vscr_busy) 916ca2c52aSchristos TRACE(trace_model, ("Waiting for VSCR\n")); 926ca2c52aSchristos 936ca2c52aSchristos# Trace making AltiVec registers busy 946ca2c52aSchristosvoid::model-static::model_trace_altivec_make_busy:model_data *model_ptr, unsigned32 vr_mask, unsigned32 cr_mask 956ca2c52aSchristos int i; 966ca2c52aSchristos if (vr_mask) { 976ca2c52aSchristos for(i = 0; i < 32; i++) { 986ca2c52aSchristos if (((1 << i) & vr_mask) != 0) { 996ca2c52aSchristos TRACE(trace_model, ("Register v%d is now busy.\n", i)); 1006ca2c52aSchristos } 1016ca2c52aSchristos } 1026ca2c52aSchristos } 1036ca2c52aSchristos if (cr_mask) { 1046ca2c52aSchristos for(i = 0; i < 8; i++) { 1056ca2c52aSchristos if (((1 << i) & cr_mask) != 0) { 1066ca2c52aSchristos 
TRACE(trace_model, ("Register cr%d is now busy.\n", i)); 1076ca2c52aSchristos } 1086ca2c52aSchristos } 1096ca2c52aSchristos } 1106ca2c52aSchristos 1116ca2c52aSchristos# Schedule an AltiVec instruction that takes integer input registers and produces output registers 1126ca2c52aSchristosvoid::model-function::ppc_insn_int_vr:itable_index index, model_data *model_ptr, const unsigned32 out_mask, const unsigned32 in_mask, const unsigned32 out_vmask, const unsigned32 in_vmask 1136ca2c52aSchristos const unsigned32 int_mask = out_mask | in_mask; 1146ca2c52aSchristos const unsigned32 vr_mask = out_vmask | in_vmask; 1156ca2c52aSchristos model_busy *busy_ptr; 1166ca2c52aSchristos 1176ca2c52aSchristos if ((model_ptr->int_busy & int_mask) != 0 || (model_ptr->vr_busy & vr_mask)) { 1186ca2c52aSchristos model_new_cycle(model_ptr); /* don't count first dependency as a stall */ 1196ca2c52aSchristos 1206ca2c52aSchristos while ((model_ptr->int_busy & int_mask) != 0 || (model_ptr->vr_busy & vr_mask)) { 1216ca2c52aSchristos if (WITH_TRACE && ppc_trace[trace_model]) { 1226ca2c52aSchristos model_trace_busy_p(model_ptr, int_mask, 0, 0, PPC_NO_SPR); 1236ca2c52aSchristos model_trace_altivec_busy_p(model_ptr, vr_mask); 1246ca2c52aSchristos } 1256ca2c52aSchristos 1266ca2c52aSchristos model_ptr->nr_stalls_data++; 1276ca2c52aSchristos model_new_cycle(model_ptr); 1286ca2c52aSchristos } 1296ca2c52aSchristos } 1306ca2c52aSchristos 1316ca2c52aSchristos busy_ptr = model_wait_for_unit(index, model_ptr, &model_ptr->timing[index]); 1326ca2c52aSchristos model_ptr->int_busy |= out_mask; 1336ca2c52aSchristos busy_ptr->int_busy |= out_mask; 1346ca2c52aSchristos model_ptr->vr_busy |= out_vmask; 1356ca2c52aSchristos busy_ptr->vr_busy |= out_vmask; 1366ca2c52aSchristos 1376ca2c52aSchristos if (out_mask) 1386ca2c52aSchristos busy_ptr->nr_writebacks = (PPC_ONE_BIT_SET_P(out_vmask)) ? 
1 : 2; 1396ca2c52aSchristos 1406ca2c52aSchristos if (out_vmask) 1416ca2c52aSchristos busy_ptr->nr_writebacks += (PPC_ONE_BIT_SET_P(out_vmask)) ? 1 : 2; 1426ca2c52aSchristos 1436ca2c52aSchristos if (WITH_TRACE && ppc_trace[trace_model]) { 1446ca2c52aSchristos model_trace_make_busy(model_ptr, out_mask, 0, 0); 1456ca2c52aSchristos model_trace_altivec_make_busy(model_ptr, vr_mask, 0); 1466ca2c52aSchristos } 1476ca2c52aSchristos 1486ca2c52aSchristos# Schedule an AltiVec instruction that takes vector input registers and produces vector output registers 1496ca2c52aSchristosvoid::model-function::ppc_insn_vr:itable_index index, model_data *model_ptr, const unsigned32 out_vmask, const unsigned32 in_vmask 1506ca2c52aSchristos const unsigned32 vr_mask = out_vmask | in_vmask; 1516ca2c52aSchristos model_busy *busy_ptr; 1526ca2c52aSchristos 1536ca2c52aSchristos if (model_ptr->vr_busy & vr_mask) { 1546ca2c52aSchristos model_new_cycle(model_ptr); /* don't count first dependency as a stall */ 1556ca2c52aSchristos 1566ca2c52aSchristos while (model_ptr->vr_busy & vr_mask) { 1576ca2c52aSchristos if (WITH_TRACE && ppc_trace[trace_model]) { 1586ca2c52aSchristos model_trace_altivec_busy_p(model_ptr, vr_mask); 1596ca2c52aSchristos } 1606ca2c52aSchristos 1616ca2c52aSchristos model_ptr->nr_stalls_data++; 1626ca2c52aSchristos model_new_cycle(model_ptr); 1636ca2c52aSchristos } 1646ca2c52aSchristos } 1656ca2c52aSchristos 1666ca2c52aSchristos busy_ptr = model_wait_for_unit(index, model_ptr, &model_ptr->timing[index]); 1676ca2c52aSchristos model_ptr->vr_busy |= out_vmask; 1686ca2c52aSchristos busy_ptr->vr_busy |= out_vmask; 1696ca2c52aSchristos if (out_vmask) 1706ca2c52aSchristos busy_ptr->nr_writebacks = (PPC_ONE_BIT_SET_P(out_vmask)) ? 
1 : 2; 1716ca2c52aSchristos 1726ca2c52aSchristos if (WITH_TRACE && ppc_trace[trace_model]) { 1736ca2c52aSchristos model_trace_altivec_make_busy(model_ptr, vr_mask, 0); 1746ca2c52aSchristos } 1756ca2c52aSchristos 1766ca2c52aSchristos# Schedule an AltiVec instruction that takes vector input registers and produces vector output registers, touches CR 1776ca2c52aSchristosvoid::model-function::ppc_insn_vr_cr:itable_index index, model_data *model_ptr, const unsigned32 out_vmask, const unsigned32 in_vmask, const unsigned32 cr_mask 1786ca2c52aSchristos const unsigned32 vr_mask = out_vmask | in_vmask; 1796ca2c52aSchristos model_busy *busy_ptr; 1806ca2c52aSchristos 1816ca2c52aSchristos if ((model_ptr->vr_busy & vr_mask) || (model_ptr->cr_fpscr_busy & cr_mask)) { 1826ca2c52aSchristos model_new_cycle(model_ptr); /* don't count first dependency as a stall */ 1836ca2c52aSchristos 1846ca2c52aSchristos while ((model_ptr->vr_busy & vr_mask) || (model_ptr->cr_fpscr_busy & cr_mask)) { 1856ca2c52aSchristos if (WITH_TRACE && ppc_trace[trace_model]) { 1866ca2c52aSchristos model_trace_busy_p(model_ptr, 0, 0, cr_mask, PPC_NO_SPR); 1876ca2c52aSchristos model_trace_altivec_busy_p(model_ptr, vr_mask); 1886ca2c52aSchristos } 1896ca2c52aSchristos 1906ca2c52aSchristos model_ptr->nr_stalls_data++; 1916ca2c52aSchristos model_new_cycle(model_ptr); 1926ca2c52aSchristos } 1936ca2c52aSchristos } 1946ca2c52aSchristos 1956ca2c52aSchristos busy_ptr = model_wait_for_unit(index, model_ptr, &model_ptr->timing[index]); 1966ca2c52aSchristos model_ptr->cr_fpscr_busy |= cr_mask; 1976ca2c52aSchristos busy_ptr->cr_fpscr_busy |= cr_mask; 1986ca2c52aSchristos model_ptr->vr_busy |= out_vmask; 1996ca2c52aSchristos busy_ptr->vr_busy |= out_vmask; 2006ca2c52aSchristos 2016ca2c52aSchristos if (out_vmask) 2026ca2c52aSchristos busy_ptr->nr_writebacks = (PPC_ONE_BIT_SET_P(out_vmask)) ? 
1 : 2; 2036ca2c52aSchristos 2046ca2c52aSchristos  if (cr_mask) 2056ca2c52aSchristos    busy_ptr->nr_writebacks++; 2066ca2c52aSchristos 2076ca2c52aSchristos  if (WITH_TRACE && ppc_trace[trace_model]) 2086ca2c52aSchristos    model_trace_altivec_make_busy(model_ptr, vr_mask, cr_mask); 2096ca2c52aSchristos 2106ca2c52aSchristos# Schedule an AltiVec instruction that takes vector input registers and produces vector output registers, touches VSCR 2116ca2c52aSchristosvoid::model-function::ppc_insn_vr_vscr:itable_index index, model_data *model_ptr, const unsigned32 out_vmask, const unsigned32 in_vmask 2126ca2c52aSchristos  const unsigned32 vr_mask = out_vmask | in_vmask; 2136ca2c52aSchristos  model_busy *busy_ptr; 2146ca2c52aSchristos 2156ca2c52aSchristos  if ((model_ptr->vr_busy & vr_mask) != 0 || model_ptr->vscr_busy != 0) { 2166ca2c52aSchristos    model_new_cycle(model_ptr); /* don't count first dependency as a stall */ 2176ca2c52aSchristos 2186ca2c52aSchristos    while ((model_ptr->vr_busy & vr_mask) != 0 || model_ptr->vscr_busy != 0) { 2196ca2c52aSchristos      if (WITH_TRACE && ppc_trace[trace_model]) 2206ca2c52aSchristos	model_trace_altivec_busy_p(model_ptr, vr_mask); 2216ca2c52aSchristos 2226ca2c52aSchristos      model_ptr->nr_stalls_data++; 2236ca2c52aSchristos      model_new_cycle(model_ptr); 2246ca2c52aSchristos    } 2256ca2c52aSchristos  } 2266ca2c52aSchristos 2276ca2c52aSchristos  busy_ptr = model_wait_for_unit(index, model_ptr, &model_ptr->timing[index]); 2286ca2c52aSchristos  model_ptr->vr_busy |= out_vmask; 2296ca2c52aSchristos  busy_ptr->vr_busy |= out_vmask; 2306ca2c52aSchristos  model_ptr->vscr_busy = 1; 2316ca2c52aSchristos  busy_ptr->vscr_busy = 1; 2326ca2c52aSchristos 2336ca2c52aSchristos  if (out_vmask) 2346ca2c52aSchristos    /* ?: binds tighter than intended vs. `1 +`; parenthesize so the result is
       1 (VSCR) + 1-or-2 (vector regs), not the constant 1 */
    busy_ptr->nr_writebacks = 1 + ((PPC_ONE_BIT_SET_P(out_vmask)) ?
1 : 2); 2356ca2c52aSchristos 2366ca2c52aSchristos  if (WITH_TRACE && ppc_trace[trace_model]) 2376ca2c52aSchristos    model_trace_altivec_make_busy(model_ptr, vr_mask, 0); 2386ca2c52aSchristos 2396ca2c52aSchristos# Schedule an MFVSCR instruction that VSCR input register and produces an AltiVec output register 2406ca2c52aSchristosvoid::model-function::ppc_insn_from_vscr:itable_index index, model_data *model_ptr, const unsigned32 vr_mask 2416ca2c52aSchristos  model_busy *busy_ptr; 2426ca2c52aSchristos 2436ca2c52aSchristos  while ((model_ptr->vr_busy & vr_mask) != 0 || model_ptr->vscr_busy != 0) { 2446ca2c52aSchristos    if (WITH_TRACE && ppc_trace[trace_model]) 2456ca2c52aSchristos      model_trace_altivec_busy_p(model_ptr, vr_mask); 2466ca2c52aSchristos 2476ca2c52aSchristos    model_ptr->nr_stalls_data++; 2486ca2c52aSchristos    model_new_cycle(model_ptr); 2496ca2c52aSchristos  } 2506ca2c52aSchristos  busy_ptr = model_wait_for_unit(index, model_ptr, &model_ptr->timing[index]); 2516ca2c52aSchristos  model_ptr->cr_fpscr_busy |= vr_mask; 2526ca2c52aSchristos  busy_ptr->cr_fpscr_busy |= vr_mask; 2536ca2c52aSchristos 2546ca2c52aSchristos  if (vr_mask) 2556ca2c52aSchristos    busy_ptr->nr_writebacks = 1; 2566ca2c52aSchristos 2576ca2c52aSchristos  model_ptr->vr_busy |= vr_mask; 2586ca2c52aSchristos  if (WITH_TRACE && ppc_trace[trace_model]) 2596ca2c52aSchristos    model_trace_altivec_make_busy(model_ptr, vr_mask, 0); 2606ca2c52aSchristos 2616ca2c52aSchristos# Schedule an MTVSCR instruction that one AltiVec input register and produces a vscr output register 2626ca2c52aSchristosvoid::model-function::ppc_insn_to_vscr:itable_index index, model_data *model_ptr, const unsigned32 vr_mask 2636ca2c52aSchristos  model_busy *busy_ptr; 2646ca2c52aSchristos 2656ca2c52aSchristos  while ((model_ptr->vr_busy & vr_mask) != 0 || model_ptr->vscr_busy != 0) { 2666ca2c52aSchristos    if (WITH_TRACE && ppc_trace[trace_model]) 2676ca2c52aSchristos      model_trace_altivec_busy_p(model_ptr, vr_mask); 2686ca2c52aSchristos
2696ca2c52aSchristos model_ptr->nr_stalls_data++; 2706ca2c52aSchristos model_new_cycle(model_ptr); 2716ca2c52aSchristos } 2726ca2c52aSchristos busy_ptr = model_wait_for_unit(index, model_ptr, &model_ptr->timing[index]); 2736ca2c52aSchristos busy_ptr ->vscr_busy = 1; 2746ca2c52aSchristos model_ptr->vscr_busy = 1; 2756ca2c52aSchristos busy_ptr->nr_writebacks = 1; 2766ca2c52aSchristos 2776ca2c52aSchristos TRACE(trace_model,("Making VSCR busy.\n")); 2786ca2c52aSchristos 2796ca2c52aSchristos# The follow are AltiVec saturate operations 2806ca2c52aSchristos 2816ca2c52aSchristossigned8::model-function::altivec_signed_saturate_8:signed16 val, int *sat 2826ca2c52aSchristos signed8 rv; 2836ca2c52aSchristos if (val > 127) { 2846ca2c52aSchristos rv = 127; 2856ca2c52aSchristos *sat = 1; 2866ca2c52aSchristos } else if (val < -128) { 2876ca2c52aSchristos rv = -128; 2886ca2c52aSchristos *sat = 1; 2896ca2c52aSchristos } else { 2906ca2c52aSchristos rv = val; 2916ca2c52aSchristos *sat = 0; 2926ca2c52aSchristos } 2936ca2c52aSchristos return rv; 2946ca2c52aSchristos 2956ca2c52aSchristossigned16::model-function::altivec_signed_saturate_16:signed32 val, int *sat 2966ca2c52aSchristos signed16 rv; 2976ca2c52aSchristos if (val > 32767) { 2986ca2c52aSchristos rv = 32767; 2996ca2c52aSchristos *sat = 1; 3006ca2c52aSchristos } else if (val < -32768) { 3016ca2c52aSchristos rv = -32768; 3026ca2c52aSchristos *sat = 1; 3036ca2c52aSchristos } else { 3046ca2c52aSchristos rv = val; 3056ca2c52aSchristos *sat = 0; 3066ca2c52aSchristos } 3076ca2c52aSchristos return rv; 3086ca2c52aSchristos 3096ca2c52aSchristossigned32::model-function::altivec_signed_saturate_32:signed64 val, int *sat 3106ca2c52aSchristos signed32 rv; 3116ca2c52aSchristos if (val > 2147483647) { 3126ca2c52aSchristos rv = 2147483647; 3136ca2c52aSchristos *sat = 1; 3146ca2c52aSchristos } else if (val < -2147483648LL) { 3156ca2c52aSchristos rv = -2147483648LL; 3166ca2c52aSchristos *sat = 1; 3176ca2c52aSchristos } else { 3186ca2c52aSchristos 
rv = val; 3196ca2c52aSchristos *sat = 0; 3206ca2c52aSchristos } 3216ca2c52aSchristos return rv; 3226ca2c52aSchristos 3236ca2c52aSchristosunsigned8::model-function::altivec_unsigned_saturate_8:signed16 val, int *sat 3246ca2c52aSchristos unsigned8 rv; 3256ca2c52aSchristos if (val > 255) { 3266ca2c52aSchristos rv = 255; 3276ca2c52aSchristos *sat = 1; 3286ca2c52aSchristos } else if (val < 0) { 3296ca2c52aSchristos rv = 0; 3306ca2c52aSchristos *sat = 1; 3316ca2c52aSchristos } else { 3326ca2c52aSchristos rv = val; 3336ca2c52aSchristos *sat = 0; 3346ca2c52aSchristos } 3356ca2c52aSchristos return rv; 3366ca2c52aSchristos 3376ca2c52aSchristosunsigned16::model-function::altivec_unsigned_saturate_16:signed32 val, int *sat 3386ca2c52aSchristos unsigned16 rv; 3396ca2c52aSchristos if (val > 65535) { 3406ca2c52aSchristos rv = 65535; 3416ca2c52aSchristos *sat = 1; 3426ca2c52aSchristos } else if (val < 0) { 3436ca2c52aSchristos rv = 0; 3446ca2c52aSchristos *sat = 1; 3456ca2c52aSchristos } else { 3466ca2c52aSchristos rv = val; 3476ca2c52aSchristos *sat = 0; 3486ca2c52aSchristos } 3496ca2c52aSchristos return rv; 3506ca2c52aSchristos 3516ca2c52aSchristosunsigned32::model-function::altivec_unsigned_saturate_32:signed64 val, int *sat 3526ca2c52aSchristos unsigned32 rv; 3536ca2c52aSchristos if (val > 4294967295LL) { 3546ca2c52aSchristos rv = 4294967295LL; 3556ca2c52aSchristos *sat = 1; 3566ca2c52aSchristos } else if (val < 0) { 3576ca2c52aSchristos rv = 0; 3586ca2c52aSchristos *sat = 1; 3596ca2c52aSchristos } else { 3606ca2c52aSchristos rv = val; 3616ca2c52aSchristos *sat = 0; 3626ca2c52aSchristos } 3636ca2c52aSchristos return rv; 3646ca2c52aSchristos 3656ca2c52aSchristos# 3666ca2c52aSchristos# Load instructions, 6-14 ... 6-22. 
3676ca2c52aSchristos# 3686ca2c52aSchristos 3696ca2c52aSchristos0.31,6.VS,11.RA,16.RB,21.7,31.0:X:av:lvebx %VD, %RA, %RB:Load Vector Element Byte Indexed 3706ca2c52aSchristos unsigned_word b; 3716ca2c52aSchristos unsigned_word EA; 3726ca2c52aSchristos unsigned_word eb; 3736ca2c52aSchristos if (RA_is_0) b = 0; 3746ca2c52aSchristos else b = *rA; 3756ca2c52aSchristos EA = b + *rB; 3766ca2c52aSchristos eb = EA & 0xf; 3776ca2c52aSchristos (*vS).b[AV_BINDEX(eb)] = MEM(unsigned, EA, 1); 3786ca2c52aSchristos PPC_INSN_INT_VR(0, RA_BITMASK | RB_BITMASK, VS_BITMASK, 0); 3796ca2c52aSchristos 3806ca2c52aSchristos0.31,6.VS,11.RA,16.RB,21.39,31.0:X:av:lvehx %VD, %RA, %RB:Load Vector Element Half Word Indexed 3816ca2c52aSchristos unsigned_word b; 3826ca2c52aSchristos unsigned_word EA; 3836ca2c52aSchristos unsigned_word eb; 3846ca2c52aSchristos if (RA_is_0) b = 0; 3856ca2c52aSchristos else b = *rA; 3866ca2c52aSchristos EA = (b + *rB) & ~1; 3876ca2c52aSchristos eb = EA & 0xf; 3886ca2c52aSchristos (*vS).h[AV_HINDEX(eb/2)] = MEM(unsigned, EA, 2); 3896ca2c52aSchristos PPC_INSN_INT_VR(0, RA_BITMASK | RB_BITMASK, VS_BITMASK, 0); 3906ca2c52aSchristos 3916ca2c52aSchristos0.31,6.VS,11.RA,16.RB,21.71,31.0:X:av:lvewx %VD, %RA, %RB:Load Vector Element Word Indexed 3926ca2c52aSchristos unsigned_word b; 3936ca2c52aSchristos unsigned_word EA; 3946ca2c52aSchristos unsigned_word eb; 3956ca2c52aSchristos if (RA_is_0) b = 0; 3966ca2c52aSchristos else b = *rA; 3976ca2c52aSchristos EA = (b + *rB) & ~3; 3986ca2c52aSchristos eb = EA & 0xf; 3996ca2c52aSchristos (*vS).w[eb/4] = MEM(unsigned, EA, 4); 4006ca2c52aSchristos PPC_INSN_INT_VR(0, RA_BITMASK | RB_BITMASK, VS_BITMASK, 0); 4016ca2c52aSchristos 4026ca2c52aSchristos 4036ca2c52aSchristos0.31,6.VS,11.RA,16.RB,21.6,31.0:X:av:lvsl %VD, %RA, %RB:Load Vector for Shift Left 4046ca2c52aSchristos unsigned_word b; 4056ca2c52aSchristos unsigned_word addr; 4066ca2c52aSchristos int i, j; 4076ca2c52aSchristos if (RA_is_0) b = 0; 4086ca2c52aSchristos else b = *rA; 
4096ca2c52aSchristos addr = b + *rB; 4106ca2c52aSchristos j = addr & 0xf; 4116ca2c52aSchristos for (i = 0; i < 16; i++) 4126ca2c52aSchristos if (CURRENT_TARGET_BYTE_ORDER == BIG_ENDIAN) 4136ca2c52aSchristos (*vS).b[AV_BINDEX(i)] = j++; 4146ca2c52aSchristos else 4156ca2c52aSchristos (*vS).b[AV_BINDEX(15 - i)] = j++; 4166ca2c52aSchristos PPC_INSN_INT_VR(0, RA_BITMASK | RB_BITMASK, VS_BITMASK, 0); 4176ca2c52aSchristos 4186ca2c52aSchristos0.31,6.VS,11.RA,16.RB,21.38,31.0:X:av:lvsr %VD, %RA, %RB:Load Vector for Shift Right 4196ca2c52aSchristos unsigned_word b; 4206ca2c52aSchristos unsigned_word addr; 4216ca2c52aSchristos int i, j; 4226ca2c52aSchristos if (RA_is_0) b = 0; 4236ca2c52aSchristos else b = *rA; 4246ca2c52aSchristos addr = b + *rB; 4256ca2c52aSchristos j = 0x10 - (addr & 0xf); 4266ca2c52aSchristos for (i = 0; i < 16; i++) 4276ca2c52aSchristos if (CURRENT_TARGET_BYTE_ORDER == BIG_ENDIAN) 4286ca2c52aSchristos (*vS).b[AV_BINDEX(i)] = j++; 4296ca2c52aSchristos else 4306ca2c52aSchristos (*vS).b[AV_BINDEX(15 - i)] = j++; 4316ca2c52aSchristos PPC_INSN_INT_VR(0, RA_BITMASK | RB_BITMASK, VS_BITMASK, 0); 4326ca2c52aSchristos 4336ca2c52aSchristos 4346ca2c52aSchristos0.31,6.VS,11.RA,16.RB,21.103,31.0:X:av:lvx %VD, %RA, %RB:Load Vector Indexed 4356ca2c52aSchristos unsigned_word b; 4366ca2c52aSchristos unsigned_word EA; 4376ca2c52aSchristos if (RA_is_0) b = 0; 4386ca2c52aSchristos else b = *rA; 4396ca2c52aSchristos EA = (b + *rB) & ~0xf; 4406ca2c52aSchristos if (CURRENT_TARGET_BYTE_ORDER == BIG_ENDIAN) { 4416ca2c52aSchristos (*vS).w[0] = MEM(unsigned, EA + 0, 4); 4426ca2c52aSchristos (*vS).w[1] = MEM(unsigned, EA + 4, 4); 4436ca2c52aSchristos (*vS).w[2] = MEM(unsigned, EA + 8, 4); 4446ca2c52aSchristos (*vS).w[3] = MEM(unsigned, EA + 12, 4); 4456ca2c52aSchristos } else { 4466ca2c52aSchristos (*vS).w[0] = MEM(unsigned, EA + 12, 4); 4476ca2c52aSchristos (*vS).w[1] = MEM(unsigned, EA + 8, 4); 4486ca2c52aSchristos (*vS).w[2] = MEM(unsigned, EA + 4, 4); 4496ca2c52aSchristos 
(*vS).w[3] = MEM(unsigned, EA + 0, 4); 4506ca2c52aSchristos } 4516ca2c52aSchristos PPC_INSN_INT_VR(0, RA_BITMASK | RB_BITMASK, VS_BITMASK, 0); 4526ca2c52aSchristos 4536ca2c52aSchristos0.31,6.VS,11.RA,16.RB,21.359,31.0:X:av:lvxl %VD, %RA, %RB:Load Vector Indexed LRU 4546ca2c52aSchristos unsigned_word b; 4556ca2c52aSchristos unsigned_word EA; 4566ca2c52aSchristos if (RA_is_0) b = 0; 4576ca2c52aSchristos else b = *rA; 4586ca2c52aSchristos EA = (b + *rB) & ~0xf; 4596ca2c52aSchristos if (CURRENT_TARGET_BYTE_ORDER == BIG_ENDIAN) { 4606ca2c52aSchristos (*vS).w[0] = MEM(unsigned, EA + 0, 4); 4616ca2c52aSchristos (*vS).w[1] = MEM(unsigned, EA + 4, 4); 4626ca2c52aSchristos (*vS).w[2] = MEM(unsigned, EA + 8, 4); 4636ca2c52aSchristos (*vS).w[3] = MEM(unsigned, EA + 12, 4); 4646ca2c52aSchristos } else { 4656ca2c52aSchristos (*vS).w[0] = MEM(unsigned, EA + 12, 4); 4666ca2c52aSchristos (*vS).w[1] = MEM(unsigned, EA + 8, 4); 4676ca2c52aSchristos (*vS).w[2] = MEM(unsigned, EA + 4, 4); 4686ca2c52aSchristos (*vS).w[3] = MEM(unsigned, EA + 0, 4); 4696ca2c52aSchristos } 4706ca2c52aSchristos PPC_INSN_INT_VR(0, RA_BITMASK | RB_BITMASK, VS_BITMASK, 0); 4716ca2c52aSchristos 4726ca2c52aSchristos# 4736ca2c52aSchristos# Move to/from VSCR instructions, 6-23 & 6-24. 4746ca2c52aSchristos# 4756ca2c52aSchristos 4766ca2c52aSchristos0.4,6.VS,11.0,16.0,21.1540:VX:av:mfvscr %VS:Move from Vector Status and Control Register 4776ca2c52aSchristos (*vS).w[0] = 0; 4786ca2c52aSchristos (*vS).w[1] = 0; 4796ca2c52aSchristos (*vS).w[2] = 0; 4806ca2c52aSchristos (*vS).w[3] = VSCR; 4816ca2c52aSchristos PPC_INSN_FROM_VSCR(VS_BITMASK); 4826ca2c52aSchristos 4836ca2c52aSchristos0.4,6.0,11.0,16.VB,21.1604:VX:av:mtvscr %VB:Move to Vector Status and Control Register 4846ca2c52aSchristos VSCR = (*vB).w[3]; 4856ca2c52aSchristos PPC_INSN_TO_VSCR(VB_BITMASK); 4866ca2c52aSchristos 4876ca2c52aSchristos# 4886ca2c52aSchristos# Store instructions, 6-25 ... 6-29. 
4896ca2c52aSchristos# 4906ca2c52aSchristos 4916ca2c52aSchristos0.31,6.VS,11.RA,16.RB,21.135,31.0:X:av:stvebx %VD, %RA, %RB:Store Vector Element Byte Indexed 4926ca2c52aSchristos  unsigned_word b; 4936ca2c52aSchristos  unsigned_word EA; 4946ca2c52aSchristos  unsigned_word eb; 4956ca2c52aSchristos  if (RA_is_0) b = 0; 4966ca2c52aSchristos  else b = *rA; 4976ca2c52aSchristos  EA = b + *rB; 4986ca2c52aSchristos  eb = EA & 0xf; 4996ca2c52aSchristos  if (CURRENT_TARGET_BYTE_ORDER == BIG_ENDIAN) 5006ca2c52aSchristos    STORE(EA, 1, (*vS).b[eb]); 5016ca2c52aSchristos  else 5026ca2c52aSchristos    STORE(EA, 1, (*vS).b[15-eb]); 5036ca2c52aSchristos  PPC_INSN_INT_VR(0, RA_BITMASK | RB_BITMASK, VS_BITMASK, 0); 5046ca2c52aSchristos 5056ca2c52aSchristos0.31,6.VS,11.RA,16.RB,21.167,31.0:X:av:stvehx %VD, %RA, %RB:Store Vector Element Half Word Indexed 5066ca2c52aSchristos  unsigned_word b; 5076ca2c52aSchristos  unsigned_word EA; 5086ca2c52aSchristos  unsigned_word eb; 5096ca2c52aSchristos  if (RA_is_0) b = 0; 5106ca2c52aSchristos  else b = *rA; 5116ca2c52aSchristos  EA = (b + *rB) & ~1; 5126ca2c52aSchristos  eb = EA & 0xf; 5136ca2c52aSchristos  if (CURRENT_TARGET_BYTE_ORDER == BIG_ENDIAN) 5146ca2c52aSchristos    STORE(EA, 2, (*vS).h[eb/2]); 5156ca2c52aSchristos  else 5166ca2c52aSchristos    STORE(EA, 2, (*vS).h[7-(eb/2)]); /* eb is a byte offset (0..14, even); halve it before mirroring, cf. b[15-eb] and w[3-(eb/4)] */ 5176ca2c52aSchristos  PPC_INSN_INT_VR(0, RA_BITMASK | RB_BITMASK, VS_BITMASK, 0); 5186ca2c52aSchristos 5196ca2c52aSchristos0.31,6.VS,11.RA,16.RB,21.199,31.0:X:av:stvewx %VD, %RA, %RB:Store Vector Element Word Indexed 5206ca2c52aSchristos  unsigned_word b; 5216ca2c52aSchristos  unsigned_word EA; 5226ca2c52aSchristos  unsigned_word eb; 5236ca2c52aSchristos  if (RA_is_0) b = 0; 5246ca2c52aSchristos  else b = *rA; 5256ca2c52aSchristos  EA = (b + *rB) & ~3; 5266ca2c52aSchristos  eb = EA & 0xf; 5276ca2c52aSchristos  if (CURRENT_TARGET_BYTE_ORDER == BIG_ENDIAN) 5286ca2c52aSchristos    STORE(EA, 4, (*vS).w[eb/4]); 5296ca2c52aSchristos  else 5306ca2c52aSchristos    STORE(EA, 4, (*vS).w[3-(eb/4)]); 5316ca2c52aSchristos
PPC_INSN_INT_VR(0, RA_BITMASK | RB_BITMASK, VS_BITMASK, 0); 5326ca2c52aSchristos 5336ca2c52aSchristos0.31,6.VS,11.RA,16.RB,21.231,31.0:X:av:stvx %VD, %RA, %RB:Store Vector Indexed 5346ca2c52aSchristos unsigned_word b; 5356ca2c52aSchristos unsigned_word EA; 5366ca2c52aSchristos if (RA_is_0) b = 0; 5376ca2c52aSchristos else b = *rA; 5386ca2c52aSchristos EA = (b + *rB) & ~0xf; 5396ca2c52aSchristos if (CURRENT_TARGET_BYTE_ORDER == BIG_ENDIAN) { 5406ca2c52aSchristos STORE(EA + 0, 4, (*vS).w[0]); 5416ca2c52aSchristos STORE(EA + 4, 4, (*vS).w[1]); 5426ca2c52aSchristos STORE(EA + 8, 4, (*vS).w[2]); 5436ca2c52aSchristos STORE(EA + 12, 4, (*vS).w[3]); 5446ca2c52aSchristos } else { 5456ca2c52aSchristos STORE(EA + 12, 4, (*vS).w[0]); 5466ca2c52aSchristos STORE(EA + 8, 4, (*vS).w[1]); 5476ca2c52aSchristos STORE(EA + 4, 4, (*vS).w[2]); 5486ca2c52aSchristos STORE(EA + 0, 4, (*vS).w[3]); 5496ca2c52aSchristos } 5506ca2c52aSchristos PPC_INSN_INT_VR(0, RA_BITMASK | RB_BITMASK, VS_BITMASK, 0); 5516ca2c52aSchristos 5526ca2c52aSchristos0.31,6.VS,11.RA,16.RB,21.487,31.0:X:av:stvxl %VD, %RA, %RB:Store Vector Indexed LRU 5536ca2c52aSchristos unsigned_word b; 5546ca2c52aSchristos unsigned_word EA; 5556ca2c52aSchristos if (RA_is_0) b = 0; 5566ca2c52aSchristos else b = *rA; 5576ca2c52aSchristos EA = (b + *rB) & ~0xf; 5586ca2c52aSchristos if (CURRENT_TARGET_BYTE_ORDER == BIG_ENDIAN) { 5596ca2c52aSchristos STORE(EA + 0, 4, (*vS).w[0]); 5606ca2c52aSchristos STORE(EA + 4, 4, (*vS).w[1]); 5616ca2c52aSchristos STORE(EA + 8, 4, (*vS).w[2]); 5626ca2c52aSchristos STORE(EA + 12, 4, (*vS).w[3]); 5636ca2c52aSchristos } else { 5646ca2c52aSchristos STORE(EA + 12, 4, (*vS).w[0]); 5656ca2c52aSchristos STORE(EA + 8, 4, (*vS).w[1]); 5666ca2c52aSchristos STORE(EA + 4, 4, (*vS).w[2]); 5676ca2c52aSchristos STORE(EA + 0, 4, (*vS).w[3]); 5686ca2c52aSchristos } 5696ca2c52aSchristos PPC_INSN_INT_VR(0, RA_BITMASK | RB_BITMASK, VS_BITMASK, 0); 5706ca2c52aSchristos 5716ca2c52aSchristos# 5726ca2c52aSchristos# Vector Add 
instructions, 6-30 ... 6-40. 5736ca2c52aSchristos# 5746ca2c52aSchristos 5756ca2c52aSchristos0.4,6.VS,11.VA,16.VB,21.384:VX:av:vaddcuw %VD, %VA, %VB:Vector Add Carryout Unsigned Word 5766ca2c52aSchristos unsigned64 temp; 5776ca2c52aSchristos int i; 5786ca2c52aSchristos for (i = 0; i < 4; i++) { 5796ca2c52aSchristos temp = (unsigned64)(*vA).w[i] + (unsigned64)(*vB).w[i]; 5806ca2c52aSchristos (*vS).w[i] = temp >> 32; 5816ca2c52aSchristos } 5826ca2c52aSchristos PPC_INSN_VR(VS_BITMASK, VA_BITMASK | VB_BITMASK); 5836ca2c52aSchristos 5846ca2c52aSchristos0.4,6.VS,11.VA,16.VB,21.10:VX:av:vaddfp %VD, %VA, %VB:Vector Add Floating Point 5856ca2c52aSchristos int i; 5866ca2c52aSchristos unsigned32 f; 5876ca2c52aSchristos sim_fpu a, b, d; 5886ca2c52aSchristos for (i = 0; i < 4; i++) { 5896ca2c52aSchristos sim_fpu_32to (&a, (*vA).w[i]); 5906ca2c52aSchristos sim_fpu_32to (&b, (*vB).w[i]); 5916ca2c52aSchristos sim_fpu_add (&d, &a, &b); 5926ca2c52aSchristos sim_fpu_to32 (&f, &d); 5936ca2c52aSchristos (*vS).w[i] = f; 5946ca2c52aSchristos } 5956ca2c52aSchristos PPC_INSN_VR(VS_BITMASK, VA_BITMASK | VB_BITMASK); 5966ca2c52aSchristos 5976ca2c52aSchristos0.4,6.VS,11.VA,16.VB,21.768:VX:av:vaddsbs %VD, %VA, %VB:Vector Add Signed Byte Saturate 5986ca2c52aSchristos int i, sat, tempsat; 5996ca2c52aSchristos signed16 temp; 6006ca2c52aSchristos for (i = 0; i < 16; i++) { 6016ca2c52aSchristos temp = (signed16)(signed8)(*vA).b[i] + (signed16)(signed8)(*vB).b[i]; 6026ca2c52aSchristos (*vS).b[i] = altivec_signed_saturate_8(temp, &tempsat); 6036ca2c52aSchristos sat |= tempsat; 6046ca2c52aSchristos } 6056ca2c52aSchristos ALTIVEC_SET_SAT(sat); 6066ca2c52aSchristos PPC_INSN_VR_VSCR(VS_BITMASK, VA_BITMASK | VB_BITMASK); 6076ca2c52aSchristos 6086ca2c52aSchristos0.4,6.VS,11.VA,16.VB,21.832:VX:av:vaddshs %VD, %VA, %VB:Vector Add Signed Half Word Saturate 6096ca2c52aSchristos int i, sat, tempsat; 6106ca2c52aSchristos signed32 temp, a, b; 6116ca2c52aSchristos for (i = 0; i < 8; i++) { 6126ca2c52aSchristos a = 
(signed32)(signed16)(*vA).h[i]; 6136ca2c52aSchristos b = (signed32)(signed16)(*vB).h[i]; 6146ca2c52aSchristos temp = a + b; 6156ca2c52aSchristos (*vS).h[i] = altivec_signed_saturate_16(temp, &tempsat); 6166ca2c52aSchristos sat |= tempsat; 6176ca2c52aSchristos } 6186ca2c52aSchristos ALTIVEC_SET_SAT(sat); 6196ca2c52aSchristos PPC_INSN_VR_VSCR(VS_BITMASK, VA_BITMASK | VB_BITMASK); 6206ca2c52aSchristos 6216ca2c52aSchristos0.4,6.VS,11.VA,16.VB,21.896:VX:av:vaddsws %VD, %VA, %VB:Vector Add Signed Word Saturate 6226ca2c52aSchristos int i, sat, tempsat; 6236ca2c52aSchristos signed64 temp; 6246ca2c52aSchristos for (i = 0; i < 4; i++) { 6256ca2c52aSchristos temp = (signed64)(signed32)(*vA).w[i] + (signed64)(signed32)(*vB).w[i]; 6266ca2c52aSchristos (*vS).w[i] = altivec_signed_saturate_32(temp, &tempsat); 6276ca2c52aSchristos sat |= tempsat; 6286ca2c52aSchristos } 6296ca2c52aSchristos ALTIVEC_SET_SAT(sat); 6306ca2c52aSchristos PPC_INSN_VR_VSCR(VS_BITMASK, VA_BITMASK | VB_BITMASK); 6316ca2c52aSchristos 6326ca2c52aSchristos0.4,6.VS,11.VA,16.VB,21.0:VX:av:vaddubm %VD, %VA, %VB:Vector Add Unsigned Byte Modulo 6336ca2c52aSchristos int i; 6346ca2c52aSchristos for (i = 0; i < 16; i++) 6356ca2c52aSchristos (*vS).b[i] = ((*vA).b[i] + (*vB).b[i]) & 0xff; 6366ca2c52aSchristos PPC_INSN_VR(VS_BITMASK, VA_BITMASK | VB_BITMASK); 6376ca2c52aSchristos 6386ca2c52aSchristos0.4,6.VS,11.VA,16.VB,21.512:VX:av:vaddubs %VD, %VA, %VB:Vector Add Unsigned Byte Saturate 6396ca2c52aSchristos int i, sat, tempsat; 6406ca2c52aSchristos signed16 temp; 6416ca2c52aSchristos sat = 0; 6426ca2c52aSchristos for (i = 0; i < 16; i++) { 6436ca2c52aSchristos temp = (signed16)(unsigned8)(*vA).b[i] + (signed16)(unsigned8)(*vB).b[i]; 6446ca2c52aSchristos (*vS).b[i] = altivec_unsigned_saturate_8(temp, &tempsat); 6456ca2c52aSchristos sat |= tempsat; 6466ca2c52aSchristos } 6476ca2c52aSchristos ALTIVEC_SET_SAT(sat); 6486ca2c52aSchristos PPC_INSN_VR_VSCR(VS_BITMASK, VA_BITMASK | VB_BITMASK); 6496ca2c52aSchristos 
0.4,6.VS,11.VA,16.VB,21.64:VX:av:vadduhm %VD, %VA, %VB:Vector Add Unsigned Half Word Modulo
	int i;
	for (i = 0; i < 8; i++)
	  (*vS).h[i] = ((*vA).h[i] + (*vB).h[i]) & 0xffff;
	PPC_INSN_VR(VS_BITMASK, VA_BITMASK | VB_BITMASK);

0.4,6.VS,11.VA,16.VB,21.576:VX:av:vadduhs %VD, %VA, %VB:Vector Add Unsigned Half Word Saturate
	int i, sat, tempsat;
	signed32 temp;
	sat = 0;	/* Fix: was read uninitialized by "sat |=" below.  */
	for (i = 0; i < 8; i++) {
	  temp = (signed32)(unsigned16)(*vA).h[i] + (signed32)(unsigned16)(*vB).h[i];
	  (*vS).h[i] = altivec_unsigned_saturate_16(temp, &tempsat);
	  sat |= tempsat;
	}
	ALTIVEC_SET_SAT(sat);
	PPC_INSN_VR_VSCR(VS_BITMASK, VA_BITMASK | VB_BITMASK);

0.4,6.VS,11.VA,16.VB,21.128:VX:av:vadduwm %VD, %VA, %VB:Vector Add Unsigned Word Modulo
	int i;
	for (i = 0; i < 4; i++)
	  (*vS).w[i] = (*vA).w[i] + (*vB).w[i];
	PPC_INSN_VR(VS_BITMASK, VA_BITMASK | VB_BITMASK);

0.4,6.VS,11.VA,16.VB,21.640:VX:av:vadduws %VD, %VA, %VB:Vector Add Unsigned Word Saturate
	int i, sat, tempsat;
	signed64 temp;
	sat = 0;	/* Fix: was read uninitialized by "sat |=" below.  */
	for (i = 0; i < 4; i++) {
	  temp = (signed64)(unsigned32)(*vA).w[i] + (signed64)(unsigned32)(*vB).w[i];
	  (*vS).w[i] = altivec_unsigned_saturate_32(temp, &tempsat);
	  sat |= tempsat;
	}
	ALTIVEC_SET_SAT(sat);
	PPC_INSN_VR_VSCR(VS_BITMASK, VA_BITMASK | VB_BITMASK);

#
# Vector AND instructions, 6-41, 6-42
#

0.4,6.VS,11.VA,16.VB,21.1028:VX:av:vand %VD, %VA, %VB:Vector Logical AND
	int i;
	for (i = 0; i < 4; i++)
	  (*vS).w[i] = (*vA).w[i] & (*vB).w[i];
	PPC_INSN_VR(VS_BITMASK, VA_BITMASK | VB_BITMASK);

0.4,6.VS,11.VA,16.VB,21.1092:VX:av:vandc %VD, %VA, %VB:Vector Logical AND with Complement
	int i;
	/* vS = vA & ~vB ("Complement"; description text fixed from
	   the original misspelling "Compliment").  */
	for (i = 0; i < 4; i++)
	  (*vS).w[i] = (*vA).w[i] & ~((*vB).w[i]);
	PPC_INSN_VR(VS_BITMASK, VA_BITMASK | VB_BITMASK);


#
# Vector Average instructions, 6-43, 6-48
#

0.4,6.VS,11.VA,16.VB,21.1282:VX:av:vavgsb %VD, %VA, %VB:Vector Average Signed Byte
	int i;
	signed16 temp, a, b;
	/* Widen, add one for round-to-nearest-up, then halve.  */
	for (i = 0; i < 16; i++) {
	  a = (signed16)(signed8)(*vA).b[i];
	  b = (signed16)(signed8)(*vB).b[i];
	  temp = a + b + 1;
	  (*vS).b[i] = (temp >> 1) & 0xff;
	}
	PPC_INSN_VR(VS_BITMASK, VA_BITMASK | VB_BITMASK);

0.4,6.VS,11.VA,16.VB,21.1346:VX:av:vavgsh %VD, %VA, %VB:Vector Average Signed Half Word
	int i;
	signed32 temp, a, b;
	for (i = 0; i < 8; i++) {
	  a = (signed32)(signed16)(*vA).h[i];
	  b = (signed32)(signed16)(*vB).h[i];
	  temp = a + b + 1;
	  (*vS).h[i] = (temp >> 1) & 0xffff;
	}
	PPC_INSN_VR(VS_BITMASK, VA_BITMASK | VB_BITMASK);

0.4,6.VS,11.VA,16.VB,21.1410:VX:av:vavgsw %VD, %VA, %VB:Vector Average Signed Word
	int i;
	signed64 temp, a, b;
	for (i = 0; i < 4; i++) {
	  a = (signed64)(signed32)(*vA).w[i];
	  b = (signed64)(signed32)(*vB).w[i];
	  temp = a + b + 1;
	  (*vS).w[i] = (temp >> 1) & 0xffffffff;
	}
	PPC_INSN_VR(VS_BITMASK, VA_BITMASK | VB_BITMASK);

0.4,6.VS,11.VA,16.VB,21.1026:VX:av:vavgub %VD, %VA, %VB:Vector Average Unsigned Byte
	int i;
	unsigned16 temp, a, b;
	for (i = 0; i < 16; i++) {
	  a = (*vA).b[i];
	  b = (*vB).b[i];
	  temp = a + b + 1;
	  (*vS).b[i] = (temp >> 1) & 0xff;
	}
	PPC_INSN_VR(VS_BITMASK, VA_BITMASK | VB_BITMASK);

0.4,6.VS,11.VA,16.VB,21.1090:VX:av:vavguh %VD, %VA, %VB:Vector Average Unsigned Half Word
	int i;
	unsigned32 temp, a, b;
	for (i = 0; i < 8; i++) {
	  a = (*vA).h[i];
	  b = (*vB).h[i];
	  temp = a + b + 1;
	  (*vS).h[i] = (temp >> 1) & 0xffff;
	}
	PPC_INSN_VR(VS_BITMASK, VA_BITMASK | VB_BITMASK);

0.4,6.VS,11.VA,16.VB,21.1154:VX:av:vavguw %VD, %VA, %VB:Vector Average Unsigned Word
	int i;
	unsigned64 temp, a, b;
	for (i = 0; i < 4; i++) {
	  a = (*vA).w[i];
	  b = (*vB).w[i];
	  temp = a + b + 1;
	  (*vS).w[i] = (temp >> 1) & 0xffffffff;
	}
	PPC_INSN_VR(VS_BITMASK, VA_BITMASK | VB_BITMASK);

#
# Vector Fixed Point Convert instructions, 6-49, 6-50
#

0.4,6.VS,11.UIMM,16.VB,21.842:VX:av:vcfsx %VD, %VB, %UIMM:Vector Convert From Signed Fixed-Point Word
	int i;
	unsigned32 f;
	sim_fpu b, div, d;
	for (i = 0; i < 4; i++) {
	  sim_fpu_32to (&b, (*vB).w[i]);
	  /* NOTE(review): divisor "2 << UIMM" is 2**(UIMM+1); the
	     architected divisor is 2**UIMM — confirm against the PEM
	     before changing.  */
	  sim_fpu_u32to (&div, 2 << UIMM, sim_fpu_round_default);
	  sim_fpu_div (&d, &b, &div);
	  sim_fpu_to32 (&f, &d);
	  (*vS).w[i] = f;
	}
	PPC_INSN_VR(VS_BITMASK, VB_BITMASK);

0.4,6.VS,11.UIMM,16.VB,21.778:VX:av:vcfux %VD, %VB, %UIMM:Vector Convert From Unsigned Fixed-Point Word
	int i;
	unsigned32 f;
	sim_fpu b, d, div;
	/* Fix: assembler string previously printed %VA, but this format
	   has no VA field (bits 11-15 are UIMM); use %VB as vcfsx does.  */
	for (i = 0; i < 4; i++) {
	  sim_fpu_32to (&b, (*vB).w[i]);
	  /* NOTE(review): see vcfsx about the "2 << UIMM" divisor.  */
	  sim_fpu_u32to (&div, 2 << UIMM, sim_fpu_round_default);
	  sim_fpu_div (&d, &b, &div);
	  sim_fpu_to32u (&f, &d, sim_fpu_round_default);
	  (*vS).w[i] = f;
	}
	PPC_INSN_VR(VS_BITMASK, VB_BITMASK);

#
# Vector Compare instructions, 6-51 ... 6-64
#

0.4,6.VS,11.VA,16.VB,21.RC,22.966:VXR:av:vcmpbpfpx %VD, %VA, %VB:Vector Compare Bounds Floating Point
	int i, le, ge;
	sim_fpu a, b, d;
	/* Bit 0 of each result word is set when !(a <= b), bit 1 when
	   !(a >= -b is not modelled here; this code tests a >= b).  */
	for (i = 0; i < 4; i++) {
	  sim_fpu_32to (&a, (*vA).w[i]);
	  sim_fpu_32to (&b, (*vB).w[i]);
	  le = sim_fpu_is_le(&a, &b);
	  ge = sim_fpu_is_ge(&a, &b);
	  (*vS).w[i] = (le ? 0 : 1 << 31) | (ge ? 0 : 1 << 30);
	}
	if (RC)
	  ALTIVEC_SET_CR6(vS, 0);
	PPC_INSN_VR_CR(VS_BITMASK, VA_BITMASK | VB_BITMASK, RC ? 0x000000f0 : 0);

0.4,6.VS,11.VA,16.VB,21.RC,22.198:VXR:av:vcmpeqfpx %VD, %VA, %VB:Vector Compare Equal-to-Floating Point
	int i;
	sim_fpu a, b;
	for (i = 0; i < 4; i++) {
	  sim_fpu_32to (&a, (*vA).w[i]);
	  sim_fpu_32to (&b, (*vB).w[i]);
	  if (sim_fpu_is_eq(&a, &b))
	    (*vS).w[i] = 0xffffffff;
	  else
	    (*vS).w[i] = 0;
	}
	if (RC)
	  ALTIVEC_SET_CR6(vS, 1);
	PPC_INSN_VR_CR(VS_BITMASK, VA_BITMASK | VB_BITMASK, RC ? 0x000000f0 : 0);

0.4,6.VS,11.VA,16.VB,21.RC,22.6:VXR:av:vcmpequbx %VD, %VA, %VB:Vector Compare Equal-to Unsigned Byte
	int i;
	for (i = 0; i < 16; i++)
	  if ((*vA).b[i] == (*vB).b[i])
	    (*vS).b[i] = 0xff;
	  else
	    (*vS).b[i] = 0;
	if (RC)
	  ALTIVEC_SET_CR6(vS, 1);
	PPC_INSN_VR_CR(VS_BITMASK, VA_BITMASK | VB_BITMASK, RC ? 0x000000f0 : 0);

0.4,6.VS,11.VA,16.VB,21.RC,22.70:VXR:av:vcmpequhx %VD, %VA, %VB:Vector Compare Equal-to Unsigned Half Word
	int i;
	for (i = 0; i < 8; i++)
	  if ((*vA).h[i] == (*vB).h[i])
	    (*vS).h[i] = 0xffff;
	  else
	    (*vS).h[i] = 0;
	if (RC)
	  ALTIVEC_SET_CR6(vS, 1);
	PPC_INSN_VR_CR(VS_BITMASK, VA_BITMASK | VB_BITMASK, RC ? 0x000000f0 : 0);

0.4,6.VS,11.VA,16.VB,21.RC,22.134:VXR:av:vcmpequwx %VD, %VA, %VB:Vector Compare Equal-to Unsigned Word
	int i;
	for (i = 0; i < 4; i++)
	  if ((*vA).w[i] == (*vB).w[i])
	    (*vS).w[i] = 0xffffffff;
	  else
	    (*vS).w[i] = 0;
	if (RC)
	  ALTIVEC_SET_CR6(vS, 1);
	PPC_INSN_VR_CR(VS_BITMASK, VA_BITMASK | VB_BITMASK, RC ? 0x000000f0 : 0);

0.4,6.VS,11.VA,16.VB,21.RC,22.454:VXR:av:vcmpgefpx %VD, %VA, %VB:Vector Compare Greater-Than-or-Equal-to Floating Point
	int i;
	sim_fpu a, b;
	for (i = 0; i < 4; i++) {
	  sim_fpu_32to (&a, (*vA).w[i]);
	  sim_fpu_32to (&b, (*vB).w[i]);
	  if (sim_fpu_is_ge(&a, &b))
	    (*vS).w[i] = 0xffffffff;
	  else
	    (*vS).w[i] = 0;
	}
	if (RC)
	  ALTIVEC_SET_CR6(vS, 1);
	PPC_INSN_VR_CR(VS_BITMASK, VA_BITMASK | VB_BITMASK, RC ? 0x000000f0 : 0);

0.4,6.VS,11.VA,16.VB,21.RC,22.710:VXR:av:vcmpgtfpx %VD, %VA, %VB:Vector Compare Greater-Than Floating Point
	int i;
	sim_fpu a, b;
	for (i = 0; i < 4; i++) {
	  sim_fpu_32to (&a, (*vA).w[i]);
	  sim_fpu_32to (&b, (*vB).w[i]);
	  if (sim_fpu_is_gt(&a, &b))
	    (*vS).w[i] = 0xffffffff;
	  else
	    (*vS).w[i] = 0;
	}
	if (RC)
	  ALTIVEC_SET_CR6(vS, 1);
	PPC_INSN_VR_CR(VS_BITMASK, VA_BITMASK | VB_BITMASK, RC ? 0x000000f0 : 0);

0.4,6.VS,11.VA,16.VB,21.RC,22.774:VXR:av:vcmpgtsbx %VD, %VA, %VB:Vector Compare Greater-Than Signed Byte
	int i;
	signed8 a, b;
	for (i = 0; i < 16; i++) {
	  a = (*vA).b[i];
	  b = (*vB).b[i];
	  if (a > b)
	    (*vS).b[i] = 0xff;
	  else
	    (*vS).b[i] = 0;
	}
	if (RC)
	  ALTIVEC_SET_CR6(vS, 1);
	PPC_INSN_VR_CR(VS_BITMASK, VA_BITMASK | VB_BITMASK, RC ? 0x000000f0 : 0);

0.4,6.VS,11.VA,16.VB,21.RC,22.838:VXR:av:vcmpgtshx %VD, %VA, %VB:Vector Compare Greater-Than Signed Half Word
	int i;
	signed16 a, b;
	for (i = 0; i < 8; i++) {
	  a = (*vA).h[i];
	  b = (*vB).h[i];
	  if (a > b)
	    (*vS).h[i] = 0xffff;
	  else
	    (*vS).h[i] = 0;
	}
	if (RC)
	  ALTIVEC_SET_CR6(vS, 1);
	PPC_INSN_VR_CR(VS_BITMASK, VA_BITMASK | VB_BITMASK, RC ? 0x000000f0 : 0);

0.4,6.VS,11.VA,16.VB,21.RC,22.902:VXR:av:vcmpgtswx %VD, %VA, %VB:Vector Compare Greater-Than Signed Word
	int i;
	signed32 a, b;
	for (i = 0; i < 4; i++) {
	  a = (*vA).w[i];
	  b = (*vB).w[i];
	  if (a > b)
	    (*vS).w[i] = 0xffffffff;
	  else
	    (*vS).w[i] = 0;
	}
	if (RC)
	  ALTIVEC_SET_CR6(vS, 1);
	PPC_INSN_VR_CR(VS_BITMASK, VA_BITMASK | VB_BITMASK, RC ? 0x000000f0 : 0);

0.4,6.VS,11.VA,16.VB,21.RC,22.518:VXR:av:vcmpgtubx %VD, %VA, %VB:Vector Compare Greater-Than Unsigned Byte
	int i;
	unsigned8 a, b;
	for (i = 0; i < 16; i++) {
	  a = (*vA).b[i];
	  b = (*vB).b[i];
	  if (a > b)
	    (*vS).b[i] = 0xff;
	  else
	    (*vS).b[i] = 0;
	}
	if (RC)
	  ALTIVEC_SET_CR6(vS, 1);
	PPC_INSN_VR_CR(VS_BITMASK, VA_BITMASK | VB_BITMASK, RC ? 0x000000f0 : 0);

0.4,6.VS,11.VA,16.VB,21.RC,22.582:VXR:av:vcmpgtuhx %VD, %VA, %VB:Vector Compare Greater-Than Unsigned Half Word
	int i;
	unsigned16 a, b;
	for (i = 0; i < 8; i++) {
	  a = (*vA).h[i];
	  b = (*vB).h[i];
	  if (a > b)
	    (*vS).h[i] = 0xffff;
	  else
	    (*vS).h[i] = 0;
	}
	if (RC)
	  ALTIVEC_SET_CR6(vS, 1);
	PPC_INSN_VR_CR(VS_BITMASK, VA_BITMASK | VB_BITMASK, RC ? 0x000000f0 : 0);

0.4,6.VS,11.VA,16.VB,21.RC,22.646:VXR:av:vcmpgtuwx %VD, %VA, %VB:Vector Compare Greater-Than Unsigned Word
	int i;
	unsigned32 a, b;
	for (i = 0; i < 4; i++) {
	  a = (*vA).w[i];
	  b = (*vB).w[i];
	  if (a > b)
	    (*vS).w[i] = 0xffffffff;
	  else
	    (*vS).w[i] = 0;
	}
	if (RC)
	  ALTIVEC_SET_CR6(vS, 1);
	PPC_INSN_VR_CR(VS_BITMASK, VA_BITMASK | VB_BITMASK, RC ? 0x000000f0 : 0);

#
# Vector Convert instructions, 6-65, 6-66.
#

0.4,6.VS,11.UIMM,16.VB,21.970:VX:av:vctsxs %VD, %VB, %UIMM:Vector Convert to Signed Fixed-Point Word Saturate
	int i, sat, tempsat;
	signed64 fixval;
	sim_fpu src, scaled, scale;
	sat = 0;
	/* Scale each source float up by the immediate, round to an
	   integer, then saturate into a signed 32-bit word.  */
	for (i = 0; i < 4; i++) {
	  sim_fpu_32to (&src, (*vB).w[i]);
	  /* NOTE(review): "2 << UIMM" is 2**(UIMM+1); confirm the
	     intended scale factor against the PEM.  */
	  sim_fpu_u32to (&scale, 2 << UIMM, sim_fpu_round_default);
	  sim_fpu_mul (&scaled, &src, &scale);
	  sim_fpu_to64i (&fixval, &scaled, sim_fpu_round_default);
	  (*vS).w[i] = altivec_signed_saturate_32(fixval, &tempsat);
	  sat |= tempsat;
	}
	ALTIVEC_SET_SAT(sat);
	PPC_INSN_VR_VSCR(VS_BITMASK, VB_BITMASK);

0.4,6.VS,11.UIMM,16.VB,21.906:VX:av:vctuxs %VD, %VB, %UIMM:Vector Convert to Unsigned Fixed-Point Word Saturate
	int i, sat, tempsat;
	signed64 fixval;
	sim_fpu src, scaled, scale;
	sat = 0;
	/* As vctsxs, but the final saturation is to an unsigned word.  */
	for (i = 0; i < 4; i++) {
	  sim_fpu_32to (&src, (*vB).w[i]);
	  sim_fpu_u32to (&scale, 2 << UIMM, sim_fpu_round_default);
	  sim_fpu_mul (&scaled, &src, &scale);
	  sim_fpu_to64u (&fixval, &scaled, sim_fpu_round_default);
	  (*vS).w[i] = altivec_unsigned_saturate_32(fixval, &tempsat);
	  sat |= tempsat;
	}
	ALTIVEC_SET_SAT(sat);
	PPC_INSN_VR_VSCR(VS_BITMASK, VB_BITMASK);

#
# Vector Estimate instructions, 6-67 ... 6-70.
#

0.4,6.VS,11.0,16.VB,21.394:VX:av:vexptefp %VD, %VB:Vector 2 Raised to the Exponent Estimate Floating Point
	int i;
	unsigned32 f;
	signed32 bi;
	sim_fpu b, d;
	for (i = 0; i < 4; i++) {
	  /*HACK!*/
	  /* NOTE(review): "2 ^ bi" is C XOR, not exponentiation — this
	     pre-existing hack does not compute 2**bi; kept as-is.  */
	  sim_fpu_32to (&b, (*vB).w[i]);
	  sim_fpu_to32i (&bi, &b, sim_fpu_round_default);
	  bi = 2 ^ bi;
	  sim_fpu_32to (&d, bi);
	  sim_fpu_to32 (&f, &d);
	  (*vS).w[i] = f;
	}
	PPC_INSN_VR_VSCR(VS_BITMASK, VB_BITMASK);

0.4,6.VS,11.0,16.VB,21.458:VX:av:vlogefp %VD, %VB:Vector Log2 Estimate Floating Point
	int i;
	unsigned32 c, u, f;
	sim_fpu b, cfpu, d;
	for (i = 0; i < 4; i++) {
	  /*HACK!*/
	  /* Crude log2 estimate: count halvings of the integer value.  */
	  sim_fpu_32to (&b, (*vB).w[i]);
	  sim_fpu_to32u (&u, &b, sim_fpu_round_default);
	  for (c = 0; (u /= 2) > 1; c++)
	    ;
	  sim_fpu_32to (&cfpu, c);
	  sim_fpu_add (&d, &b, &cfpu);
	  sim_fpu_to32 (&f, &d);
	  (*vS).w[i] = f;
	}
	PPC_INSN_VR_VSCR(VS_BITMASK, VB_BITMASK);

#
# Vector Multiply Add instruction, 6-71
#

0.4,6.VS,11.VA,16.VB,21.VC,26.46:VAX:av:vmaddfp %VD, %VA, %VB, %VC:Vector Multiply Add Floating Point
	int i;
	unsigned32 f;
	sim_fpu a, b, c, d, e;
	/* Per element: vS = (vA * vC) + vB.  */
	for (i = 0; i < 4; i++) {
	  sim_fpu_32to (&a, (*vA).w[i]);
	  sim_fpu_32to (&b, (*vB).w[i]);
	  sim_fpu_32to (&c, (*vC).w[i]);
	  sim_fpu_mul (&e, &a, &c);
	  sim_fpu_add (&d, &e, &b);
	  sim_fpu_to32 (&f, &d);
	  (*vS).w[i] = f;
	}
	PPC_INSN_VR(VS_BITMASK, VA_BITMASK | VB_BITMASK | VC_BITMASK);


#
# Vector Maximum instructions, 6-72 ... 6-78.
#

0.4,6.VS,11.VA,16.VB,21.1034:VX:av:vmaxfp %VD, %VA, %VB:Vector Maximum Floating Point
	int i;
	unsigned32 f;
	sim_fpu a, b, d;
	for (i = 0; i < 4; i++) {
	  sim_fpu_32to (&a, (*vA).w[i]);
	  sim_fpu_32to (&b, (*vB).w[i]);
	  sim_fpu_max (&d, &a, &b);
	  sim_fpu_to32 (&f, &d);
	  (*vS).w[i] = f;
	}
	PPC_INSN_VR(VS_BITMASK, VA_BITMASK | VB_BITMASK);

0.4,6.VS,11.VA,16.VB,21.258:VX:av:vmaxsb %VD, %VA, %VB:Vector Maximum Signed Byte
	int i;
	signed8 x, y;
	for (i = 0; i < 16; i++) {
	  x = (*vA).b[i];
	  y = (*vB).b[i];
	  (*vS).b[i] = (x > y) ? x : y;
	}
	PPC_INSN_VR(VS_BITMASK, VA_BITMASK | VB_BITMASK);

0.4,6.VS,11.VA,16.VB,21.322:VX:av:vmaxsh %VD, %VA, %VB:Vector Maximum Signed Half Word
	int i;
	signed16 x, y;
	for (i = 0; i < 8; i++) {
	  x = (*vA).h[i];
	  y = (*vB).h[i];
	  (*vS).h[i] = (x > y) ? x : y;
	}
	PPC_INSN_VR(VS_BITMASK, VA_BITMASK | VB_BITMASK);

0.4,6.VS,11.VA,16.VB,21.386:VX:av:vmaxsw %VD, %VA, %VB:Vector Maximum Signed Word
	int i;
	signed32 x, y;
	for (i = 0; i < 4; i++) {
	  x = (*vA).w[i];
	  y = (*vB).w[i];
	  (*vS).w[i] = (x > y) ? x : y;
	}
	PPC_INSN_VR(VS_BITMASK, VA_BITMASK | VB_BITMASK);

0.4,6.VS,11.VA,16.VB,21.2:VX:av:vmaxub %VD, %VA, %VB:Vector Maximum Unsigned Byte
	int i;
	unsigned8 x, y;
	for (i = 0; i < 16; i++) {
	  x = (*vA).b[i];
	  y = (*vB).b[i];
	  (*vS).b[i] = (x > y) ? x : y;
	}
	PPC_INSN_VR(VS_BITMASK, VA_BITMASK | VB_BITMASK);

0.4,6.VS,11.VA,16.VB,21.66:VX:av:vmaxus %VD, %VA, %VB:Vector Maximum Unsigned Half Word
	int i;
	unsigned16 x, y;
	/* NOTE(review): mnemonic "vmaxus" looks like a typo for
	   "vmaxuh" (opcode 66, half-word operation) — confirm before
	   renaming, as the string feeds generated tables.  */
	for (i = 0; i < 8; i++) {
	  x = (*vA).h[i];
	  y = (*vB).h[i];
	  (*vS).h[i] = (x > y) ? x : y;
	}
	PPC_INSN_VR(VS_BITMASK, VA_BITMASK | VB_BITMASK);
0.4,6.VS,11.VA,16.VB,21.130:VX:av:vmaxuw %VD, %VA, %VB:Vector Maximum Unsigned Word
	int i;
	unsigned32 a, b;
	for (i = 0; i < 4; i++) {
	  a = (*vA).w[i];
	  b = (*vB).w[i];
	  if (a > b)
	    (*vS).w[i] = a;
	  else
	    (*vS).w[i] = b;
	}
	PPC_INSN_VR(VS_BITMASK, VA_BITMASK | VB_BITMASK);


#
# Vector Multiple High instructions, 6-79, 6-80.
#

0.4,6.VS,11.VA,16.VB,21.VC,26.32:VAX:av:vmhaddshs %VD, %VA, %VB, %VC:Vector Multiple High and Add Signed Half Word Saturate
	int i, sat, tempsat;
	signed16 a, b;
	signed32 prod, temp, c;
	sat = 0;	/* Fix: was read uninitialized by "sat |=" below.  */
	/* (a*b) >> 15 plus c, saturated to 16 bits.  */
	for (i = 0; i < 8; i++) {
	  a = (*vA).h[i];
	  b = (*vB).h[i];
	  c = (signed32)(signed16)(*vC).h[i];
	  prod = (signed32)a * (signed32)b;
	  temp = (prod >> 15) + c;
	  (*vS).h[i] = altivec_signed_saturate_16(temp, &tempsat);
	  sat |= tempsat;
	}
	ALTIVEC_SET_SAT(sat);
	PPC_INSN_VR_VSCR(VS_BITMASK, VA_BITMASK | VB_BITMASK | VC_BITMASK);

0.4,6.VS,11.VA,16.VB,21.VC,26.33:VAX:av:vmhraddshs %VD, %VA, %VB, %VC:Vector Multiple High Round and Add Signed Half Word Saturate
	int i, sat, tempsat;
	signed16 a, b;
	signed32 prod, temp, c;
	sat = 0;	/* Fix: was read uninitialized by "sat |=" below.  */
	/* As vmhaddshs, with 0x4000 added to the product for rounding.  */
	for (i = 0; i < 8; i++) {
	  a = (*vA).h[i];
	  b = (*vB).h[i];
	  c = (signed32)(signed16)(*vC).h[i];
	  prod = (signed32)a * (signed32)b;
	  prod += 0x4000;
	  temp = (prod >> 15) + c;
	  (*vS).h[i] = altivec_signed_saturate_16(temp, &tempsat);
	  sat |= tempsat;
	}
	ALTIVEC_SET_SAT(sat);
	PPC_INSN_VR_VSCR(VS_BITMASK, VA_BITMASK | VB_BITMASK | VC_BITMASK);


#
# Vector Minimum instructions, 6-81 ... 6-87
#

0.4,6.VS,11.VA,16.VB,21.1098:VX:av:vminfp %VD, %VA, %VB:Vector Minimum Floating Point
	int i;
	unsigned32 f;
	sim_fpu a, b, d;
	for (i = 0; i < 4; i++) {
	  sim_fpu_32to (&a, (*vA).w[i]);
	  sim_fpu_32to (&b, (*vB).w[i]);
	  sim_fpu_min (&d, &a, &b);
	  sim_fpu_to32 (&f, &d);
	  (*vS).w[i] = f;
	}
	PPC_INSN_VR(VS_BITMASK, VA_BITMASK | VB_BITMASK);

0.4,6.VS,11.VA,16.VB,21.770:VX:av:vminsb %VD, %VA, %VB:Vector Minimum Signed Byte
	int i;
	signed8 a, b;
	for (i = 0; i < 16; i++) {
	  a = (*vA).b[i];
	  b = (*vB).b[i];
	  if (a < b)
	    (*vS).b[i] = a;
	  else
	    (*vS).b[i] = b;
	}
	PPC_INSN_VR(VS_BITMASK, VA_BITMASK | VB_BITMASK);

0.4,6.VS,11.VA,16.VB,21.834:VX:av:vminsh %VD, %VA, %VB:Vector Minimum Signed Half Word
	int i;
	signed16 a, b;
	for (i = 0; i < 8; i++) {
	  a = (*vA).h[i];
	  b = (*vB).h[i];
	  if (a < b)
	    (*vS).h[i] = a;
	  else
	    (*vS).h[i] = b;
	}
	PPC_INSN_VR(VS_BITMASK, VA_BITMASK | VB_BITMASK);

0.4,6.VS,11.VA,16.VB,21.898:VX:av:vminsw %VD, %VA, %VB:Vector Minimum Signed Word
	int i;
	signed32 a, b;
	for (i = 0; i < 4; i++) {
	  a = (*vA).w[i];
	  b = (*vB).w[i];
	  if (a < b)
	    (*vS).w[i] = a;
	  else
	    (*vS).w[i] = b;
	}
	PPC_INSN_VR(VS_BITMASK, VA_BITMASK | VB_BITMASK);

0.4,6.VS,11.VA,16.VB,21.514:VX:av:vminub %VD, %VA, %VB:Vector Minimum Unsigned Byte
	int i;
	unsigned8 a, b;
	for (i = 0; i < 16; i++) {
	  a = (*vA).b[i];
	  b = (*vB).b[i];
	  if (a < b)
	    (*vS).b[i] = a;
	  else
	    (*vS).b[i] = b;
	}
	PPC_INSN_VR(VS_BITMASK, VA_BITMASK | VB_BITMASK);

0.4,6.VS,11.VA,16.VB,21.578:VX:av:vminuh %VD, %VA, %VB:Vector Minimum Unsigned Half Word
	int i;
	unsigned16 a, b;
	for (i = 0; i < 8; i++) {
	  a = (*vA).h[i];
	  b = (*vB).h[i];
	  if (a < b)
	    (*vS).h[i] = a;
	  else
	    (*vS).h[i] = b;
	}
	PPC_INSN_VR(VS_BITMASK, VA_BITMASK | VB_BITMASK);
12966ca2c52aSchristos0.4,6.VS,11.VA,16.VB,21.642:VX:av:vminuw %VD, %VA, %VB:Vector Minimum Unsigned Word 12976ca2c52aSchristos int i; 12986ca2c52aSchristos unsigned32 a, b; 12996ca2c52aSchristos for (i = 0; i < 4; i++) { 13006ca2c52aSchristos a = (*vA).w[i]; 13016ca2c52aSchristos b = (*vB).w[i]; 13026ca2c52aSchristos if (a < b) 13036ca2c52aSchristos (*vS).w[i] = a; 13046ca2c52aSchristos else 13056ca2c52aSchristos (*vS).w[i] = b; 13066ca2c52aSchristos } 13076ca2c52aSchristos PPC_INSN_VR(VS_BITMASK, VA_BITMASK | VB_BITMASK); 13086ca2c52aSchristos 13096ca2c52aSchristos 13106ca2c52aSchristos# 13116ca2c52aSchristos# Vector Multiply Low instruction, 6-88 13126ca2c52aSchristos# 13136ca2c52aSchristos 13146ca2c52aSchristos0.4,6.VS,11.VA,16.VB,21.VC,26.34:VAX:av:vmladduhm %VD, %VA, %VB, %VC:Vector Multiply Low and Add Unsigned Half Word Modulo 13156ca2c52aSchristos int i; 13166ca2c52aSchristos unsigned16 a, b, c; 13176ca2c52aSchristos unsigned32 prod; 13186ca2c52aSchristos for (i = 0; i < 8; i++) { 13196ca2c52aSchristos a = (*vA).h[i]; 13206ca2c52aSchristos b = (*vB).h[i]; 13216ca2c52aSchristos c = (*vC).h[i]; 13226ca2c52aSchristos prod = (unsigned32)a * (unsigned32)b; 13236ca2c52aSchristos (*vS).h[i] = (prod + c) & 0xffff; 13246ca2c52aSchristos } 13256ca2c52aSchristos PPC_INSN_VR(VS_BITMASK, VA_BITMASK | VB_BITMASK | VC_BITMASK); 13266ca2c52aSchristos 13276ca2c52aSchristos 13286ca2c52aSchristos# 13296ca2c52aSchristos# Vector Merge instructions, 6-89 ... 
# 6-94
#

0.4,6.VS,11.VA,16.VB,21.12:VX:av:vmrghb %VD, %VA, %VB:Vector Merge High Byte
	int i;
	/* Interleave the high halves of vA and vB byte-wise.  */
	for (i = 0; i < 16; i += 2) {
	  (*vS).b[AV_BINDEX(i)] = (*vA).b[AV_BINDEX(i/2)];
	  (*vS).b[AV_BINDEX(i+1)] = (*vB).b[AV_BINDEX(i/2)];
	}
	PPC_INSN_VR(VS_BITMASK, VA_BITMASK | VB_BITMASK);

0.4,6.VS,11.VA,16.VB,21.76:VX:av:vmrghh %VD, %VA, %VB:Vector Merge High Half Word
	int i;
	for (i = 0; i < 8; i += 2) {
	  (*vS).h[AV_HINDEX(i)] = (*vA).h[AV_HINDEX(i/2)];
	  (*vS).h[AV_HINDEX(i+1)] = (*vB).h[AV_HINDEX(i/2)];
	}
	PPC_INSN_VR(VS_BITMASK, VA_BITMASK | VB_BITMASK);

0.4,6.VS,11.VA,16.VB,21.140:VX:av:vmrghw %VD, %VA, %VB:Vector Merge High Word
	int i;
	for (i = 0; i < 4; i += 2) {
	  (*vS).w[i] = (*vA).w[i/2];
	  (*vS).w[i+1] = (*vB).w[i/2];
	}
	PPC_INSN_VR(VS_BITMASK, VA_BITMASK | VB_BITMASK);

0.4,6.VS,11.VA,16.VB,21.268:VX:av:vmrglb %VD, %VA, %VB:Vector Merge Low Byte
	int i;
	/* Same as vmrghb but sourcing the low halves (+8 byte offset).  */
	for (i = 0; i < 16; i += 2) {
	  (*vS).b[AV_BINDEX(i)] = (*vA).b[AV_BINDEX((i/2) + 8)];
	  (*vS).b[AV_BINDEX(i+1)] = (*vB).b[AV_BINDEX((i/2) + 8)];
	}
	PPC_INSN_VR(VS_BITMASK, VA_BITMASK | VB_BITMASK);

0.4,6.VS,11.VA,16.VB,21.332:VX:av:vmrglh %VD, %VA, %VB:Vector Merge Low Half Word
	int i;
	for (i = 0; i < 8; i += 2) {
	  (*vS).h[AV_HINDEX(i)] = (*vA).h[AV_HINDEX((i/2) + 4)];
	  (*vS).h[AV_HINDEX(i+1)] = (*vB).h[AV_HINDEX((i/2) + 4)];
	}
	PPC_INSN_VR(VS_BITMASK, VA_BITMASK | VB_BITMASK);

0.4,6.VS,11.VA,16.VB,21.396:VX:av:vmrglw %VD, %VA, %VB:Vector Merge Low Word
	int i;
	for (i = 0; i < 4; i += 2) {
	  (*vS).w[i] = (*vA).w[(i/2) + 2];
	  (*vS).w[i+1] = (*vB).w[(i/2) + 2];
	}
	PPC_INSN_VR(VS_BITMASK, VA_BITMASK | VB_BITMASK);


#
# Vector Multiply Sum instructions, 6-95 ... 6-100
#

0.4,6.VS,11.VA,16.VB,21.VC,26.37:VAX:av:vmsummbm %VD, %VA, %VB, %VC:Vector Multiply Sum Mixed-Sign Byte Modulo
	int i, j;
	signed32 temp;
	signed16 prod, a;
	unsigned16 b;
	/* Each word: vC word plus sum of 4 signed(vA)*unsigned(vB) byte
	   products, modulo arithmetic.  */
	for (i = 0; i < 4; i++) {
	  temp = (*vC).w[i];
	  for (j = 0; j < 4; j++) {
	    a = (signed16)(signed8)(*vA).b[i*4+j];
	    b = (*vB).b[i*4+j];
	    prod = a * b;
	    temp += (signed32)prod;
	  }
	  (*vS).w[i] = temp;
	}
	PPC_INSN_VR(VS_BITMASK, VA_BITMASK | VB_BITMASK | VC_BITMASK);

0.4,6.VS,11.VA,16.VB,21.VC,26.40:VAX:av:vmsumshm %VD, %VA, %VB, %VC:Vector Multiply Sum Signed Half Word Modulo
	int i, j;
	signed32 temp, prod, a, b;
	for (i = 0; i < 4; i++) {
	  temp = (*vC).w[i];
	  for (j = 0; j < 2; j++) {
	    a = (signed32)(signed16)(*vA).h[i*2+j];
	    b = (signed32)(signed16)(*vB).h[i*2+j];
	    prod = a * b;
	    temp += prod;
	  }
	  (*vS).w[i] = temp;
	}
	PPC_INSN_VR(VS_BITMASK, VA_BITMASK | VB_BITMASK | VC_BITMASK);

0.4,6.VS,11.VA,16.VB,21.VC,26.41:VAX:av:vmsumshs %VD, %VA, %VB, %VC:Vector Multiply Sum Signed Half Word Saturate
	int i, j, sat, tempsat;
	signed64 temp;
	signed32 prod, a, b;
	sat = 0;
	/* 64-bit accumulator so the pre-saturation sum cannot overflow.  */
	for (i = 0; i < 4; i++) {
	  temp = (signed64)(signed32)(*vC).w[i];
	  for (j = 0; j < 2; j++) {
	    a = (signed32)(signed16)(*vA).h[i*2+j];
	    b = (signed32)(signed16)(*vB).h[i*2+j];
	    prod = a * b;
	    temp += (signed64)prod;
	  }
	  (*vS).w[i] = altivec_signed_saturate_32(temp, &tempsat);
	  sat |= tempsat;
	}
	ALTIVEC_SET_SAT(sat);
	PPC_INSN_VR_VSCR(VS_BITMASK, VA_BITMASK | VB_BITMASK | VC_BITMASK);

0.4,6.VS,11.VA,16.VB,21.VC,26.36:VAX:av:vmsumubm %VD, %VA, %VB, %VC:Vector Multiply Sum Unsigned Byte Modulo
	int i, j;
	unsigned32 temp;
	unsigned16 prod, a, b;
	for (i = 0; i < 4; i++) {
	  temp = (*vC).w[i];
	  for (j = 0; j < 4; j++) {
	    a = (*vA).b[i*4+j];
	    b = (*vB).b[i*4+j];
	    prod = a * b;
	    temp += prod;
	  }
	  (*vS).w[i] = temp;
	}
	PPC_INSN_VR(VS_BITMASK, VA_BITMASK | VB_BITMASK | VC_BITMASK);

0.4,6.VS,11.VA,16.VB,21.VC,26.38:VAX:av:vmsumuhm %VD, %VA, %VB, %VC:Vector Multiply Sum Unsigned Half Word Modulo
	int i, j;
	unsigned32 temp, prod, a, b;
	for (i = 0; i < 4; i++) {
	  temp = (*vC).w[i];
	  for (j = 0; j < 2; j++) {
	    a = (*vA).h[i*2+j];
	    b = (*vB).h[i*2+j];
	    prod = a * b;
	    temp += prod;
	  }
	  (*vS).w[i] = temp;
	}
	PPC_INSN_VR(VS_BITMASK, VA_BITMASK | VB_BITMASK | VC_BITMASK);

0.4,6.VS,11.VA,16.VB,21.VC,26.39:VAX:av:vmsumuhs %VD, %VA, %VB, %VC:Vector Multiply Sum Unsigned Half Word Saturate
	int i, j, sat, tempsat;
	unsigned32 temp, prod, a, b;
	sat = 0;
	for (i = 0; i < 4; i++) {
	  temp = (*vC).w[i];
	  for (j = 0; j < 2; j++) {
	    a = (*vA).h[i*2+j];
	    b = (*vB).h[i*2+j];
	    prod = a * b;
	    temp += prod;
	  }
	  (*vS).w[i] = altivec_unsigned_saturate_32(temp, &tempsat);
	  sat |= tempsat;
	}
	ALTIVEC_SET_SAT(sat);
	PPC_INSN_VR_VSCR(VS_BITMASK, VA_BITMASK | VB_BITMASK | VC_BITMASK);


#
# Vector Multiply Even/Odd instructions, 6-101 ...
# 6-108
#

0.4,6.VS,11.VA,16.VB,21.776:VX:av:vmulesb %VD, %VA, %VB:Vector Multiply Even Signed Byte
	int i;
	signed8 a, b;
	signed16 prod;
	/* Even byte elements multiplied into full half-word results.  */
	for (i = 0; i < 8; i++) {
	  a = (*vA).b[AV_BINDEX(i*2)];
	  b = (*vB).b[AV_BINDEX(i*2)];
	  prod = a * b;
	  (*vS).h[AV_HINDEX(i)] = prod;
	}
	PPC_INSN_VR(VS_BITMASK, VA_BITMASK | VB_BITMASK);

0.4,6.VS,11.VA,16.VB,21.840:VX:av:vmulesh %VD, %VA, %VB:Vector Multiply Even Signed Half Word
	int i;
	signed16 a, b;
	signed32 prod;
	for (i = 0; i < 4; i++) {
	  a = (*vA).h[AV_HINDEX(i*2)];
	  b = (*vB).h[AV_HINDEX(i*2)];
	  prod = a * b;
	  (*vS).w[i] = prod;
	}
	PPC_INSN_VR(VS_BITMASK, VA_BITMASK | VB_BITMASK);

0.4,6.VS,11.VA,16.VB,21.520:VX:av:vmuleub %VD, %VA, %VB:Vector Multiply Even Unsigned Byte
	int i;
	unsigned8 a, b;
	unsigned16 prod;
	for (i = 0; i < 8; i++) {
	  a = (*vA).b[AV_BINDEX(i*2)];
	  b = (*vB).b[AV_BINDEX(i*2)];
	  prod = a * b;
	  (*vS).h[AV_HINDEX(i)] = prod;
	}
	PPC_INSN_VR(VS_BITMASK, VA_BITMASK | VB_BITMASK);

0.4,6.VS,11.VA,16.VB,21.584:VX:av:vmuleuh %VD, %VA, %VB:Vector Multiply Even Unsigned Half Word
	int i;
	unsigned16 a, b;
	unsigned32 prod;
	for (i = 0; i < 4; i++) {
	  a = (*vA).h[AV_HINDEX(i*2)];
	  b = (*vB).h[AV_HINDEX(i*2)];
	  prod = a * b;
	  (*vS).w[i] = prod;
	}
	PPC_INSN_VR(VS_BITMASK, VA_BITMASK | VB_BITMASK);

0.4,6.VS,11.VA,16.VB,21.264:VX:av:vmulosb %VD, %VA, %VB:Vector Multiply Odd Signed Byte
	int i;
	signed8 a, b;
	signed16 prod;
	/* Odd elements: same as the even forms but with a +1 index bias.  */
	for (i = 0; i < 8; i++) {
	  a = (*vA).b[AV_BINDEX((i*2)+1)];
	  b = (*vB).b[AV_BINDEX((i*2)+1)];
	  prod = a * b;
	  (*vS).h[AV_HINDEX(i)] = prod;
	}
	PPC_INSN_VR(VS_BITMASK, VA_BITMASK | VB_BITMASK);

0.4,6.VS,11.VA,16.VB,21.328:VX:av:vmulosh %VD, %VA, %VB:Vector Multiply Odd Signed Half Word
	int i;
	signed16 a, b;
	signed32 prod;
	for (i = 0; i < 4; i++) {
	  a = (*vA).h[AV_HINDEX((i*2)+1)];
	  b = (*vB).h[AV_HINDEX((i*2)+1)];
	  prod = a * b;
	  (*vS).w[i] = prod;
	}
	PPC_INSN_VR(VS_BITMASK, VA_BITMASK | VB_BITMASK);

0.4,6.VS,11.VA,16.VB,21.8:VX:av:vmuloub %VD, %VA, %VB:Vector Multiply Odd Unsigned Byte
	int i;
	unsigned8 a, b;
	unsigned16 prod;
	for (i = 0; i < 8; i++) {
	  a = (*vA).b[AV_BINDEX((i*2)+1)];
	  b = (*vB).b[AV_BINDEX((i*2)+1)];
	  prod = a * b;
	  (*vS).h[AV_HINDEX(i)] = prod;
	}
	PPC_INSN_VR(VS_BITMASK, VA_BITMASK | VB_BITMASK);

0.4,6.VS,11.VA,16.VB,21.72:VX:av:vmulouh %VD, %VA, %VB:Vector Multiply Odd Unsigned Half Word
	int i;
	unsigned16 a, b;
	unsigned32 prod;
	for (i = 0; i < 4; i++) {
	  a = (*vA).h[AV_HINDEX((i*2)+1)];
	  b = (*vB).h[AV_HINDEX((i*2)+1)];
	  prod = a * b;
	  (*vS).w[i] = prod;
	}
	PPC_INSN_VR(VS_BITMASK, VA_BITMASK | VB_BITMASK);


#
# Vector Negative Multiply-Subtract instruction, 6-109
#

0.4,6.VS,11.VA,16.VB,21.VC,26.47:VX:av:vnmsubfp %VD, %VA, %VB, %VC:Vector Negative Multiply-Subtract Floating Point
	int i;
	unsigned32 f;
	sim_fpu a, b, c, d, i1, i2;
	/* Each word: -((vA * vC) - vB), via sim_fpu single precision.  */
	for (i = 0; i < 4; i++) {
	  sim_fpu_32to (&a, (*vA).w[i]);
	  sim_fpu_32to (&b, (*vB).w[i]);
	  sim_fpu_32to (&c, (*vC).w[i]);
	  sim_fpu_mul (&i1, &a, &c);
	  sim_fpu_sub (&i2, &i1, &b);
	  sim_fpu_neg (&d, &i2);
	  sim_fpu_to32 (&f, &d);
	  (*vS).w[i] = f;
	}
	PPC_INSN_VR(VS_BITMASK, VA_BITMASK | VB_BITMASK | VC_BITMASK);


#
# Vector Logical OR instructions, 6-110, 6-111, 6-177
#

0.4,6.VS,11.VA,16.VB,21.1284:VX:av:vnor %VD, %VA, %VB:Vector Logical NOR
	int i;
	for (i = 0; i < 4; i++)
	  (*vS).w[i] = ~((*vA).w[i] | (*vB).w[i]);
	PPC_INSN_VR(VS_BITMASK, VA_BITMASK | VB_BITMASK);

0.4,6.VS,11.VA,16.VB,21.1156:VX:av:vor %VD, %VA, %VB:Vector Logical OR
	int i;
	for (i = 0; i < 4; i++)
	  (*vS).w[i] = (*vA).w[i] | (*vB).w[i];
	PPC_INSN_VR(VS_BITMASK, VA_BITMASK | VB_BITMASK);

0.4,6.VS,11.VA,16.VB,21.1220:VX:av:vxor %VD, %VA, %VB:Vector Logical XOR
	int i;
	for (i = 0; i < 4; i++)
	  (*vS).w[i] = (*vA).w[i] ^ (*vB).w[i];
	PPC_INSN_VR(VS_BITMASK, VA_BITMASK | VB_BITMASK);


#
# Vector Permute instruction, 6-112
#

0.4,6.VS,11.VA,16.VB,21.VC,26.43:VX:av:vperm %VD, %VA, %VB, %VC:Vector Permute
	int i, who;
	/* The permutation vector might have us read into the source vectors
	   back at positions before the iteration index, so we must latch the
	   sources to prevent early-clobbering in case the destination vector
	   is the same as one of them.  */
	vreg myvA = (*vA), myvB = (*vB);
	for (i = 0; i < 16; i++) {
	  who = (*vC).b[AV_BINDEX(i)] & 0x1f;
	  if (who & 0x10)
	    (*vS).b[AV_BINDEX(i)] = myvB.b[AV_BINDEX(who & 0xf)];
	  else
	    (*vS).b[AV_BINDEX(i)] = myvA.b[AV_BINDEX(who & 0xf)];
	}
	PPC_INSN_VR(VS_BITMASK, VA_BITMASK | VB_BITMASK | VC_BITMASK);


#
# Vector Pack instructions, 6-113 ...
# 6-121
#

0.4,6.VS,11.VA,16.VB,21.782:VX:av:vpkpx %VD, %VA, %VB:Vector Pack Pixel32
	int i;
	/* Pack each 32-bit pixel to 1/5/5/5: vA fills the high half words,
	   vB the low half words.  */
	for (i = 0; i < 4; i++) {
	  (*vS).h[AV_HINDEX(i+4)] = ((((*vB).w[i]) >> 9) & 0xfc00)
	    | ((((*vB).w[i]) >> 6) & 0x03e0)
	    | ((((*vB).w[i]) >> 3) & 0x001f);
	  (*vS).h[AV_HINDEX(i)] = ((((*vA).w[i]) >> 9) & 0xfc00)
	    | ((((*vA).w[i]) >> 6) & 0x03e0)
	    | ((((*vA).w[i]) >> 3) & 0x001f);
	}
	PPC_INSN_VR(VS_BITMASK, VA_BITMASK | VB_BITMASK);

0.4,6.VS,11.VA,16.VB,21.398:VX:av:vpkshss %VD, %VA, %VB:Vector Pack Signed Half Word Signed Saturate
	int i, sat, tempsat;
	signed16 temp;
	sat = 0;
	for (i = 0; i < 16; i++) {
	  if (i < 8)
	    temp = (*vA).h[AV_HINDEX(i)];
	  else
	    temp = (*vB).h[AV_HINDEX(i-8)];
	  (*vS).b[AV_BINDEX(i)] = altivec_signed_saturate_8(temp, &tempsat);
	  sat |= tempsat;
	}
	ALTIVEC_SET_SAT(sat);
	PPC_INSN_VR_VSCR(VS_BITMASK, VA_BITMASK | VB_BITMASK);

0.4,6.VS,11.VA,16.VB,21.270:VX:av:vpkshus %VD, %VA, %VB:Vector Pack Signed Half Word Unsigned Saturate
	int i, sat, tempsat;
	signed16 temp;
	sat = 0;
	for (i = 0; i < 16; i++) {
	  if (i < 8)
	    temp = (*vA).h[AV_HINDEX(i)];
	  else
	    temp = (*vB).h[AV_HINDEX(i-8)];
	  (*vS).b[AV_BINDEX(i)] = altivec_unsigned_saturate_8(temp, &tempsat);
	  sat |= tempsat;
	}
	ALTIVEC_SET_SAT(sat);
	PPC_INSN_VR_VSCR(VS_BITMASK, VA_BITMASK | VB_BITMASK);

0.4,6.VS,11.VA,16.VB,21.462:VX:av:vpkswss %VD, %VA, %VB:Vector Pack Signed Word Signed Saturate
	int i, sat, tempsat;
	signed32 temp;
	sat = 0;
	for (i = 0; i < 8; i++) {
	  if (i < 4)
	    temp = (*vA).w[i];
	  else
	    temp = (*vB).w[i-4];
	  (*vS).h[AV_HINDEX(i)] = altivec_signed_saturate_16(temp, &tempsat);
	  sat |= tempsat;
	}
	ALTIVEC_SET_SAT(sat);
	PPC_INSN_VR_VSCR(VS_BITMASK, VA_BITMASK | VB_BITMASK);

0.4,6.VS,11.VA,16.VB,21.334:VX:av:vpkswus %VD, %VA, %VB:Vector Pack Signed Word Unsigned Saturate
	int i, sat, tempsat;
	signed32 temp;
	sat = 0;
	for (i = 0; i < 8; i++) {
	  if (i < 4)
	    temp = (*vA).w[i];
	  else
	    temp = (*vB).w[i-4];
	  (*vS).h[AV_HINDEX(i)] = altivec_unsigned_saturate_16(temp, &tempsat);
	  sat |= tempsat;
	}
	ALTIVEC_SET_SAT(sat);
	PPC_INSN_VR_VSCR(VS_BITMASK, VA_BITMASK | VB_BITMASK);

0.4,6.VS,11.VA,16.VB,21.14:VX:av:vpkuhum %VD, %VA, %VB:Vector Pack Unsigned Half Word Unsigned Modulo
	int i;
	for (i = 0; i < 16; i++)
	  if (i < 8)
	    (*vS).b[AV_BINDEX(i)] = (*vA).h[AV_HINDEX(i)];
	  else
	    (*vS).b[AV_BINDEX(i)] = (*vB).h[AV_HINDEX(i-8)];
	PPC_INSN_VR(VS_BITMASK, VA_BITMASK | VB_BITMASK);

0.4,6.VS,11.VA,16.VB,21.142:VX:av:vpkuhus %VD, %VA, %VB:Vector Pack Unsigned Half Word Unsigned Saturate
	int i, sat, tempsat;
	signed16 temp;
	sat = 0;
	for (i = 0; i < 16; i++) {
	  if (i < 8)
	    temp = (*vA).h[AV_HINDEX(i)];
	  else
	    temp = (*vB).h[AV_HINDEX(i-8)];
	  /* force positive in signed16, ok as we'll toss the bit away anyway */
	  temp &= ~0x8000;
	  (*vS).b[AV_BINDEX(i)] = altivec_unsigned_saturate_8(temp, &tempsat);
	  sat |= tempsat;
	}
	ALTIVEC_SET_SAT(sat);
	PPC_INSN_VR_VSCR(VS_BITMASK, VA_BITMASK | VB_BITMASK);

0.4,6.VS,11.VA,16.VB,21.78:VX:av:vpkuwum %VD, %VA, %VB:Vector Pack Unsigned Word Unsigned Modulo
	int i;
	/* Pack vA's four words into the high half words and vB's four
	   into the low ones.  The guard used to test (i < 8), which is
	   always true inside this loop: vB was never read and (*vA).w[i]
	   overran the 4-word array for i >= 4.  */
	for (i = 0; i < 8; i++)
	  if (i < 4)
	    (*vS).h[AV_HINDEX(i)] = (*vA).w[i];
	  else
	    (*vS).h[AV_HINDEX(i)] = (*vB).w[i-4];
	PPC_INSN_VR(VS_BITMASK, VA_BITMASK | VB_BITMASK);

0.4,6.VS,11.VA,16.VB,21.206:VX:av:vpkuwus %VD, %VA, %VB:Vector Pack Unsigned Word Unsigned Saturate
	int i, sat, tempsat;
	signed32 temp;
	sat = 0;
	for (i = 0; i < 8; i++) {
	  if (i < 4)
	    temp = (*vA).w[i];
	  else
	    temp = (*vB).w[i-4];
	  /* force positive in signed32, ok as we'll toss the bit away anyway */
	  temp &= ~0x80000000;
	  (*vS).h[AV_HINDEX(i)] = altivec_unsigned_saturate_16(temp, &tempsat);
	  sat |= tempsat;
	}
	ALTIVEC_SET_SAT(sat);
	PPC_INSN_VR_VSCR(VS_BITMASK, VA_BITMASK | VB_BITMASK);


#
# Vector Reciprocal instructions, 6-122, 6-123, 6-131
#

0.4,6.VS,11.0,16.VB,21.266:VX:av:vrefp %VD, %VB:Vector Reciprocal Estimate Floating Point
	int i;
	unsigned32 f;
	sim_fpu op, d;
	/* Estimate implemented as an exact 1/x divide.  */
	for (i = 0; i < 4; i++) {
	  sim_fpu_32to (&op, (*vB).w[i]);
	  sim_fpu_div (&d, &sim_fpu_one, &op);
	  sim_fpu_to32 (&f, &d);
	  (*vS).w[i] = f;
	}
	PPC_INSN_VR(VS_BITMASK, VB_BITMASK);

0.4,6.VS,11.0,16.VB,21.330:VX:av:vrsqrtefp %VD, %VB:Vector Reciprocal Square Root Estimate Floating Point
	int i;
	unsigned32 f;
	sim_fpu op, i1, one, d;
	for (i = 0; i < 4; i++) {
	  sim_fpu_32to (&op, (*vB).w[i]);
	  sim_fpu_sqrt (&i1, &op);
	  sim_fpu_div (&d, &sim_fpu_one, &i1);
	  sim_fpu_to32 (&f, &d);
	  (*vS).w[i] = f;
	}
	PPC_INSN_VR(VS_BITMASK, VB_BITMASK);


#
# Vector Round instructions, 6-124 ...
# 6-127
#

0.4,6.VS,11.0,16.VB,21.714:VX:av:vrfim %VD, %VB:Vector Round to Floating-Point Integer towards Minus Infinity
	int i;
	unsigned32 f;
	sim_fpu op;
	for (i = 0; i < 4; i++) {
	  sim_fpu_32to (&op, (*vB).w[i]);
	  sim_fpu_round_32(&op, sim_fpu_round_down, sim_fpu_denorm_default);
	  sim_fpu_to32 (&f, &op);
	  (*vS).w[i] = f;
	}
	PPC_INSN_VR(VS_BITMASK, VB_BITMASK);

0.4,6.VS,11.0,16.VB,21.522:VX:av:vrfin %VD, %VB:Vector Round to Floating-Point Integer Nearest
	int i;
	unsigned32 f;
	sim_fpu op;
	for (i = 0; i < 4; i++) {
	  sim_fpu_32to (&op, (*vB).w[i]);
	  sim_fpu_round_32(&op, sim_fpu_round_near, sim_fpu_denorm_default);
	  sim_fpu_to32 (&f, &op);
	  (*vS).w[i] = f;
	}
	PPC_INSN_VR(VS_BITMASK, VB_BITMASK);

0.4,6.VS,11.0,16.VB,21.650:VX:av:vrfip %VD, %VB:Vector Round to Floating-Point Integer towards Plus Infinity
	int i;
	unsigned32 f;
	sim_fpu op;
	for (i = 0; i < 4; i++) {
	  sim_fpu_32to (&op, (*vB).w[i]);
	  sim_fpu_round_32(&op, sim_fpu_round_up, sim_fpu_denorm_default);
	  sim_fpu_to32 (&f, &op);
	  (*vS).w[i] = f;
	}
	PPC_INSN_VR(VS_BITMASK, VB_BITMASK);

0.4,6.VS,11.0,16.VB,21.586:VX:av:vrfiz %VD, %VB:Vector Round to Floating-Point Integer towards Zero
	int i;
	unsigned32 f;
	sim_fpu op;
	for (i = 0; i < 4; i++) {
	  sim_fpu_32to (&op, (*vB).w[i]);
	  sim_fpu_round_32(&op, sim_fpu_round_zero, sim_fpu_denorm_default);
	  sim_fpu_to32 (&f, &op);
	  (*vS).w[i] = f;
	}
	PPC_INSN_VR(VS_BITMASK, VB_BITMASK);


#
# Vector Rotate Left instructions, 6-128 ... 6-130
#

0.4,6.VS,11.VA,16.VB,21.4:VX:av:vrlb %VD, %VA, %VB:Vector Rotate Left Integer Byte
	int i;
	unsigned16 temp;
	/* Rotate via a double-width shift; the wrapped bits land in the
	   high byte of temp.  */
	for (i = 0; i < 16; i++) {
	  temp = (unsigned16)(*vA).b[i] << (((*vB).b[i]) & 7);
	  (*vS).b[i] = (temp & 0xff) | ((temp >> 8) & 0xff);
	}
	PPC_INSN_VR(VS_BITMASK, VA_BITMASK | VB_BITMASK);

0.4,6.VS,11.VA,16.VB,21.68:VX:av:vrlh %VD, %VA, %VB:Vector Rotate Left Integer Half Word
	int i;
	unsigned32 temp;
	for (i = 0; i < 8; i++) {
	  temp = (unsigned32)(*vA).h[i] << (((*vB).h[i]) & 0xf);
	  (*vS).h[i] = (temp & 0xffff) | ((temp >> 16) & 0xffff);
	}
	PPC_INSN_VR(VS_BITMASK, VA_BITMASK | VB_BITMASK);

0.4,6.VS,11.VA,16.VB,21.132:VX:av:vrlw %VD, %VA, %VB:Vector Rotate Left Integer Word
	int i;
	unsigned64 temp;
	for (i = 0; i < 4; i++) {
	  temp = (unsigned64)(*vA).w[i] << (((*vB).w[i]) & 0x1f);
	  (*vS).w[i] = (temp & 0xffffffff) | ((temp >> 32) & 0xffffffff);
	}
	PPC_INSN_VR(VS_BITMASK, VA_BITMASK | VB_BITMASK);


#
# Vector Conditional Select instruction, 6-133
#

0.4,6.VS,11.VA,16.VB,21.VC,26.42:VAX:av:vsel %VD, %VA, %VB, %VC:Vector Conditional Select
	int i;
	unsigned32 c;
	/* Bit-wise select: vC bit set picks vB, clear picks vA.  */
	for (i = 0; i < 4; i++) {
	  c = (*vC).w[i];
	  (*vS).w[i] = ((*vB).w[i] & c) | ((*vA).w[i] & ~c);
	}
	PPC_INSN_VR(VS_BITMASK, VA_BITMASK | VB_BITMASK | VC_BITMASK);

#
# Vector Shift Left instructions, 6-134 ... 6-139
#

0.4,6.VS,11.VA,16.VB,21.452:VX:av:vsl %VD, %VA, %VB:Vector Shift Left
	int sh, i, j, carry, new_carry;
	sh = (*vB).b[0] & 7;	/* don't bother checking everything */
	carry = 0;
	/* Propagate the carry word-by-word from the low end.  */
	for (j = 3; j >= 0; j--) {
	  if (CURRENT_TARGET_BYTE_ORDER == BIG_ENDIAN)
	    i = j;
	  else
	    i = (j + 2) % 4;
	  new_carry = (*vA).w[i] >> (32 - sh);
	  (*vS).w[i] = ((*vA).w[i] << sh) | carry;
	  carry = new_carry;
	}
	PPC_INSN_VR(VS_BITMASK, VA_BITMASK | VB_BITMASK);

0.4,6.VS,11.VA,16.VB,21.260:VX:av:vslb %VD, %VA, %VB:Vector Shift Left Integer Byte
	int i, sh;
	for (i = 0; i < 16; i++) {
	  sh = ((*vB).b[i]) & 7;
	  (*vS).b[i] = (*vA).b[i] << sh;
	}
	PPC_INSN_VR(VS_BITMASK, VA_BITMASK | VB_BITMASK);

0.4,6.VS,11.VA,16.VB,21.0,22.SH,26.44:VX:av:vsldoi %VD, %VA, %VB, %SH:Vector Shift Left Double by Octet Immediate
	int i, j;
	/* Mnemonic is vsldoi (the garbled source read "vsldol"); it also
	   disassembles with the SH immediate as the fourth operand.  */
	for (j = 0, i = SH; i < 16; i++)
	  (*vS).b[j++] = (*vA).b[i];
	for (i = 0; i < SH; i++)
	  (*vS).b[j++] = (*vB).b[i];
	PPC_INSN_VR(VS_BITMASK, VA_BITMASK | VB_BITMASK);

0.4,6.VS,11.VA,16.VB,21.324:VX:av:vslh %VD, %VA, %VB:Vector Shift Left Half Word
	int i, sh;
	for (i = 0; i < 8; i++) {
	  sh = ((*vB).h[i]) & 0xf;
	  (*vS).h[i] = (*vA).h[i] << sh;
	}
	PPC_INSN_VR(VS_BITMASK, VA_BITMASK | VB_BITMASK);

0.4,6.VS,11.VA,16.VB,21.1036:VX:av:vslo %VD, %VA, %VB:Vector Shift Left by Octet
	int i, sh;
	/* Shift count is bits 121:124 of vB, i.e. the low-order byte.  */
	if (CURRENT_TARGET_BYTE_ORDER == BIG_ENDIAN)
	  sh = ((*vB).b[AV_BINDEX(15)] >> 3) & 0xf;
	else
	  sh = ((*vB).b[AV_BINDEX(0)] >> 3) & 0xf;
	for (i = 0; i < 16; i++) {
	  if (15 - i > sh)
	    (*vS).b[AV_BINDEX(i)] = (*vA).b[AV_BINDEX(i + sh)];
	  else
	    (*vS).b[AV_BINDEX(i)] = 0;
	}
	PPC_INSN_VR(VS_BITMASK, VA_BITMASK | VB_BITMASK);

0.4,6.VS,11.VA,16.VB,21.388:VX:av:vslw %VD, %VA, %VB:Vector Shift Left Integer Word
	int i, sh;
	for (i = 0; i < 4; i++) {
	  sh = ((*vB).w[i]) & 0x1f;
	  (*vS).w[i] = (*vA).w[i] << sh;
	}
	PPC_INSN_VR(VS_BITMASK, VA_BITMASK | VB_BITMASK);


#
# Vector Splat instructions, 6-140 ... 6-145
#

0.4,6.VS,11.UIMM,16.VB,21.524:VX:av:vspltb %VD, %VB, %UIMM:Vector Splat Byte
	int i;
	unsigned8 b;
	b = (*vB).b[AV_BINDEX(UIMM & 0xf)];
	for (i = 0; i < 16; i++)
	  (*vS).b[i] = b;
	PPC_INSN_VR(VS_BITMASK, VB_BITMASK);

0.4,6.VS,11.UIMM,16.VB,21.588:VX:av:vsplth %VD, %VB, %UIMM:Vector Splat Half Word
	int i;
	unsigned16 h;
	h = (*vB).h[AV_HINDEX(UIMM & 0x7)];
	for (i = 0; i < 8; i++)
	  (*vS).h[i] = h;
	PPC_INSN_VR(VS_BITMASK, VB_BITMASK);

0.4,6.VS,11.SIMM,16.0,21.780:VX:av:vspltisb %VD, %SIMM:Vector Splat Immediate Signed Byte
	int i;
	signed8 b = SIMM;
	/* manual 5-bit signed extension */
	if (b & 0x10)
	  b -= 0x20;
	for (i = 0; i < 16; i++)
	  (*vS).b[i] = b;
	PPC_INSN_VR(VS_BITMASK, 0);

0.4,6.VS,11.SIMM,16.0,21.844:VX:av:vspltish %VD, %SIMM:Vector Splat Immediate Signed Half Word
	int i;
	signed16 h = SIMM;
	/* manual 5-bit signed extension */
	if (h & 0x10)
	  h -= 0x20;
	for (i = 0; i < 8; i++)
	  (*vS).h[i] = h;
	PPC_INSN_VR(VS_BITMASK, 0);

0.4,6.VS,11.SIMM,16.0,21.908:VX:av:vspltisw %VD, %SIMM:Vector Splat Immediate Signed Word
	int i;
	signed32 w = SIMM;
	/* manual 5-bit signed extension */
	if (w & 0x10)
	  w -= 0x20;
	for (i = 0; i < 4; i++)
	  (*vS).w[i] = w;
	PPC_INSN_VR(VS_BITMASK, 0);

0.4,6.VS,11.UIMM,16.VB,21.652:VX:av:vspltw %VD, %VB, %UIMM:Vector Splat Word
	int i;
	unsigned32 w;
	w = (*vB).w[UIMM & 0x3];
	for (i = 0; i < 4; i++)
	  (*vS).w[i] = w;
	PPC_INSN_VR(VS_BITMASK, VB_BITMASK);


#
# Vector Shift Right instructions, 6-146 ... 6-154
#

0.4,6.VS,11.VA,16.VB,21.708:VX:av:vsr %VD, %VA, %VB:Vector Shift Right
	int sh, i, j, carry, new_carry;
	sh = (*vB).b[0] & 7;	/* don't bother checking everything */
	carry = 0;
	/* Mirror of vsl: propagate the carry from the high end down.  */
	for (j = 0; j < 4; j++) {
	  if (CURRENT_TARGET_BYTE_ORDER == BIG_ENDIAN)
	    i = j;
	  else
	    i = (j + 2) % 4;
	  new_carry = (*vA).w[i] << (32 - sh);
	  (*vS).w[i] = ((*vA).w[i] >> sh) | carry;
	  carry = new_carry;
	}
	PPC_INSN_VR(VS_BITMASK, VA_BITMASK | VB_BITMASK);

0.4,6.VS,11.VA,16.VB,21.772:VX:av:vsrab %VD, %VA, %VB:Vector Shift Right Algebraic Byte
	int i, sh;
	signed16 a;
	/* Widen to a signed type so >> is an arithmetic shift.  */
	for (i = 0; i < 16; i++) {
	  sh = ((*vB).b[i]) & 7;
	  a = (signed16)(signed8)(*vA).b[i];
	  (*vS).b[i] = (a >> sh) & 0xff;
	}
	PPC_INSN_VR(VS_BITMASK, VA_BITMASK | VB_BITMASK);

0.4,6.VS,11.VA,16.VB,21.836:VX:av:vsrah %VD, %VA, %VB:Vector Shift Right Algebraic Half Word
	int i, sh;
	signed32 a;
	for (i = 0; i < 8; i++) {
	  sh = ((*vB).h[i]) & 0xf;
	  a = (signed32)(signed16)(*vA).h[i];
	  (*vS).h[i] = (a >> sh) & 0xffff;
	}
	PPC_INSN_VR(VS_BITMASK, VA_BITMASK | VB_BITMASK);

0.4,6.VS,11.VA,16.VB,21.900:VX:av:vsraw %VD, %VA, %VB:Vector Shift Right Algebraic Word
	int i, sh;
	signed64 a;
	/* A word shift amount is 5 bits wide; the old mask here was 0xf,
	   which silently dropped bit 4 of the shift count (compare vsrw,
	   vslw and vrlw, which all use 0x1f).  */
	for (i = 0; i < 4; i++) {
	  sh = ((*vB).w[i]) & 0x1f;
	  a = (signed64)(signed32)(*vA).w[i];
	  (*vS).w[i] = (a >> sh) & 0xffffffff;
	}
	PPC_INSN_VR(VS_BITMASK, VA_BITMASK | VB_BITMASK);

0.4,6.VS,11.VA,16.VB,21.516:VX:av:vsrb %VD, %VA, %VB:Vector Shift Right Byte
	int i, sh;
	for (i = 0; i < 16; i++) {
	  sh = ((*vB).b[i]) & 7;
	  (*vS).b[i] = (*vA).b[i] >> sh;
	}
	PPC_INSN_VR(VS_BITMASK, VA_BITMASK | VB_BITMASK);

0.4,6.VS,11.VA,16.VB,21.580:VX:av:vsrh %VD, %VA, %VB:Vector Shift Right Half Word
	int i, sh;
	for (i = 0; i < 8; i++) {
	  sh = ((*vB).h[i]) & 0xf;
	  (*vS).h[i] = (*vA).h[i] >> sh;
	}
	PPC_INSN_VR(VS_BITMASK, VA_BITMASK | VB_BITMASK);

0.4,6.VS,11.VA,16.VB,21.1100:VX:av:vsro %VD, %VA, %VB:Vector Shift Right Octet
	int i, sh;
	/* Shift count is in bits 121:124 of vB -- the fourth-lowest nibble;
	   that byte sits at opposite ends of the register depending on
	   target byte order.  */
	if (CURRENT_TARGET_BYTE_ORDER == BIG_ENDIAN)
	  sh = ((*vB).b[AV_BINDEX(15)] >> 3) & 0xf;
	else
	  sh = ((*vB).b[AV_BINDEX(0)] >> 3) & 0xf;
	for (i = 0; i < 16; i++) {
	  if (i < sh)
	    (*vS).b[AV_BINDEX(i)] = 0;
	  else
	    (*vS).b[AV_BINDEX(i)] = (*vA).b[AV_BINDEX(i - sh)];
	}
	PPC_INSN_VR(VS_BITMASK, VA_BITMASK | VB_BITMASK);

0.4,6.VS,11.VA,16.VB,21.644:VX:av:vsrw %VD, %VA, %VB:Vector Shift Right Word
	int i, sh;
	for (i = 0; i < 4; i++) {
	  sh = ((*vB).w[i]) & 0x1f;
	  (*vS).w[i] = (*vA).w[i] >> sh;
	}
	PPC_INSN_VR(VS_BITMASK, VA_BITMASK | VB_BITMASK);


#
# Vector Subtract instructions, 6-155 ... 6-165
#

0.4,6.VS,11.VA,16.VB,21.1408:VX:av:vsubcuw %VD, %VA, %VB:Vector Subtract Carryout Unsigned Word
	int i;
	signed64 temp, a, b;
	for (i = 0; i < 4; i++) {
	  a = (signed64)(unsigned32)(*vA).w[i];
	  b = (signed64)(unsigned32)(*vB).w[i];
	  temp = a - b;
	  /* borrow propagates to bit 32 of the 64-bit difference; the
	     carryout is its complement */
	  (*vS).w[i] = ~(temp >> 32) & 1;
	}
	PPC_INSN_VR(VS_BITMASK, VA_BITMASK | VB_BITMASK);

0.4,6.VS,11.VA,16.VB,21.74:VX:av:vsubfp %VD, %VA, %VB:Vector Subtract Floating Point
	int i;
	unsigned32 f;
	sim_fpu a, b, d;
	for (i = 0; i < 4; i++) {
	  sim_fpu_32to (&a, (*vA).w[i]);
	  sim_fpu_32to (&b, (*vB).w[i]);
	  sim_fpu_sub (&d, &a, &b);
	  sim_fpu_to32 (&f, &d);
	  (*vS).w[i] = f;
	}
	PPC_INSN_VR(VS_BITMASK, VA_BITMASK | VB_BITMASK);

0.4,6.VS,11.VA,16.VB,21.1792:VX:av:vsubsbs %VD, %VA, %VB:Vector Subtract Signed Byte Saturate
	int i, sat, tempsat;
	signed16 temp;
	sat = 0;
	for (i = 0; i < 16; i++) {
	  temp = (signed16)(signed8)(*vA).b[i] - (signed16)(signed8)(*vB).b[i];
	  (*vS).b[i] = altivec_signed_saturate_8(temp, &tempsat);
	  sat |= tempsat;
	}
	ALTIVEC_SET_SAT(sat);
	PPC_INSN_VR_VSCR(VS_BITMASK, VA_BITMASK | VB_BITMASK);

0.4,6.VS,11.VA,16.VB,21.1856:VX:av:vsubshs %VD, %VA, %VB:Vector Subtract Signed Half Word Saturate
	int i, sat, tempsat;
	signed32 temp;
	sat = 0;
	for (i = 0; i < 8; i++) {
	  temp = (signed32)(signed16)(*vA).h[i] - (signed32)(signed16)(*vB).h[i];
	  (*vS).h[i] = altivec_signed_saturate_16(temp, &tempsat);
	  sat |= tempsat;
	}
	ALTIVEC_SET_SAT(sat);
	PPC_INSN_VR_VSCR(VS_BITMASK, VA_BITMASK | VB_BITMASK);

0.4,6.VS,11.VA,16.VB,21.1920:VX:av:vsubsws %VD, %VA, %VB:Vector Subtract Signed Word Saturate
	int i, sat, tempsat;
	signed64 temp;
	sat = 0;
	for (i = 0; i < 4; i++) {
	  temp = (signed64)(signed32)(*vA).w[i] - (signed64)(signed32)(*vB).w[i];
	  (*vS).w[i] = altivec_signed_saturate_32(temp, &tempsat);
	  sat |= tempsat;
	}
	ALTIVEC_SET_SAT(sat);
	PPC_INSN_VR_VSCR(VS_BITMASK, VA_BITMASK | VB_BITMASK);

0.4,6.VS,11.VA,16.VB,21.1024:VX:av:vsububm %VD, %VA, %VB:Vector Subtract Unsigned Byte Modulo
	int i;
	for (i = 0; i < 16; i++)
	  (*vS).b[i] = (*vA).b[i] - (*vB).b[i];
	PPC_INSN_VR(VS_BITMASK, VA_BITMASK | VB_BITMASK);

0.4,6.VS,11.VA,16.VB,21.1536:VX:av:vsububs %VD, %VA, %VB:Vector Subtract Unsigned Byte Saturate
	int i, sat, tempsat;
	signed16 temp;
	sat = 0;
	for (i = 0; i < 16; i++) {
	  temp = (signed16)(unsigned8)(*vA).b[i] - (signed16)(unsigned8)(*vB).b[i];
	  (*vS).b[i] = altivec_unsigned_saturate_8(temp, &tempsat);
	  sat |= tempsat;
	}
	ALTIVEC_SET_SAT(sat);
	PPC_INSN_VR_VSCR(VS_BITMASK, VA_BITMASK | VB_BITMASK);

0.4,6.VS,11.VA,16.VB,21.1088:VX:av:vsubuhm %VD, %VA, %VB:Vector Subtract Unsigned Half Word Modulo
	int i;
	for (i = 0; i < 8; i++)
	  (*vS).h[i] = ((*vA).h[i] - (*vB).h[i]) & 0xffff;
	PPC_INSN_VR(VS_BITMASK, VA_BITMASK | VB_BITMASK);

0.4,6.VS,11.VA,16.VB,21.1600:VX:av:vsubuhs %VD, %VA, %VB:Vector Subtract Unsigned Half Word Saturate
	int i, sat, tempsat;
	signed32 temp;
	/* initialize sat -- it was previously read uninitialized below */
	sat = 0;
	for (i = 0; i < 8; i++) {
	  temp = (signed32)(unsigned16)(*vA).h[i] - (signed32)(unsigned16)(*vB).h[i];
	  (*vS).h[i] = altivec_unsigned_saturate_16(temp, &tempsat);
	  sat |= tempsat;
	}
	ALTIVEC_SET_SAT(sat);
	PPC_INSN_VR_VSCR(VS_BITMASK, VA_BITMASK | VB_BITMASK);

0.4,6.VS,11.VA,16.VB,21.1152:VX:av:vsubuwm %VD, %VA, %VB:Vector Subtract Unsigned Word Modulo
	int i;
	for (i = 0; i < 4; i++)
	  (*vS).w[i] = (*vA).w[i] - (*vB).w[i];
	PPC_INSN_VR(VS_BITMASK, VA_BITMASK | VB_BITMASK);

0.4,6.VS,11.VA,16.VB,21.1664:VX:av:vsubuws %VD, %VA, %VB:Vector Subtract Unsigned Word Saturate
	int i, sat, tempsat;
	signed64 temp;
	/* initialize sat -- it was previously read uninitialized below */
	sat = 0;
	for (i = 0; i < 4; i++) {
	  temp = (signed64)(unsigned32)(*vA).w[i] - (signed64)(unsigned32)(*vB).w[i];
	  (*vS).w[i] = altivec_unsigned_saturate_32(temp, &tempsat);
	  sat |= tempsat;
	}
	ALTIVEC_SET_SAT(sat);
	PPC_INSN_VR_VSCR(VS_BITMASK, VA_BITMASK | VB_BITMASK);


#
# Vector Sum instructions, 6-166 ... 6-170
#

0.4,6.VS,11.VA,16.VB,21.1928:VX:av:vsumsws %VD, %VA, %VB:Vector Sum Across Signed Word Saturate
	int i, sat;
	signed64 temp;
	temp = (signed64)(signed32)(*vB).w[3];
	for (i = 0; i < 4; i++)
	  temp += (signed64)(signed32)(*vA).w[i];
	(*vS).w[3] = altivec_signed_saturate_32(temp, &sat);
	(*vS).w[0] = (*vS).w[1] = (*vS).w[2] = 0;
	ALTIVEC_SET_SAT(sat);
	PPC_INSN_VR_VSCR(VS_BITMASK, VA_BITMASK | VB_BITMASK);

0.4,6.VS,11.VA,16.VB,21.1672:VX:av:vsum2sws %VD, %VA, %VB:Vector Sum Across Partial (1/2) Signed Word Saturate
	int j, sat, tempsat;
	signed64 temp;
	/* initialize sat -- it was previously read uninitialized below */
	sat = 0;
	for (j = 0; j < 4; j += 2) {
	  temp = (signed64)(signed32)(*vB).w[j+1];
	  temp += (signed64)(signed32)(*vA).w[j] + (signed64)(signed32)(*vA).w[j+1];
	  (*vS).w[j+1] = altivec_signed_saturate_32(temp, &tempsat);
	  sat |= tempsat;
	}
	(*vS).w[0] = (*vS).w[2] = 0;
	ALTIVEC_SET_SAT(sat);
	PPC_INSN_VR_VSCR(VS_BITMASK, VA_BITMASK | VB_BITMASK);

0.4,6.VS,11.VA,16.VB,21.1800:VX:av:vsum4sbs %VD, %VA, %VB:Vector Sum Across Partial (1/4) Signed Byte Saturate
	int i, j, sat, tempsat;
	signed64 temp;
	/* initialize sat -- it was previously read uninitialized below */
	sat = 0;
	for (j = 0; j < 4; j++) {
	  temp = (signed64)(signed32)(*vB).w[j];
	  for (i = 0; i < 4; i++)
	    temp += (signed64)(signed8)(*vA).b[i+(j*4)];
	  (*vS).w[j] = altivec_signed_saturate_32(temp, &tempsat);
	  sat |= tempsat;
	}
	ALTIVEC_SET_SAT(sat);
	PPC_INSN_VR_VSCR(VS_BITMASK, VA_BITMASK | VB_BITMASK);

0.4,6.VS,11.VA,16.VB,21.1608:VX:av:vsum4shs %VD, %VA, %VB:Vector Sum Across Partial (1/4) Signed Half Word Saturate
	int i, j, sat, tempsat;
	signed64 temp;
	/* initialize sat -- it was previously read uninitialized below */
	sat = 0;
	for (j = 0; j < 4; j++) {
	  temp = (signed64)(signed32)(*vB).w[j];
	  for (i = 0; i < 2; i++)
	    temp += (signed64)(signed16)(*vA).h[i+(j*2)];
	  (*vS).w[j] = altivec_signed_saturate_32(temp, &tempsat);
	  sat |= tempsat;
	}
	ALTIVEC_SET_SAT(sat);
	PPC_INSN_VR_VSCR(VS_BITMASK, VA_BITMASK | VB_BITMASK);

0.4,6.VS,11.VA,16.VB,21.1544:VX:av:vsum4ubs %VD, %VA, %VB:Vector Sum Across Partial (1/4) Unsigned Byte Saturate
	int i, j, sat, tempsat;
	signed64 utemp;
	signed64 temp;
	/* initialize sat -- it was previously read uninitialized below */
	sat = 0;
	for (j = 0; j < 4; j++) {
	  utemp = (signed64)(unsigned32)(*vB).w[j];
	  for (i = 0; i < 4; i++)
	    utemp += (signed64)(unsigned16)(*vA).b[i+(j*4)];
	  temp = utemp;
	  (*vS).w[j] = altivec_unsigned_saturate_32(temp, &tempsat);
	  sat |= tempsat;
	}
	ALTIVEC_SET_SAT(sat);
	PPC_INSN_VR_VSCR(VS_BITMASK, VA_BITMASK | VB_BITMASK);


#
# Vector Unpack instructions, 6-171 ... 6-176
#

0.4,6.VS,11.0,16.VB,21.846:VX:av:vupkhpx %VD, %VB:Vector Unpack High Pixel16
	int i;
	unsigned16 pix;
	unsigned32 expanded;
	/* Expand each 1/5/5/5 pixel from the high half of vB into an
	   8/8/8/8 word: sign bit -> 0xff or 0x00 alpha, each 5-bit
	   channel widened into its own byte.  */
	for (i = 0; i < 4; i++) {
	  pix = (*vB).h[AV_HINDEX(i)];
	  expanded = (pix & 0x8000) ? 0xff000000 : 0;
	  expanded |= (pix & 0x7c00) << 6;
	  expanded |= (pix & 0x03e0) << 3;
	  expanded |= pix & 0x001f;
	  (*vS).w[i] = expanded;
	}
	PPC_INSN_VR(VS_BITMASK, VB_BITMASK);

0.4,6.VS,11.0,16.VB,21.526:VX:av:vupkhsb %VD, %VB:Vector Unpack High Signed Byte
	int e;
	/* Sign-extend the 8 high-half bytes of vB into halfwords.  */
	for (e = 0; e < 8; e++)
	  (*vS).h[AV_HINDEX(e)] = (signed16)(signed8)(*vB).b[AV_BINDEX(e)];
	PPC_INSN_VR(VS_BITMASK, VB_BITMASK);

0.4,6.VS,11.0,16.VB,21.590:VX:av:vupkhsh %VD, %VB:Vector Unpack High Signed Half Word
	int e;
	/* Sign-extend the 4 high-half halfwords of vB into words.  */
	for (e = 0; e < 4; e++)
	  (*vS).w[e] = (signed32)(signed16)(*vB).h[AV_HINDEX(e)];
	PPC_INSN_VR(VS_BITMASK, VB_BITMASK);

0.4,6.VS,11.0,16.VB,21.974:VX:av:vupklpx %VD, %VB:Vector Unpack Low Pixel16
	int i;
	unsigned16 pix;
	unsigned32 expanded;
	/* Same 1/5/5/5 -> 8/8/8/8 expansion as vupkhpx, but sourced from
	   the low half of vB (elements 4..7).  */
	for (i = 0; i < 4; i++) {
	  pix = (*vB).h[AV_HINDEX(i + 4)];
	  expanded = (pix & 0x8000) ? 0xff000000 : 0;
	  expanded |= (pix & 0x7c00) << 6;
	  expanded |= (pix & 0x03e0) << 3;
	  expanded |= pix & 0x001f;
	  (*vS).w[i] = expanded;
	}
	PPC_INSN_VR(VS_BITMASK, VB_BITMASK);

0.4,6.VS,11.0,16.VB,21.654:VX:av:vupklsb %VD, %VB:Vector Unpack Low Signed Byte
	int e;
	/* Sign-extend the 8 low-half bytes of vB into halfwords.  */
	for (e = 0; e < 8; e++)
	  (*vS).h[AV_HINDEX(e)] = (signed16)(signed8)(*vB).b[AV_BINDEX(e + 8)];
	PPC_INSN_VR(VS_BITMASK, VB_BITMASK);

0.4,6.VS,11.0,16.VB,21.718:VX:av:vupklsh %VD, %VB:Vector Unpack Low Signed Half Word
	int e;
	/* Sign-extend the 4 low-half halfwords of vB into words.  */
	for (e = 0; e < 4; e++)
	  (*vS).w[e] = (signed32)(signed16)(*vB).h[AV_HINDEX(e + 4)];
	PPC_INSN_VR(VS_BITMASK, VB_BITMASK);