1// Inferno utils/6l/span.c 2// https://bitbucket.org/inferno-os/inferno-os/src/master/utils/6l/span.c 3// 4// Copyright © 1994-1999 Lucent Technologies Inc. All rights reserved. 5// Portions Copyright © 1995-1997 C H Forsyth (forsyth@terzarima.net) 6// Portions Copyright © 1997-1999 Vita Nuova Limited 7// Portions Copyright © 2000-2007 Vita Nuova Holdings Limited (www.vitanuova.com) 8// Portions Copyright © 2004,2006 Bruce Ellis 9// Portions Copyright © 2005-2007 C H Forsyth (forsyth@terzarima.net) 10// Revisions Copyright © 2000-2007 Lucent Technologies Inc. and others 11// Portions Copyright © 2009 The Go Authors. All rights reserved. 12// 13// Permission is hereby granted, free of charge, to any person obtaining a copy 14// of this software and associated documentation files (the "Software"), to deal 15// in the Software without restriction, including without limitation the rights 16// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 17// copies of the Software, and to permit persons to whom the Software is 18// furnished to do so, subject to the following conditions: 19// 20// The above copyright notice and this permission notice shall be included in 21// all copies or substantial portions of the Software. 22// 23// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 24// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 25// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 26// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 27// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 28// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN 29// THE SOFTWARE. 

package x86

import (
	"cmd/internal/obj"
	"cmd/internal/objabi"
	"cmd/internal/sys"
	"encoding/binary"
	"fmt"
	"internal/buildcfg"
	"log"
	"strings"
)

var (
	plan9privates *obj.LSym
)

// Instruction layout.

// Loop alignment constants:
// want to align loop entry to loopAlign-byte boundary,
// and willing to insert at most maxLoopPad bytes of NOP to do so.
// We define a loop entry as the target of a backward jump.
//
// gcc uses maxLoopPad = 10 for its 'generic x86-64' config,
// and it aligns all jump targets, not just backward jump targets.
//
// As of 6/1/2012, the effect of setting maxLoopPad = 10 here
// is very slight but negative, so the alignment is disabled by
// setting maxLoopPad = 0. The code is here for reference and
// for future experiments.
const (
	loopAlign  = 16
	maxLoopPad = 0
)

// Bit flags that are used to express jump target properties.
const (
	// branchBackwards marks targets that are located behind.
	// Used to express jumps to loop headers.
	branchBackwards = (1 << iota)
	// branchShort marks branches whose target is close,
	// with an offset in the -128..127 range.
	branchShort
	// branchLoopHead marks loop entry.
	// Used to insert padding for misaligned loops.
	branchLoopHead
)

// opBytes holds optab encoding bytes.
// Each ytab reserves a fixed number of bytes in this array.
//
// The size should be the minimal number of bytes that
// is enough to hold the biggest optab op lines.
type opBytes [31]uint8

// Optab is one row of the instruction table (optab): the instruction (as),
// the list of operand forms it accepts (ytab), the prefix selector (one of
// the P* constants) and the encoding bytes consumed while walking ytab (op).
type Optab struct {
	as     obj.As
	ytab   []ytab
	prefix uint8
	op     opBytes
}

// movtab is a table row keyed by instruction (as).
// NOTE(review): ft, f3t and tt look like operand classes for the from,
// third and to operands, and code/op like an encoding selector plus its
// bytes; the consumer is not visible here, so confirm before relying on it.
type movtab struct {
	as   obj.As
	ft   uint8
	f3t  uint8
	tt   uint8
	code uint8
	op   [4]uint8
}

// Operand classes ("Ytypes"). These are the values that appear in the
// argList of every ytab line below. Ymax is the number of classes and is
// also the row stride of the ycover table.
const (
	Yxxx = iota
	Ynone
	Yi0 // $0
	Yi1 // $1
	Yu2 // $x, x fits in uint2
	Yi8 // $x, x fits in int8
	Yu8 // $x, x fits in uint8
	Yu7 // $x, x in 0..127 (fits in both int8 and uint8)
	Ys32
	Yi32
	Yi64
	Yiauto
	Yal
	Ycl
	Yax
	Ycx
	Yrb
	Yrl
	Yrl32 // Yrl on 32-bit system
	Yrf
	Yf0
	Yrx
	Ymb
	Yml
	Ym
	Ybr
	Ycs
	Yss
	Yds
	Yes
	Yfs
	Ygs
	Ygdtr
	Yidtr
	Yldtr
	Ymsw
	Ytask
	Ycr0
	Ycr1
	Ycr2
	Ycr3
	Ycr4
	Ycr5
	Ycr6
	Ycr7
	Ycr8
	Ydr0
	Ydr1
	Ydr2
	Ydr3
	Ydr4
	Ydr5
	Ydr6
	Ydr7
	Ytr0
	Ytr1
	Ytr2
	Ytr3
	Ytr4
	Ytr5
	Ytr6
	Ytr7
	Ymr
	Ymm
	Yxr0          // X0 only. "<XMM0>" notation in Intel manual.
	YxrEvexMulti4 // [ X<n> - X<n+3> ]; multisource YxrEvex
	Yxr           // X0..X15
	YxrEvex       // X0..X31
	Yxm
	YxmEvex       // YxrEvex+Ym
	Yxvm          // VSIB vector array; vm32x/vm64x
	YxvmEvex      // Yxvm which permits High-16 X register as index.
	YyrEvexMulti4 // [ Y<n> - Y<n+3> ]; multisource YyrEvex
	Yyr           // Y0..Y15
	YyrEvex       // Y0..Y31
	Yym
	YymEvex   // YyrEvex+Ym
	Yyvm      // VSIB vector array; vm32y/vm64y
	YyvmEvex  // Yyvm which permits High-16 Y register as index.
	YzrMulti4 // [ Z<n> - Z<n+3> ]; multisource YzrEvex
	Yzr       // Z0..Z31
	Yzm       // Yzr+Ym
	Yzvm      // VSIB vector array; vm32z/vm64z
	Yk0       // K0
	Yknot0    // K1..K7; write mask
	Yk        // K0..K7; used for KOP
	Ykm       // Yk+Ym; used for KOP
	Ytls
	Ytextsize
	Yindir
	Ymax
)

// Encoding cases ("Ztypes"). A Ztype is the first field of every ytab line
// and selects how the instruction bytes are laid down.
// NOTE(review): Zevex_first and Zevex_last appear to bracket the EVEX cases
// so they can be range-checked; confirm against the code that uses them.
const (
	Zxxx = iota
	Zlit
	Zlitm_r
	Zlitr_m
	Zlit_m_r
	Z_rp
	Zbr
	Zcall
	Zcallcon
	Zcallduff
	Zcallind
	Zcallindreg
	Zib_
	Zib_rp
	Zibo_m
	Zibo_m_xm
	Zil_
	Zil_rp
	Ziq_rp
	Zilo_m
	Zjmp
	Zjmpcon
	Zloop
	Zo_iw
	Zm_o
	Zm_r
	Z_m_r
	Zm2_r
	Zm_r_xm
	Zm_r_i_xm
	Zm_r_xm_nr
	Zr_m_xm_nr
	Zibm_r // mmx1,mmx2/mem64,imm8
	Zibr_m
	Zmb_r
	Zaut_r
	Zo_m
	Zo_m64
	Zpseudo
	Zr_m
	Zr_m_xm
	Zrp_
	Z_ib
	Z_il
	Zm_ibo
	Zm_ilo
	Zib_rr
	Zil_rr
	Zbyte

	Zvex_rm_v_r
	Zvex_rm_v_ro
	Zvex_r_v_rm
	Zvex_i_rm_vo
	Zvex_v_rm_r
	Zvex_i_rm_r
	Zvex_i_r_v
	Zvex_i_rm_v_r
	Zvex
	Zvex_rm_r_vo
	Zvex_i_r_rm
	Zvex_hr_rm_v_r

	Zevex_first
	Zevex_i_r_k_rm
	Zevex_i_r_rm
	Zevex_i_rm_k_r
	Zevex_i_rm_k_vo
	Zevex_i_rm_r
	Zevex_i_rm_v_k_r
	Zevex_i_rm_v_r
	Zevex_i_rm_vo
	Zevex_k_rmo
	Zevex_r_k_rm
	Zevex_r_v_k_rm
	Zevex_r_v_rm
	Zevex_rm_k_r
	Zevex_rm_v_k_r
	Zevex_rm_v_r
	Zevex_last

	Zmax
)

// Prefix selectors (P*) used in the prefix field of optab lines and, for
// some instructions, inside the opBytes themselves, followed by the REX bit
// masks (Rx*). Several P* values are symbolic tags rather than literal
// prefix bytes, as noted on the individual lines.
const (
	Px   = 0
	Px1  = 1    // symbolic; exact value doesn't matter
	P32  = 0x32 // 32-bit only
	Pe   = 0x66 // operand escape
	Pm   = 0x0f // 2byte opcode escape
	Pq   = 0xff // both escapes: 66 0f
	Pb   = 0xfe // byte operands
	Pf2  = 0xf2 // xmm escape 1: f2 0f
	Pf3  = 0xf3 // xmm escape 2: f3 0f
	Pef3 = 0xf5 // xmm escape 2 with 16-bit prefix: 66 f3 0f
	Pq3  = 0x67 // xmm escape 3: 66 48 0f
	Pq4  = 0x68 // xmm escape 4: 66 0F 38
	Pq4w = 0x69 // Pq4 with Rex.w 66 0F 38
	Pq5  = 0x6a // xmm escape 5: F3 0F 38
	Pq5w = 0x6b // Pq5 with Rex.w F3 0F 38
	Pfw  = 0xf4 // Pf3 with Rex.w: f3 48 0f
	Pw   = 0x48 // Rex.w
	Pw8  = 0x90 // symbolic; exact value doesn't matter
	Py   = 0x80 // defaults to 64-bit mode
	Py1  = 0x81 // symbolic; exact value doesn't matter
	Py3  = 0x83 // symbolic; exact value doesn't matter
	Pavx = 0x84 // symbolic: exact value doesn't matter

	RxrEvex = 1 << 4 // AVX512 extension to REX.R/VEX.R
	Rxw     = 1 << 3 // =1, 64-bit operand size
	Rxr     = 1 << 2 // extend modrm reg
	Rxx     = 1 << 1 // extend sib index
	Rxb     = 1 << 0 // extend modrm r/m, sib base, or opcode reg
)

const (
	// Encoding for VEX prefix in tables.
	// The P, L, and W fields are chosen to match
	// their eventual locations in the VEX prefix bytes.

	// Using spare bit to make leading [E]VEX encoding byte different from
	// 0x0f even if all other VEX fields are 0.
	avxEscape = 1 << 6

	// P field - 2 bits
	vex66 = 1 << 0
	vexF3 = 2 << 0
	vexF2 = 3 << 0
	// L field - 1 bit
	vexLZ  = 0 << 2
	vexLIG = 0 << 2
	vex128 = 0 << 2
	vex256 = 1 << 2
	// W field - 1 bit
	vexWIG = 0 << 7
	vexW0  = 0 << 7
	vexW1  = 1 << 7
	// M field - 5 bits, but mostly reserved; we can store up to 3
	vex0F   = 1 << 3
	vex0F38 = 2 << 3
	vex0F3A = 3 << 3
)

// ycover is a Ymax-by-Ymax table stored flat: a non-zero
// ycover[from*Ymax+to] means an operand of class from is also accepted
// where class to is required. It is filled in elsewhere (the comment
// above optab says instinit).
var ycover [Ymax * Ymax]uint8

// reg and regrex are sized by MAXREG and are populated outside this view.
// NOTE(review): presumably per-register encoding number and REX bits;
// confirm against the initialisation code.
var reg [MAXREG]int

var regrex [MAXREG + 1]int

// The ytab tables below list, per instruction family, the operand forms
// that are accepted. Each line is {Ztype, z offset, argList of Ytypes}.
// The order of the lines matters: the z offsets of the lines skipped before
// a match are summed to index into the opBytes of the optab entry that
// uses the table (see the walkthrough in the comment above optab).

var ynone = []ytab{
	{Zlit, 1, argList{}},
}

var ytext = []ytab{
	{Zpseudo, 0, argList{Ymb, Ytextsize}},
	{Zpseudo, 1, argList{Ymb, Yi32, Ytextsize}},
}

var ynop = []ytab{
	{Zpseudo, 0, argList{}},
	{Zpseudo, 0, argList{Yiauto}},
	{Zpseudo, 0, argList{Yml}},
	{Zpseudo, 0, argList{Yrf}},
	{Zpseudo, 0, argList{Yxr}},
	{Zpseudo, 0, argList{Yiauto}},
	{Zpseudo, 0, argList{Yml}},
	{Zpseudo, 0, argList{Yrf}},
	{Zpseudo, 1, argList{Yxr}},
}

var yfuncdata = []ytab{
	{Zpseudo, 0, argList{Yi32, Ym}},
}

var ypcdata = []ytab{
	{Zpseudo, 0, argList{Yi32, Yi32}},
}

var yxorb = []ytab{
	{Zib_, 1, argList{Yi32, Yal}},
	{Zibo_m, 2, argList{Yi32, Ymb}},
	{Zr_m, 1, argList{Yrb, Ymb}},
	{Zm_r, 1, argList{Ymb, Yrb}},
}

var yaddl = []ytab{
	{Zibo_m, 2, argList{Yi8, Yml}},
	{Zil_, 1, argList{Yi32, Yax}},
	{Zilo_m, 2, argList{Yi32, Yml}},
	{Zr_m, 1, argList{Yrl, Yml}},
	{Zm_r, 1, argList{Yml, Yrl}},
}

var yincl = []ytab{
	{Z_rp, 1, argList{Yrl}},
	{Zo_m, 2, argList{Yml}},
}

var yincq = []ytab{
	{Zo_m, 2, argList{Yml}},
}

var ycmpb = []ytab{
	{Z_ib, 1, argList{Yal, Yi32}},
	{Zm_ibo, 2, argList{Ymb, Yi32}},
	{Zm_r, 1, argList{Ymb, Yrb}},
	{Zr_m, 1, argList{Yrb, Ymb}},
}

var ycmpl = []ytab{
	{Zm_ibo, 2, argList{Yml, Yi8}},
	{Z_il, 1, argList{Yax, Yi32}},
	{Zm_ilo, 2, argList{Yml, Yi32}},
	{Zm_r, 1, argList{Yml, Yrl}},
	{Zr_m, 1, argList{Yrl, Yml}},
}

var yshb = []ytab{
	{Zo_m, 2, argList{Yi1, Ymb}},
	{Zibo_m, 2, argList{Yu8, Ymb}},
	{Zo_m, 2, argList{Ycx, Ymb}},
}

var yshl = []ytab{
	{Zo_m, 2, argList{Yi1, Yml}},
	{Zibo_m, 2, argList{Yu8, Yml}},
	{Zo_m, 2, argList{Ycl, Yml}},
	{Zo_m, 2, argList{Ycx, Yml}},
}

var ytestl = []ytab{
	{Zil_, 1, argList{Yi32, Yax}},
	{Zilo_m, 2, argList{Yi32, Yml}},
	{Zr_m, 1, argList{Yrl, Yml}},
	{Zm_r, 1, argList{Yml, Yrl}},
}

var ymovb = []ytab{
	{Zr_m, 1, argList{Yrb, Ymb}},
	{Zm_r, 1, argList{Ymb, Yrb}},
	{Zib_rp, 1, argList{Yi32, Yrb}},
	{Zibo_m, 2, argList{Yi32, Ymb}},
}

var ybtl = []ytab{
	{Zibo_m, 2, argList{Yi8, Yml}},
	{Zr_m, 1, argList{Yrl, Yml}},
}

var ymovw = []ytab{
	{Zr_m, 1, argList{Yrl, Yml}},
	{Zm_r, 1, argList{Yml, Yrl}},
	{Zil_rp, 1, argList{Yi32, Yrl}},
	{Zilo_m, 2, argList{Yi32, Yml}},
	{Zaut_r, 2, argList{Yiauto, Yrl}},
}

var ymovl = []ytab{
	{Zr_m, 1, argList{Yrl, Yml}},
	{Zm_r, 1, argList{Yml, Yrl}},
	{Zil_rp, 1, argList{Yi32, Yrl}},
	{Zilo_m, 2, argList{Yi32, Yml}},
	{Zm_r_xm, 1, argList{Yml, Ymr}}, // MMX MOVD
	{Zr_m_xm, 1, argList{Ymr, Yml}}, // MMX MOVD
	{Zm_r_xm, 2, argList{Yml, Yxr}}, // XMM MOVD (32 bit)
	{Zr_m_xm, 2, argList{Yxr, Yml}}, // XMM MOVD (32 bit)
	{Zaut_r, 2, argList{Yiauto, Yrl}},
}

var yret = []ytab{
	{Zo_iw, 1, argList{}},
	{Zo_iw, 1, argList{Yi32}},
}

var ymovq = []ytab{
	// valid in 32-bit mode
	{Zm_r_xm_nr, 1, argList{Ym, Ymr}},  // 0x6f MMX MOVQ (shorter encoding)
	{Zr_m_xm_nr, 1, argList{Ymr, Ym}},  // 0x7f MMX MOVQ
	{Zm_r_xm_nr, 2, argList{Yxr, Ymr}}, // Pf2, 0xd6 MOVDQ2Q
	{Zm_r_xm_nr, 2, argList{Yxm, Yxr}}, // Pf3, 0x7e MOVQ xmm1/m64 -> xmm2
	{Zr_m_xm_nr, 2, argList{Yxr, Yxm}}, // Pe, 0xd6 MOVQ xmm1 -> xmm2/m64

	// valid only in 64-bit mode, usually with 64-bit prefix
	{Zr_m, 1, argList{Yrl, Yml}},      // 0x89
	{Zm_r, 1, argList{Yml, Yrl}},      // 0x8b
	{Zilo_m, 2, argList{Ys32, Yrl}},   // 32 bit signed 0xc7,(0)
	{Ziq_rp, 1, argList{Yi64, Yrl}},   // 0xb8 -- 32/64 bit immediate
	{Zilo_m, 2, argList{Yi32, Yml}},   // 0xc7,(0)
	{Zm_r_xm, 1, argList{Ymm, Ymr}},   // 0x6e MMX MOVD
	{Zr_m_xm, 1, argList{Ymr, Ymm}},   // 0x7e MMX MOVD
	{Zm_r_xm, 2, argList{Yml, Yxr}},   // Pe, 0x6e MOVD xmm load
	{Zr_m_xm, 2, argList{Yxr, Yml}},   // Pe, 0x7e MOVD xmm store
	{Zaut_r, 1, argList{Yiauto, Yrl}}, // 0 built-in LEAQ
}

var ymovbe = []ytab{
	{Zlitm_r, 3, argList{Ym, Yrl}},
	{Zlitr_m, 3, argList{Yrl, Ym}},
}

var ym_rl = []ytab{
	{Zm_r, 1, argList{Ym, Yrl}},
}

var yrl_m = []ytab{
	{Zr_m, 1, argList{Yrl, Ym}},
}

var ymb_rl = []ytab{
	{Zmb_r, 1, argList{Ymb, Yrl}},
}

var yml_rl = []ytab{
	{Zm_r, 1, argList{Yml, Yrl}},
}

var yrl_ml = []ytab{
	{Zr_m, 1, argList{Yrl, Yml}},
}

var yml_mb = []ytab{
	{Zr_m, 1, argList{Yrb, Ymb}},
	{Zm_r, 1, argList{Ymb, Yrb}},
}

var yrb_mb = []ytab{
	{Zr_m, 1, argList{Yrb, Ymb}},
}

var yxchg = []ytab{
	{Z_rp, 1, argList{Yax, Yrl}},
	{Zrp_, 1, argList{Yrl, Yax}},
	{Zr_m, 1, argList{Yrl, Yml}},
	{Zm_r, 1, argList{Yml, Yrl}},
}

var ydivl = []ytab{
	{Zm_o, 2, argList{Yml}},
}

var ydivb = []ytab{
	{Zm_o, 2, argList{Ymb}},
}

var yimul = []ytab{
	{Zm_o, 2, argList{Yml}},
	{Zib_rr, 1, argList{Yi8, Yrl}},
	{Zil_rr, 1, argList{Yi32, Yrl}},
	{Zm_r, 2, argList{Yml, Yrl}},
}

var yimul3 = []ytab{
	{Zibm_r, 2, argList{Yi8, Yml, Yrl}},
	{Zibm_r, 2, argList{Yi32, Yml, Yrl}},
}

var ybyte = []ytab{
	{Zbyte, 1, argList{Yi64}},
}

var yin = []ytab{
	{Zib_, 1, argList{Yi32}},
	{Zlit, 1, argList{}},
}

var yint = []ytab{
	{Zib_, 1, argList{Yi32}},
}

var ypushl = []ytab{
	{Zrp_, 1, argList{Yrl}},
	{Zm_o, 2, argList{Ym}},
	{Zib_, 1, argList{Yi8}},
	{Zil_, 1, argList{Yi32}},
}

var ypopl = []ytab{
	{Z_rp, 1, argList{Yrl}},
	{Zo_m, 2, argList{Ym}},
}

var ywrfsbase = []ytab{
	{Zm_o, 2, argList{Yrl}},
}

var yrdrand = []ytab{
	{Zo_m, 2, argList{Yrl}},
}

var yclflush = []ytab{
	{Zo_m, 2, argList{Ym}},
}

var ybswap = []ytab{
	{Z_rp, 2, argList{Yrl}},
}

var yscond = []ytab{
	{Zo_m, 2, argList{Ymb}},
}

var yjcond = []ytab{
	{Zbr, 0, argList{Ybr}},
	{Zbr, 0, argList{Yi0, Ybr}},
	{Zbr, 1, argList{Yi1, Ybr}},
}

var yloop = []ytab{
	{Zloop, 1, argList{Ybr}},
}

var ycall = []ytab{
	{Zcallindreg, 0, argList{Yml}},
	{Zcallindreg, 2, argList{Yrx, Yrx}},
	{Zcallind, 2, argList{Yindir}},
	{Zcall, 0, argList{Ybr}},
	{Zcallcon, 1, argList{Yi32}},
}

var yduff = []ytab{
	{Zcallduff, 1, argList{Yi32}},
}

var yjmp = []ytab{
	{Zo_m64, 2, argList{Yml}},
	{Zjmp, 0, argList{Ybr}},
	{Zjmpcon, 1, argList{Yi32}},
}

var yfmvd = []ytab{
	{Zm_o, 2, argList{Ym, Yf0}},
	{Zo_m, 2, argList{Yf0, Ym}},
	{Zm_o, 2, argList{Yrf, Yf0}},
	{Zo_m, 2, argList{Yf0, Yrf}},
}

var yfmvdp = []ytab{
	{Zo_m, 2, argList{Yf0, Ym}},
	{Zo_m, 2, argList{Yf0, Yrf}},
}

var yfmvf = []ytab{
	{Zm_o, 2, argList{Ym, Yf0}},
	{Zo_m, 2, argList{Yf0, Ym}},
}

var yfmvx = []ytab{
	{Zm_o, 2, argList{Ym, Yf0}},
}

var yfmvp = []ytab{
	{Zo_m, 2, argList{Yf0, Ym}},
}

var yfcmv = []ytab{
	{Zm_o, 2, argList{Yrf, Yf0}},
}

var yfadd = []ytab{
	{Zm_o, 2, argList{Ym, Yf0}},
	{Zm_o, 2, argList{Yrf, Yf0}},
	{Zo_m, 2, argList{Yf0, Yrf}},
}

var yfxch = []ytab{
	{Zo_m, 2, argList{Yf0, Yrf}},
	{Zm_o, 2, argList{Yrf, Yf0}},
}

var ycompp = []ytab{
	{Zo_m, 2, argList{Yf0, Yrf}}, // botch is really f0,f1
}

var ystsw = []ytab{
	{Zo_m, 2, argList{Ym}},
	{Zlit, 1, argList{Yax}},
}

var ysvrs_mo = []ytab{
	{Zm_o, 2, argList{Ym}},
}

// unaryDst version of "ysvrs_mo".
var ysvrs_om = []ytab{
	{Zo_m, 2, argList{Ym}},
}

var ymm = []ytab{
	{Zm_r_xm, 1, argList{Ymm, Ymr}},
	{Zm_r_xm, 2, argList{Yxm, Yxr}},
}

var yxm = []ytab{
	{Zm_r_xm, 1, argList{Yxm, Yxr}},
}

var yxm_q4 = []ytab{
	{Zm_r, 1, argList{Yxm, Yxr}},
}

var yxcvm1 = []ytab{
	{Zm_r_xm, 2, argList{Yxm, Yxr}},
	{Zm_r_xm, 2, argList{Yxm, Ymr}},
}

var yxcvm2 = []ytab{
	{Zm_r_xm, 2, argList{Yxm, Yxr}},
	{Zm_r_xm, 2, argList{Ymm, Yxr}},
}

var yxr = []ytab{
	{Zm_r_xm, 1, argList{Yxr, Yxr}},
}

var yxr_ml = []ytab{
	{Zr_m_xm, 1, argList{Yxr, Yml}},
}

var ymr = []ytab{
	{Zm_r, 1, argList{Ymr, Ymr}},
}

var ymr_ml = []ytab{
	{Zr_m_xm, 1, argList{Ymr, Yml}},
}

var yxcmpi = []ytab{
	{Zm_r_i_xm, 2, argList{Yxm, Yxr, Yi8}},
}

var yxmov = []ytab{
	{Zm_r_xm, 1, argList{Yxm, Yxr}},
	{Zr_m_xm, 1, argList{Yxr, Yxm}},
}

var yxcvfl = []ytab{
	{Zm_r_xm, 1, argList{Yxm, Yrl}},
}

var yxcvlf = []ytab{
	{Zm_r_xm, 1, argList{Yml, Yxr}},
}

var yxcvfq = []ytab{
	{Zm_r_xm, 2, argList{Yxm, Yrl}},
}

var yxcvqf = []ytab{
	{Zm_r_xm, 2, argList{Yml, Yxr}},
}

var yps = []ytab{
	{Zm_r_xm, 1, argList{Ymm, Ymr}},
	{Zibo_m_xm, 2, argList{Yi8, Ymr}},
	{Zm_r_xm, 2, argList{Yxm, Yxr}},
	{Zibo_m_xm, 3, argList{Yi8, Yxr}},
}

var yxrrl = []ytab{
	{Zm_r, 1, argList{Yxr, Yrl}},
}

var ymrxr = []ytab{
	{Zm_r, 1, argList{Ymr, Yxr}},
	{Zm_r_xm, 1, argList{Yxm, Yxr}},
}

var ymshuf = []ytab{
	{Zibm_r, 2, argList{Yi8, Ymm, Ymr}},
}

var ymshufb = []ytab{
	{Zm2_r, 2, argList{Yxm, Yxr}},
}

// It should never have more than 1 entry,
// because some optab entries use opcode sequences that
// are longer than 2 bytes (zoffset=2 here),
// ROUNDPD and ROUNDPS and recently added BLENDPD,
// to name a few.
var yxshuf = []ytab{
	{Zibm_r, 2, argList{Yu8, Yxm, Yxr}},
}

var yextrw = []ytab{
	{Zibm_r, 2, argList{Yu8, Yxr, Yrl}},
	{Zibr_m, 2, argList{Yu8, Yxr, Yml}},
}

var yextr = []ytab{
	{Zibr_m, 3, argList{Yu8, Yxr, Ymm}},
}

var yinsrw = []ytab{
	{Zibm_r, 2, argList{Yu8, Yml, Yxr}},
}

var yinsr = []ytab{
	{Zibm_r, 3, argList{Yu8, Ymm, Yxr}},
}

var ypsdq = []ytab{
	{Zibo_m, 2, argList{Yi8, Yxr}},
}

var ymskb = []ytab{
	{Zm_r_xm, 2, argList{Yxr, Yrl}},
	{Zm_r_xm, 1, argList{Ymr, Yrl}},
}

var ycrc32l = []ytab{
	{Zlitm_r, 0, argList{Yml, Yrl}},
}

var ycrc32b = []ytab{
	{Zlitm_r, 0, argList{Ymb, Yrl}},
}

var yprefetch = []ytab{
	{Zm_o, 2, argList{Ym}},
}

var yaes = []ytab{
	{Zlitm_r, 2, argList{Yxm, Yxr}},
}

var yxbegin = []ytab{
	{Zjmp, 1, argList{Ybr}},
}

var yxabort = []ytab{
	{Zib_, 1, argList{Yu8}},
}

var ylddqu = []ytab{
	{Zm_r, 1, argList{Ym, Yxr}},
}

var ypalignr = []ytab{
	{Zibm_r, 2, argList{Yu8, Yxm, Yxr}},
}

var ysha256rnds2 = []ytab{
	{Zlit_m_r, 0, argList{Yxr0, Yxm, Yxr}},
}

var yblendvpd = []ytab{
	{Z_m_r, 1, argList{Yxr0, Yxm, Yxr}},
}

var ymmxmm0f38 = []ytab{
	{Zlitm_r, 3, argList{Ymm, Ymr}},
	{Zlitm_r, 5, argList{Yxm, Yxr}},
}

var yextractps = []ytab{
	{Zibr_m, 2, argList{Yu2, Yxr, Yml}},
}

var ysha1rnds4 = []ytab{
	{Zibm_r, 2, argList{Yu2, Yxm, Yxr}},
}

// You are doasm, holding in your hand a *obj.Prog with p.As set to, say,
// ACRC32, and p.From and p.To as operands (obj.Addr).  The linker scans optab
// to find the entry with the given p.As and then looks through the ytable for
// that instruction (the second field in the optab struct) for a line whose
// argList matches the Ytypes of the p.From and p.To operands.  The
// function oclass computes the specific Ytype of an operand and then the set
// of more general Ytypes that it satisfies is implied by the ycover table, set
// up in instinit.  For example, oclass distinguishes the constants 0 and 1
// from the more general 8-bit constants, but instinit says
//
//	ycover[Yi0*Ymax+Ys32] = 1
//	ycover[Yi1*Ymax+Ys32] = 1
//	ycover[Yi8*Ymax+Ys32] = 1
//
// which means that Yi0, Yi1, and Yi8 all count as Ys32 (signed 32)
// if that's what an instruction can handle.
//
// In parallel with the scan through the ytable for the appropriate line, there
// is a z pointer that starts out pointing at the strange magic byte list in
// the Optab struct.  With each step past a non-matching ytable line, z
// advances by the second entry in the line.  When a matching line is found,
// that z pointer has the extra data to use in laying down the instruction
// bytes.  The actual bytes laid down are a function of the first entry in the
// line (that is, the Ztype) and the z bytes.
//
// For example, let's look at AADDL.  The optab line says:
//
//	{AADDL, yaddl, Px, opBytes{0x83, 00, 0x05, 0x81, 00, 0x01, 0x03}},
//
// and yaddl says
//
//	var yaddl = []ytab{
//		{Zibo_m, 2, argList{Yi8, Yml}},
//		{Zil_, 1, argList{Yi32, Yax}},
//		{Zilo_m, 2, argList{Yi32, Yml}},
//		{Zr_m, 1, argList{Yrl, Yml}},
//		{Zm_r, 1, argList{Yml, Yrl}},
//	}
//
// so there are 5 possible types of ADDL instruction that can be laid down, and
// possible states used to lay them down (Ztype and z pointer, assuming z
// points at opBytes{0x83, 00, 0x05,0x81, 00, 0x01, 0x03}) are:
//
//	Yi8, Yml  -> Zibo_m, z (0x83, 00)
//	Yi32, Yax -> Zil_,   z+2 (0x05)
//	Yi32, Yml -> Zilo_m, z+2+1 (0x81, 0x00)
//	Yrl, Yml  -> Zr_m,   z+2+1+2 (0x01)
//	Yml, Yrl  -> Zm_r,   z+2+1+2+1 (0x03)
//
// The Pconstant in the optab line controls the prefix bytes to emit.  That's
// relatively straightforward as this program goes.
//
// The switch on yt.zcase in doasm implements the various Z cases.  Zibo_m, for
// example, is an opcode byte (z[0]) then an asmando (which is some kind of
// encoded addressing mode for the Yml arg), and then a single immediate byte.
// Zilo_m is the same but a long (32-bit) immediate.
920var optab = 921// as, ytab, andproto, opcode 922[...]Optab{ 923 {obj.AXXX, nil, 0, opBytes{}}, 924 {AAAA, ynone, P32, opBytes{0x37}}, 925 {AAAD, ynone, P32, opBytes{0xd5, 0x0a}}, 926 {AAAM, ynone, P32, opBytes{0xd4, 0x0a}}, 927 {AAAS, ynone, P32, opBytes{0x3f}}, 928 {AADCB, yxorb, Pb, opBytes{0x14, 0x80, 02, 0x10, 0x12}}, 929 {AADCL, yaddl, Px, opBytes{0x83, 02, 0x15, 0x81, 02, 0x11, 0x13}}, 930 {AADCQ, yaddl, Pw, opBytes{0x83, 02, 0x15, 0x81, 02, 0x11, 0x13}}, 931 {AADCW, yaddl, Pe, opBytes{0x83, 02, 0x15, 0x81, 02, 0x11, 0x13}}, 932 {AADCXL, yml_rl, Pq4, opBytes{0xf6}}, 933 {AADCXQ, yml_rl, Pq4w, opBytes{0xf6}}, 934 {AADDB, yxorb, Pb, opBytes{0x04, 0x80, 00, 0x00, 0x02}}, 935 {AADDL, yaddl, Px, opBytes{0x83, 00, 0x05, 0x81, 00, 0x01, 0x03}}, 936 {AADDPD, yxm, Pq, opBytes{0x58}}, 937 {AADDPS, yxm, Pm, opBytes{0x58}}, 938 {AADDQ, yaddl, Pw, opBytes{0x83, 00, 0x05, 0x81, 00, 0x01, 0x03}}, 939 {AADDSD, yxm, Pf2, opBytes{0x58}}, 940 {AADDSS, yxm, Pf3, opBytes{0x58}}, 941 {AADDSUBPD, yxm, Pq, opBytes{0xd0}}, 942 {AADDSUBPS, yxm, Pf2, opBytes{0xd0}}, 943 {AADDW, yaddl, Pe, opBytes{0x83, 00, 0x05, 0x81, 00, 0x01, 0x03}}, 944 {AADOXL, yml_rl, Pq5, opBytes{0xf6}}, 945 {AADOXQ, yml_rl, Pq5w, opBytes{0xf6}}, 946 {AADJSP, nil, 0, opBytes{}}, 947 {AANDB, yxorb, Pb, opBytes{0x24, 0x80, 04, 0x20, 0x22}}, 948 {AANDL, yaddl, Px, opBytes{0x83, 04, 0x25, 0x81, 04, 0x21, 0x23}}, 949 {AANDNPD, yxm, Pq, opBytes{0x55}}, 950 {AANDNPS, yxm, Pm, opBytes{0x55}}, 951 {AANDPD, yxm, Pq, opBytes{0x54}}, 952 {AANDPS, yxm, Pm, opBytes{0x54}}, 953 {AANDQ, yaddl, Pw, opBytes{0x83, 04, 0x25, 0x81, 04, 0x21, 0x23}}, 954 {AANDW, yaddl, Pe, opBytes{0x83, 04, 0x25, 0x81, 04, 0x21, 0x23}}, 955 {AARPL, yrl_ml, P32, opBytes{0x63}}, 956 {ABOUNDL, yrl_m, P32, opBytes{0x62}}, 957 {ABOUNDW, yrl_m, Pe, opBytes{0x62}}, 958 {ABSFL, yml_rl, Pm, opBytes{0xbc}}, 959 {ABSFQ, yml_rl, Pw, opBytes{0x0f, 0xbc}}, 960 {ABSFW, yml_rl, Pq, opBytes{0xbc}}, 961 {ABSRL, yml_rl, Pm, opBytes{0xbd}}, 962 {ABSRQ, yml_rl, Pw, 
opBytes{0x0f, 0xbd}}, 963 {ABSRW, yml_rl, Pq, opBytes{0xbd}}, 964 {ABSWAPL, ybswap, Px, opBytes{0x0f, 0xc8}}, 965 {ABSWAPQ, ybswap, Pw, opBytes{0x0f, 0xc8}}, 966 {ABTCL, ybtl, Pm, opBytes{0xba, 07, 0xbb}}, 967 {ABTCQ, ybtl, Pw, opBytes{0x0f, 0xba, 07, 0x0f, 0xbb}}, 968 {ABTCW, ybtl, Pq, opBytes{0xba, 07, 0xbb}}, 969 {ABTL, ybtl, Pm, opBytes{0xba, 04, 0xa3}}, 970 {ABTQ, ybtl, Pw, opBytes{0x0f, 0xba, 04, 0x0f, 0xa3}}, 971 {ABTRL, ybtl, Pm, opBytes{0xba, 06, 0xb3}}, 972 {ABTRQ, ybtl, Pw, opBytes{0x0f, 0xba, 06, 0x0f, 0xb3}}, 973 {ABTRW, ybtl, Pq, opBytes{0xba, 06, 0xb3}}, 974 {ABTSL, ybtl, Pm, opBytes{0xba, 05, 0xab}}, 975 {ABTSQ, ybtl, Pw, opBytes{0x0f, 0xba, 05, 0x0f, 0xab}}, 976 {ABTSW, ybtl, Pq, opBytes{0xba, 05, 0xab}}, 977 {ABTW, ybtl, Pq, opBytes{0xba, 04, 0xa3}}, 978 {ABYTE, ybyte, Px, opBytes{1}}, 979 {obj.ACALL, ycall, Px, opBytes{0xff, 02, 0xff, 0x15, 0xe8}}, 980 {ACBW, ynone, Pe, opBytes{0x98}}, 981 {ACDQ, ynone, Px, opBytes{0x99}}, 982 {ACDQE, ynone, Pw, opBytes{0x98}}, 983 {ACLAC, ynone, Pm, opBytes{01, 0xca}}, 984 {ACLC, ynone, Px, opBytes{0xf8}}, 985 {ACLD, ynone, Px, opBytes{0xfc}}, 986 {ACLDEMOTE, yclflush, Pm, opBytes{0x1c, 00}}, 987 {ACLFLUSH, yclflush, Pm, opBytes{0xae, 07}}, 988 {ACLFLUSHOPT, yclflush, Pq, opBytes{0xae, 07}}, 989 {ACLI, ynone, Px, opBytes{0xfa}}, 990 {ACLTS, ynone, Pm, opBytes{0x06}}, 991 {ACLWB, yclflush, Pq, opBytes{0xae, 06}}, 992 {ACMC, ynone, Px, opBytes{0xf5}}, 993 {ACMOVLCC, yml_rl, Pm, opBytes{0x43}}, 994 {ACMOVLCS, yml_rl, Pm, opBytes{0x42}}, 995 {ACMOVLEQ, yml_rl, Pm, opBytes{0x44}}, 996 {ACMOVLGE, yml_rl, Pm, opBytes{0x4d}}, 997 {ACMOVLGT, yml_rl, Pm, opBytes{0x4f}}, 998 {ACMOVLHI, yml_rl, Pm, opBytes{0x47}}, 999 {ACMOVLLE, yml_rl, Pm, opBytes{0x4e}}, 1000 {ACMOVLLS, yml_rl, Pm, opBytes{0x46}}, 1001 {ACMOVLLT, yml_rl, Pm, opBytes{0x4c}}, 1002 {ACMOVLMI, yml_rl, Pm, opBytes{0x48}}, 1003 {ACMOVLNE, yml_rl, Pm, opBytes{0x45}}, 1004 {ACMOVLOC, yml_rl, Pm, opBytes{0x41}}, 1005 {ACMOVLOS, yml_rl, Pm, opBytes{0x40}}, 1006 
{ACMOVLPC, yml_rl, Pm, opBytes{0x4b}}, 1007 {ACMOVLPL, yml_rl, Pm, opBytes{0x49}}, 1008 {ACMOVLPS, yml_rl, Pm, opBytes{0x4a}}, 1009 {ACMOVQCC, yml_rl, Pw, opBytes{0x0f, 0x43}}, 1010 {ACMOVQCS, yml_rl, Pw, opBytes{0x0f, 0x42}}, 1011 {ACMOVQEQ, yml_rl, Pw, opBytes{0x0f, 0x44}}, 1012 {ACMOVQGE, yml_rl, Pw, opBytes{0x0f, 0x4d}}, 1013 {ACMOVQGT, yml_rl, Pw, opBytes{0x0f, 0x4f}}, 1014 {ACMOVQHI, yml_rl, Pw, opBytes{0x0f, 0x47}}, 1015 {ACMOVQLE, yml_rl, Pw, opBytes{0x0f, 0x4e}}, 1016 {ACMOVQLS, yml_rl, Pw, opBytes{0x0f, 0x46}}, 1017 {ACMOVQLT, yml_rl, Pw, opBytes{0x0f, 0x4c}}, 1018 {ACMOVQMI, yml_rl, Pw, opBytes{0x0f, 0x48}}, 1019 {ACMOVQNE, yml_rl, Pw, opBytes{0x0f, 0x45}}, 1020 {ACMOVQOC, yml_rl, Pw, opBytes{0x0f, 0x41}}, 1021 {ACMOVQOS, yml_rl, Pw, opBytes{0x0f, 0x40}}, 1022 {ACMOVQPC, yml_rl, Pw, opBytes{0x0f, 0x4b}}, 1023 {ACMOVQPL, yml_rl, Pw, opBytes{0x0f, 0x49}}, 1024 {ACMOVQPS, yml_rl, Pw, opBytes{0x0f, 0x4a}}, 1025 {ACMOVWCC, yml_rl, Pq, opBytes{0x43}}, 1026 {ACMOVWCS, yml_rl, Pq, opBytes{0x42}}, 1027 {ACMOVWEQ, yml_rl, Pq, opBytes{0x44}}, 1028 {ACMOVWGE, yml_rl, Pq, opBytes{0x4d}}, 1029 {ACMOVWGT, yml_rl, Pq, opBytes{0x4f}}, 1030 {ACMOVWHI, yml_rl, Pq, opBytes{0x47}}, 1031 {ACMOVWLE, yml_rl, Pq, opBytes{0x4e}}, 1032 {ACMOVWLS, yml_rl, Pq, opBytes{0x46}}, 1033 {ACMOVWLT, yml_rl, Pq, opBytes{0x4c}}, 1034 {ACMOVWMI, yml_rl, Pq, opBytes{0x48}}, 1035 {ACMOVWNE, yml_rl, Pq, opBytes{0x45}}, 1036 {ACMOVWOC, yml_rl, Pq, opBytes{0x41}}, 1037 {ACMOVWOS, yml_rl, Pq, opBytes{0x40}}, 1038 {ACMOVWPC, yml_rl, Pq, opBytes{0x4b}}, 1039 {ACMOVWPL, yml_rl, Pq, opBytes{0x49}}, 1040 {ACMOVWPS, yml_rl, Pq, opBytes{0x4a}}, 1041 {ACMPB, ycmpb, Pb, opBytes{0x3c, 0x80, 07, 0x38, 0x3a}}, 1042 {ACMPL, ycmpl, Px, opBytes{0x83, 07, 0x3d, 0x81, 07, 0x39, 0x3b}}, 1043 {ACMPPD, yxcmpi, Px, opBytes{Pe, 0xc2}}, 1044 {ACMPPS, yxcmpi, Pm, opBytes{0xc2, 0}}, 1045 {ACMPQ, ycmpl, Pw, opBytes{0x83, 07, 0x3d, 0x81, 07, 0x39, 0x3b}}, 1046 {ACMPSB, ynone, Pb, opBytes{0xa6}}, 1047 {ACMPSD, yxcmpi, Px, 
opBytes{Pf2, 0xc2}}, 1048 {ACMPSL, ynone, Px, opBytes{0xa7}}, 1049 {ACMPSQ, ynone, Pw, opBytes{0xa7}}, 1050 {ACMPSS, yxcmpi, Px, opBytes{Pf3, 0xc2}}, 1051 {ACMPSW, ynone, Pe, opBytes{0xa7}}, 1052 {ACMPW, ycmpl, Pe, opBytes{0x83, 07, 0x3d, 0x81, 07, 0x39, 0x3b}}, 1053 {ACOMISD, yxm, Pe, opBytes{0x2f}}, 1054 {ACOMISS, yxm, Pm, opBytes{0x2f}}, 1055 {ACPUID, ynone, Pm, opBytes{0xa2}}, 1056 {ACVTPL2PD, yxcvm2, Px, opBytes{Pf3, 0xe6, Pe, 0x2a}}, 1057 {ACVTPL2PS, yxcvm2, Pm, opBytes{0x5b, 0, 0x2a, 0}}, 1058 {ACVTPD2PL, yxcvm1, Px, opBytes{Pf2, 0xe6, Pe, 0x2d}}, 1059 {ACVTPD2PS, yxm, Pe, opBytes{0x5a}}, 1060 {ACVTPS2PL, yxcvm1, Px, opBytes{Pe, 0x5b, Pm, 0x2d}}, 1061 {ACVTPS2PD, yxm, Pm, opBytes{0x5a}}, 1062 {ACVTSD2SL, yxcvfl, Pf2, opBytes{0x2d}}, 1063 {ACVTSD2SQ, yxcvfq, Pw, opBytes{Pf2, 0x2d}}, 1064 {ACVTSD2SS, yxm, Pf2, opBytes{0x5a}}, 1065 {ACVTSL2SD, yxcvlf, Pf2, opBytes{0x2a}}, 1066 {ACVTSQ2SD, yxcvqf, Pw, opBytes{Pf2, 0x2a}}, 1067 {ACVTSL2SS, yxcvlf, Pf3, opBytes{0x2a}}, 1068 {ACVTSQ2SS, yxcvqf, Pw, opBytes{Pf3, 0x2a}}, 1069 {ACVTSS2SD, yxm, Pf3, opBytes{0x5a}}, 1070 {ACVTSS2SL, yxcvfl, Pf3, opBytes{0x2d}}, 1071 {ACVTSS2SQ, yxcvfq, Pw, opBytes{Pf3, 0x2d}}, 1072 {ACVTTPD2PL, yxcvm1, Px, opBytes{Pe, 0xe6, Pe, 0x2c}}, 1073 {ACVTTPS2PL, yxcvm1, Px, opBytes{Pf3, 0x5b, Pm, 0x2c}}, 1074 {ACVTTSD2SL, yxcvfl, Pf2, opBytes{0x2c}}, 1075 {ACVTTSD2SQ, yxcvfq, Pw, opBytes{Pf2, 0x2c}}, 1076 {ACVTTSS2SL, yxcvfl, Pf3, opBytes{0x2c}}, 1077 {ACVTTSS2SQ, yxcvfq, Pw, opBytes{Pf3, 0x2c}}, 1078 {ACWD, ynone, Pe, opBytes{0x99}}, 1079 {ACWDE, ynone, Px, opBytes{0x98}}, 1080 {ACQO, ynone, Pw, opBytes{0x99}}, 1081 {ADAA, ynone, P32, opBytes{0x27}}, 1082 {ADAS, ynone, P32, opBytes{0x2f}}, 1083 {ADECB, yscond, Pb, opBytes{0xfe, 01}}, 1084 {ADECL, yincl, Px1, opBytes{0x48, 0xff, 01}}, 1085 {ADECQ, yincq, Pw, opBytes{0xff, 01}}, 1086 {ADECW, yincq, Pe, opBytes{0xff, 01}}, 1087 {ADIVB, ydivb, Pb, opBytes{0xf6, 06}}, 1088 {ADIVL, ydivl, Px, opBytes{0xf7, 06}}, 1089 {ADIVPD, yxm, Pe, opBytes{0x5e}}, 
1090 {ADIVPS, yxm, Pm, opBytes{0x5e}}, 1091 {ADIVQ, ydivl, Pw, opBytes{0xf7, 06}}, 1092 {ADIVSD, yxm, Pf2, opBytes{0x5e}}, 1093 {ADIVSS, yxm, Pf3, opBytes{0x5e}}, 1094 {ADIVW, ydivl, Pe, opBytes{0xf7, 06}}, 1095 {ADPPD, yxshuf, Pq, opBytes{0x3a, 0x41, 0}}, 1096 {ADPPS, yxshuf, Pq, opBytes{0x3a, 0x40, 0}}, 1097 {AEMMS, ynone, Pm, opBytes{0x77}}, 1098 {AEXTRACTPS, yextractps, Pq, opBytes{0x3a, 0x17, 0}}, 1099 {AENTER, nil, 0, opBytes{}}, // botch 1100 {AFXRSTOR, ysvrs_mo, Pm, opBytes{0xae, 01, 0xae, 01}}, 1101 {AFXSAVE, ysvrs_om, Pm, opBytes{0xae, 00, 0xae, 00}}, 1102 {AFXRSTOR64, ysvrs_mo, Pw, opBytes{0x0f, 0xae, 01, 0x0f, 0xae, 01}}, 1103 {AFXSAVE64, ysvrs_om, Pw, opBytes{0x0f, 0xae, 00, 0x0f, 0xae, 00}}, 1104 {AHLT, ynone, Px, opBytes{0xf4}}, 1105 {AIDIVB, ydivb, Pb, opBytes{0xf6, 07}}, 1106 {AIDIVL, ydivl, Px, opBytes{0xf7, 07}}, 1107 {AIDIVQ, ydivl, Pw, opBytes{0xf7, 07}}, 1108 {AIDIVW, ydivl, Pe, opBytes{0xf7, 07}}, 1109 {AIMULB, ydivb, Pb, opBytes{0xf6, 05}}, 1110 {AIMULL, yimul, Px, opBytes{0xf7, 05, 0x6b, 0x69, Pm, 0xaf}}, 1111 {AIMULQ, yimul, Pw, opBytes{0xf7, 05, 0x6b, 0x69, Pm, 0xaf}}, 1112 {AIMULW, yimul, Pe, opBytes{0xf7, 05, 0x6b, 0x69, Pm, 0xaf}}, 1113 {AIMUL3W, yimul3, Pe, opBytes{0x6b, 00, 0x69, 00}}, 1114 {AIMUL3L, yimul3, Px, opBytes{0x6b, 00, 0x69, 00}}, 1115 {AIMUL3Q, yimul3, Pw, opBytes{0x6b, 00, 0x69, 00}}, 1116 {AINB, yin, Pb, opBytes{0xe4, 0xec}}, 1117 {AINW, yin, Pe, opBytes{0xe5, 0xed}}, 1118 {AINL, yin, Px, opBytes{0xe5, 0xed}}, 1119 {AINCB, yscond, Pb, opBytes{0xfe, 00}}, 1120 {AINCL, yincl, Px1, opBytes{0x40, 0xff, 00}}, 1121 {AINCQ, yincq, Pw, opBytes{0xff, 00}}, 1122 {AINCW, yincq, Pe, opBytes{0xff, 00}}, 1123 {AINSB, ynone, Pb, opBytes{0x6c}}, 1124 {AINSL, ynone, Px, opBytes{0x6d}}, 1125 {AINSERTPS, yxshuf, Pq, opBytes{0x3a, 0x21, 0}}, 1126 {AINSW, ynone, Pe, opBytes{0x6d}}, 1127 {AICEBP, ynone, Px, opBytes{0xf1}}, 1128 {AINT, yint, Px, opBytes{0xcd}}, 1129 {AINTO, ynone, P32, opBytes{0xce}}, 1130 {AIRETL, ynone, Px, opBytes{0xcf}}, 
1131 {AIRETQ, ynone, Pw, opBytes{0xcf}}, 1132 {AIRETW, ynone, Pe, opBytes{0xcf}}, 1133 {AJCC, yjcond, Px, opBytes{0x73, 0x83, 00}}, 1134 {AJCS, yjcond, Px, opBytes{0x72, 0x82}}, 1135 {AJCXZL, yloop, Px, opBytes{0xe3}}, 1136 {AJCXZW, yloop, Px, opBytes{0xe3}}, 1137 {AJCXZQ, yloop, Px, opBytes{0xe3}}, 1138 {AJEQ, yjcond, Px, opBytes{0x74, 0x84}}, 1139 {AJGE, yjcond, Px, opBytes{0x7d, 0x8d}}, 1140 {AJGT, yjcond, Px, opBytes{0x7f, 0x8f}}, 1141 {AJHI, yjcond, Px, opBytes{0x77, 0x87}}, 1142 {AJLE, yjcond, Px, opBytes{0x7e, 0x8e}}, 1143 {AJLS, yjcond, Px, opBytes{0x76, 0x86}}, 1144 {AJLT, yjcond, Px, opBytes{0x7c, 0x8c}}, 1145 {AJMI, yjcond, Px, opBytes{0x78, 0x88}}, 1146 {obj.AJMP, yjmp, Px, opBytes{0xff, 04, 0xeb, 0xe9}}, 1147 {AJNE, yjcond, Px, opBytes{0x75, 0x85}}, 1148 {AJOC, yjcond, Px, opBytes{0x71, 0x81, 00}}, 1149 {AJOS, yjcond, Px, opBytes{0x70, 0x80, 00}}, 1150 {AJPC, yjcond, Px, opBytes{0x7b, 0x8b}}, 1151 {AJPL, yjcond, Px, opBytes{0x79, 0x89}}, 1152 {AJPS, yjcond, Px, opBytes{0x7a, 0x8a}}, 1153 {AHADDPD, yxm, Pq, opBytes{0x7c}}, 1154 {AHADDPS, yxm, Pf2, opBytes{0x7c}}, 1155 {AHSUBPD, yxm, Pq, opBytes{0x7d}}, 1156 {AHSUBPS, yxm, Pf2, opBytes{0x7d}}, 1157 {ALAHF, ynone, Px, opBytes{0x9f}}, 1158 {ALARL, yml_rl, Pm, opBytes{0x02}}, 1159 {ALARQ, yml_rl, Pw, opBytes{0x0f, 0x02}}, 1160 {ALARW, yml_rl, Pq, opBytes{0x02}}, 1161 {ALDDQU, ylddqu, Pf2, opBytes{0xf0}}, 1162 {ALDMXCSR, ysvrs_mo, Pm, opBytes{0xae, 02, 0xae, 02}}, 1163 {ALEAL, ym_rl, Px, opBytes{0x8d}}, 1164 {ALEAQ, ym_rl, Pw, opBytes{0x8d}}, 1165 {ALEAVEL, ynone, P32, opBytes{0xc9}}, 1166 {ALEAVEQ, ynone, Py, opBytes{0xc9}}, 1167 {ALEAVEW, ynone, Pe, opBytes{0xc9}}, 1168 {ALEAW, ym_rl, Pe, opBytes{0x8d}}, 1169 {ALOCK, ynone, Px, opBytes{0xf0}}, 1170 {ALODSB, ynone, Pb, opBytes{0xac}}, 1171 {ALODSL, ynone, Px, opBytes{0xad}}, 1172 {ALODSQ, ynone, Pw, opBytes{0xad}}, 1173 {ALODSW, ynone, Pe, opBytes{0xad}}, 1174 {ALONG, ybyte, Px, opBytes{4}}, 1175 {ALOOP, yloop, Px, opBytes{0xe2}}, 1176 {ALOOPEQ, yloop, Px, 
opBytes{0xe1}}, 1177 {ALOOPNE, yloop, Px, opBytes{0xe0}}, 1178 {ALTR, ydivl, Pm, opBytes{0x00, 03}}, 1179 {ALZCNTL, yml_rl, Pf3, opBytes{0xbd}}, 1180 {ALZCNTQ, yml_rl, Pfw, opBytes{0xbd}}, 1181 {ALZCNTW, yml_rl, Pef3, opBytes{0xbd}}, 1182 {ALSLL, yml_rl, Pm, opBytes{0x03}}, 1183 {ALSLW, yml_rl, Pq, opBytes{0x03}}, 1184 {ALSLQ, yml_rl, Pw, opBytes{0x0f, 0x03}}, 1185 {AMASKMOVOU, yxr, Pe, opBytes{0xf7}}, 1186 {AMASKMOVQ, ymr, Pm, opBytes{0xf7}}, 1187 {AMAXPD, yxm, Pe, opBytes{0x5f}}, 1188 {AMAXPS, yxm, Pm, opBytes{0x5f}}, 1189 {AMAXSD, yxm, Pf2, opBytes{0x5f}}, 1190 {AMAXSS, yxm, Pf3, opBytes{0x5f}}, 1191 {AMINPD, yxm, Pe, opBytes{0x5d}}, 1192 {AMINPS, yxm, Pm, opBytes{0x5d}}, 1193 {AMINSD, yxm, Pf2, opBytes{0x5d}}, 1194 {AMINSS, yxm, Pf3, opBytes{0x5d}}, 1195 {AMONITOR, ynone, Px, opBytes{0x0f, 0x01, 0xc8, 0}}, 1196 {AMWAIT, ynone, Px, opBytes{0x0f, 0x01, 0xc9, 0}}, 1197 {AMOVAPD, yxmov, Pe, opBytes{0x28, 0x29}}, 1198 {AMOVAPS, yxmov, Pm, opBytes{0x28, 0x29}}, 1199 {AMOVB, ymovb, Pb, opBytes{0x88, 0x8a, 0xb0, 0xc6, 00}}, 1200 {AMOVBLSX, ymb_rl, Pm, opBytes{0xbe}}, 1201 {AMOVBLZX, ymb_rl, Pm, opBytes{0xb6}}, 1202 {AMOVBQSX, ymb_rl, Pw, opBytes{0x0f, 0xbe}}, 1203 {AMOVBQZX, ymb_rl, Pw, opBytes{0x0f, 0xb6}}, 1204 {AMOVBWSX, ymb_rl, Pq, opBytes{0xbe}}, 1205 {AMOVSWW, ymb_rl, Pe, opBytes{0x0f, 0xbf}}, 1206 {AMOVBWZX, ymb_rl, Pq, opBytes{0xb6}}, 1207 {AMOVZWW, ymb_rl, Pe, opBytes{0x0f, 0xb7}}, 1208 {AMOVO, yxmov, Pe, opBytes{0x6f, 0x7f}}, 1209 {AMOVOU, yxmov, Pf3, opBytes{0x6f, 0x7f}}, 1210 {AMOVHLPS, yxr, Pm, opBytes{0x12}}, 1211 {AMOVHPD, yxmov, Pe, opBytes{0x16, 0x17}}, 1212 {AMOVHPS, yxmov, Pm, opBytes{0x16, 0x17}}, 1213 {AMOVL, ymovl, Px, opBytes{0x89, 0x8b, 0xb8, 0xc7, 00, 0x6e, 0x7e, Pe, 0x6e, Pe, 0x7e, 0}}, 1214 {AMOVLHPS, yxr, Pm, opBytes{0x16}}, 1215 {AMOVLPD, yxmov, Pe, opBytes{0x12, 0x13}}, 1216 {AMOVLPS, yxmov, Pm, opBytes{0x12, 0x13}}, 1217 {AMOVLQSX, yml_rl, Pw, opBytes{0x63}}, 1218 {AMOVLQZX, yml_rl, Px, opBytes{0x8b}}, 1219 {AMOVMSKPD, yxrrl, Pq, 
opBytes{0x50}}, 1220 {AMOVMSKPS, yxrrl, Pm, opBytes{0x50}}, 1221 {AMOVNTO, yxr_ml, Pe, opBytes{0xe7}}, 1222 {AMOVNTDQA, ylddqu, Pq4, opBytes{0x2a}}, 1223 {AMOVNTPD, yxr_ml, Pe, opBytes{0x2b}}, 1224 {AMOVNTPS, yxr_ml, Pm, opBytes{0x2b}}, 1225 {AMOVNTQ, ymr_ml, Pm, opBytes{0xe7}}, 1226 {AMOVQ, ymovq, Pw8, opBytes{0x6f, 0x7f, Pf2, 0xd6, Pf3, 0x7e, Pe, 0xd6, 0x89, 0x8b, 0xc7, 00, 0xb8, 0xc7, 00, 0x6e, 0x7e, Pe, 0x6e, Pe, 0x7e, 0}}, 1227 {AMOVQOZX, ymrxr, Pf3, opBytes{0xd6, 0x7e}}, 1228 {AMOVSB, ynone, Pb, opBytes{0xa4}}, 1229 {AMOVSD, yxmov, Pf2, opBytes{0x10, 0x11}}, 1230 {AMOVSL, ynone, Px, opBytes{0xa5}}, 1231 {AMOVSQ, ynone, Pw, opBytes{0xa5}}, 1232 {AMOVSS, yxmov, Pf3, opBytes{0x10, 0x11}}, 1233 {AMOVSW, ynone, Pe, opBytes{0xa5}}, 1234 {AMOVUPD, yxmov, Pe, opBytes{0x10, 0x11}}, 1235 {AMOVUPS, yxmov, Pm, opBytes{0x10, 0x11}}, 1236 {AMOVW, ymovw, Pe, opBytes{0x89, 0x8b, 0xb8, 0xc7, 00, 0}}, 1237 {AMOVWLSX, yml_rl, Pm, opBytes{0xbf}}, 1238 {AMOVWLZX, yml_rl, Pm, opBytes{0xb7}}, 1239 {AMOVWQSX, yml_rl, Pw, opBytes{0x0f, 0xbf}}, 1240 {AMOVWQZX, yml_rl, Pw, opBytes{0x0f, 0xb7}}, 1241 {AMPSADBW, yxshuf, Pq, opBytes{0x3a, 0x42, 0}}, 1242 {AMULB, ydivb, Pb, opBytes{0xf6, 04}}, 1243 {AMULL, ydivl, Px, opBytes{0xf7, 04}}, 1244 {AMULPD, yxm, Pe, opBytes{0x59}}, 1245 {AMULPS, yxm, Ym, opBytes{0x59}}, 1246 {AMULQ, ydivl, Pw, opBytes{0xf7, 04}}, 1247 {AMULSD, yxm, Pf2, opBytes{0x59}}, 1248 {AMULSS, yxm, Pf3, opBytes{0x59}}, 1249 {AMULW, ydivl, Pe, opBytes{0xf7, 04}}, 1250 {ANEGB, yscond, Pb, opBytes{0xf6, 03}}, 1251 {ANEGL, yscond, Px, opBytes{0xf7, 03}}, 1252 {ANEGQ, yscond, Pw, opBytes{0xf7, 03}}, 1253 {ANEGW, yscond, Pe, opBytes{0xf7, 03}}, 1254 {obj.ANOP, ynop, Px, opBytes{0, 0}}, 1255 {ANOTB, yscond, Pb, opBytes{0xf6, 02}}, 1256 {ANOTL, yscond, Px, opBytes{0xf7, 02}}, // TODO(rsc): yscond is wrong here. 
1257 {ANOTQ, yscond, Pw, opBytes{0xf7, 02}}, 1258 {ANOTW, yscond, Pe, opBytes{0xf7, 02}}, 1259 {AORB, yxorb, Pb, opBytes{0x0c, 0x80, 01, 0x08, 0x0a}}, 1260 {AORL, yaddl, Px, opBytes{0x83, 01, 0x0d, 0x81, 01, 0x09, 0x0b}}, 1261 {AORPD, yxm, Pq, opBytes{0x56}}, 1262 {AORPS, yxm, Pm, opBytes{0x56}}, 1263 {AORQ, yaddl, Pw, opBytes{0x83, 01, 0x0d, 0x81, 01, 0x09, 0x0b}}, 1264 {AORW, yaddl, Pe, opBytes{0x83, 01, 0x0d, 0x81, 01, 0x09, 0x0b}}, 1265 {AOUTB, yin, Pb, opBytes{0xe6, 0xee}}, 1266 {AOUTL, yin, Px, opBytes{0xe7, 0xef}}, 1267 {AOUTW, yin, Pe, opBytes{0xe7, 0xef}}, 1268 {AOUTSB, ynone, Pb, opBytes{0x6e}}, 1269 {AOUTSL, ynone, Px, opBytes{0x6f}}, 1270 {AOUTSW, ynone, Pe, opBytes{0x6f}}, 1271 {APABSB, yxm_q4, Pq4, opBytes{0x1c}}, 1272 {APABSD, yxm_q4, Pq4, opBytes{0x1e}}, 1273 {APABSW, yxm_q4, Pq4, opBytes{0x1d}}, 1274 {APACKSSLW, ymm, Py1, opBytes{0x6b, Pe, 0x6b}}, 1275 {APACKSSWB, ymm, Py1, opBytes{0x63, Pe, 0x63}}, 1276 {APACKUSDW, yxm_q4, Pq4, opBytes{0x2b}}, 1277 {APACKUSWB, ymm, Py1, opBytes{0x67, Pe, 0x67}}, 1278 {APADDB, ymm, Py1, opBytes{0xfc, Pe, 0xfc}}, 1279 {APADDL, ymm, Py1, opBytes{0xfe, Pe, 0xfe}}, 1280 {APADDQ, yxm, Pe, opBytes{0xd4}}, 1281 {APADDSB, ymm, Py1, opBytes{0xec, Pe, 0xec}}, 1282 {APADDSW, ymm, Py1, opBytes{0xed, Pe, 0xed}}, 1283 {APADDUSB, ymm, Py1, opBytes{0xdc, Pe, 0xdc}}, 1284 {APADDUSW, ymm, Py1, opBytes{0xdd, Pe, 0xdd}}, 1285 {APADDW, ymm, Py1, opBytes{0xfd, Pe, 0xfd}}, 1286 {APALIGNR, ypalignr, Pq, opBytes{0x3a, 0x0f}}, 1287 {APAND, ymm, Py1, opBytes{0xdb, Pe, 0xdb}}, 1288 {APANDN, ymm, Py1, opBytes{0xdf, Pe, 0xdf}}, 1289 {APAUSE, ynone, Px, opBytes{0xf3, 0x90}}, 1290 {APAVGB, ymm, Py1, opBytes{0xe0, Pe, 0xe0}}, 1291 {APAVGW, ymm, Py1, opBytes{0xe3, Pe, 0xe3}}, 1292 {APBLENDW, yxshuf, Pq, opBytes{0x3a, 0x0e, 0}}, 1293 {APCMPEQB, ymm, Py1, opBytes{0x74, Pe, 0x74}}, 1294 {APCMPEQL, ymm, Py1, opBytes{0x76, Pe, 0x76}}, 1295 {APCMPEQQ, yxm_q4, Pq4, opBytes{0x29}}, 1296 {APCMPEQW, ymm, Py1, opBytes{0x75, Pe, 0x75}}, 1297 {APCMPGTB, ymm, 
Py1, opBytes{0x64, Pe, 0x64}}, 1298 {APCMPGTL, ymm, Py1, opBytes{0x66, Pe, 0x66}}, 1299 {APCMPGTQ, yxm_q4, Pq4, opBytes{0x37}}, 1300 {APCMPGTW, ymm, Py1, opBytes{0x65, Pe, 0x65}}, 1301 {APCMPISTRI, yxshuf, Pq, opBytes{0x3a, 0x63, 0}}, 1302 {APCMPISTRM, yxshuf, Pq, opBytes{0x3a, 0x62, 0}}, 1303 {APEXTRW, yextrw, Pq, opBytes{0xc5, 0, 0x3a, 0x15, 0}}, 1304 {APEXTRB, yextr, Pq, opBytes{0x3a, 0x14, 00}}, 1305 {APEXTRD, yextr, Pq, opBytes{0x3a, 0x16, 00}}, 1306 {APEXTRQ, yextr, Pq3, opBytes{0x3a, 0x16, 00}}, 1307 {APHADDD, ymmxmm0f38, Px, opBytes{0x0F, 0x38, 0x02, 0, 0x66, 0x0F, 0x38, 0x02, 0}}, 1308 {APHADDSW, yxm_q4, Pq4, opBytes{0x03}}, 1309 {APHADDW, yxm_q4, Pq4, opBytes{0x01}}, 1310 {APHMINPOSUW, yxm_q4, Pq4, opBytes{0x41}}, 1311 {APHSUBD, yxm_q4, Pq4, opBytes{0x06}}, 1312 {APHSUBSW, yxm_q4, Pq4, opBytes{0x07}}, 1313 {APHSUBW, yxm_q4, Pq4, opBytes{0x05}}, 1314 {APINSRW, yinsrw, Pq, opBytes{0xc4, 00}}, 1315 {APINSRB, yinsr, Pq, opBytes{0x3a, 0x20, 00}}, 1316 {APINSRD, yinsr, Pq, opBytes{0x3a, 0x22, 00}}, 1317 {APINSRQ, yinsr, Pq3, opBytes{0x3a, 0x22, 00}}, 1318 {APMADDUBSW, yxm_q4, Pq4, opBytes{0x04}}, 1319 {APMADDWL, ymm, Py1, opBytes{0xf5, Pe, 0xf5}}, 1320 {APMAXSB, yxm_q4, Pq4, opBytes{0x3c}}, 1321 {APMAXSD, yxm_q4, Pq4, opBytes{0x3d}}, 1322 {APMAXSW, yxm, Pe, opBytes{0xee}}, 1323 {APMAXUB, yxm, Pe, opBytes{0xde}}, 1324 {APMAXUD, yxm_q4, Pq4, opBytes{0x3f}}, 1325 {APMAXUW, yxm_q4, Pq4, opBytes{0x3e}}, 1326 {APMINSB, yxm_q4, Pq4, opBytes{0x38}}, 1327 {APMINSD, yxm_q4, Pq4, opBytes{0x39}}, 1328 {APMINSW, yxm, Pe, opBytes{0xea}}, 1329 {APMINUB, yxm, Pe, opBytes{0xda}}, 1330 {APMINUD, yxm_q4, Pq4, opBytes{0x3b}}, 1331 {APMINUW, yxm_q4, Pq4, opBytes{0x3a}}, 1332 {APMOVMSKB, ymskb, Px, opBytes{Pe, 0xd7, 0xd7}}, 1333 {APMOVSXBD, yxm_q4, Pq4, opBytes{0x21}}, 1334 {APMOVSXBQ, yxm_q4, Pq4, opBytes{0x22}}, 1335 {APMOVSXBW, yxm_q4, Pq4, opBytes{0x20}}, 1336 {APMOVSXDQ, yxm_q4, Pq4, opBytes{0x25}}, 1337 {APMOVSXWD, yxm_q4, Pq4, opBytes{0x23}}, 1338 {APMOVSXWQ, yxm_q4, Pq4, 
opBytes{0x24}}, 1339 {APMOVZXBD, yxm_q4, Pq4, opBytes{0x31}}, 1340 {APMOVZXBQ, yxm_q4, Pq4, opBytes{0x32}}, 1341 {APMOVZXBW, yxm_q4, Pq4, opBytes{0x30}}, 1342 {APMOVZXDQ, yxm_q4, Pq4, opBytes{0x35}}, 1343 {APMOVZXWD, yxm_q4, Pq4, opBytes{0x33}}, 1344 {APMOVZXWQ, yxm_q4, Pq4, opBytes{0x34}}, 1345 {APMULDQ, yxm_q4, Pq4, opBytes{0x28}}, 1346 {APMULHRSW, yxm_q4, Pq4, opBytes{0x0b}}, 1347 {APMULHUW, ymm, Py1, opBytes{0xe4, Pe, 0xe4}}, 1348 {APMULHW, ymm, Py1, opBytes{0xe5, Pe, 0xe5}}, 1349 {APMULLD, yxm_q4, Pq4, opBytes{0x40}}, 1350 {APMULLW, ymm, Py1, opBytes{0xd5, Pe, 0xd5}}, 1351 {APMULULQ, ymm, Py1, opBytes{0xf4, Pe, 0xf4}}, 1352 {APOPAL, ynone, P32, opBytes{0x61}}, 1353 {APOPAW, ynone, Pe, opBytes{0x61}}, 1354 {APOPCNTW, yml_rl, Pef3, opBytes{0xb8}}, 1355 {APOPCNTL, yml_rl, Pf3, opBytes{0xb8}}, 1356 {APOPCNTQ, yml_rl, Pfw, opBytes{0xb8}}, 1357 {APOPFL, ynone, P32, opBytes{0x9d}}, 1358 {APOPFQ, ynone, Py, opBytes{0x9d}}, 1359 {APOPFW, ynone, Pe, opBytes{0x9d}}, 1360 {APOPL, ypopl, P32, opBytes{0x58, 0x8f, 00}}, 1361 {APOPQ, ypopl, Py, opBytes{0x58, 0x8f, 00}}, 1362 {APOPW, ypopl, Pe, opBytes{0x58, 0x8f, 00}}, 1363 {APOR, ymm, Py1, opBytes{0xeb, Pe, 0xeb}}, 1364 {APSADBW, yxm, Pq, opBytes{0xf6}}, 1365 {APSHUFHW, yxshuf, Pf3, opBytes{0x70, 00}}, 1366 {APSHUFL, yxshuf, Pq, opBytes{0x70, 00}}, 1367 {APSHUFLW, yxshuf, Pf2, opBytes{0x70, 00}}, 1368 {APSHUFW, ymshuf, Pm, opBytes{0x70, 00}}, 1369 {APSHUFB, ymshufb, Pq, opBytes{0x38, 0x00}}, 1370 {APSIGNB, yxm_q4, Pq4, opBytes{0x08}}, 1371 {APSIGND, yxm_q4, Pq4, opBytes{0x0a}}, 1372 {APSIGNW, yxm_q4, Pq4, opBytes{0x09}}, 1373 {APSLLO, ypsdq, Pq, opBytes{0x73, 07}}, 1374 {APSLLL, yps, Py3, opBytes{0xf2, 0x72, 06, Pe, 0xf2, Pe, 0x72, 06}}, 1375 {APSLLQ, yps, Py3, opBytes{0xf3, 0x73, 06, Pe, 0xf3, Pe, 0x73, 06}}, 1376 {APSLLW, yps, Py3, opBytes{0xf1, 0x71, 06, Pe, 0xf1, Pe, 0x71, 06}}, 1377 {APSRAL, yps, Py3, opBytes{0xe2, 0x72, 04, Pe, 0xe2, Pe, 0x72, 04}}, 1378 {APSRAW, yps, Py3, opBytes{0xe1, 0x71, 04, Pe, 0xe1, Pe, 0x71, 
04}}, 1379 {APSRLO, ypsdq, Pq, opBytes{0x73, 03}}, 1380 {APSRLL, yps, Py3, opBytes{0xd2, 0x72, 02, Pe, 0xd2, Pe, 0x72, 02}}, 1381 {APSRLQ, yps, Py3, opBytes{0xd3, 0x73, 02, Pe, 0xd3, Pe, 0x73, 02}}, 1382 {APSRLW, yps, Py3, opBytes{0xd1, 0x71, 02, Pe, 0xd1, Pe, 0x71, 02}}, 1383 {APSUBB, yxm, Pe, opBytes{0xf8}}, 1384 {APSUBL, yxm, Pe, opBytes{0xfa}}, 1385 {APSUBQ, yxm, Pe, opBytes{0xfb}}, 1386 {APSUBSB, yxm, Pe, opBytes{0xe8}}, 1387 {APSUBSW, yxm, Pe, opBytes{0xe9}}, 1388 {APSUBUSB, yxm, Pe, opBytes{0xd8}}, 1389 {APSUBUSW, yxm, Pe, opBytes{0xd9}}, 1390 {APSUBW, yxm, Pe, opBytes{0xf9}}, 1391 {APTEST, yxm_q4, Pq4, opBytes{0x17}}, 1392 {APUNPCKHBW, ymm, Py1, opBytes{0x68, Pe, 0x68}}, 1393 {APUNPCKHLQ, ymm, Py1, opBytes{0x6a, Pe, 0x6a}}, 1394 {APUNPCKHQDQ, yxm, Pe, opBytes{0x6d}}, 1395 {APUNPCKHWL, ymm, Py1, opBytes{0x69, Pe, 0x69}}, 1396 {APUNPCKLBW, ymm, Py1, opBytes{0x60, Pe, 0x60}}, 1397 {APUNPCKLLQ, ymm, Py1, opBytes{0x62, Pe, 0x62}}, 1398 {APUNPCKLQDQ, yxm, Pe, opBytes{0x6c}}, 1399 {APUNPCKLWL, ymm, Py1, opBytes{0x61, Pe, 0x61}}, 1400 {APUSHAL, ynone, P32, opBytes{0x60}}, 1401 {APUSHAW, ynone, Pe, opBytes{0x60}}, 1402 {APUSHFL, ynone, P32, opBytes{0x9c}}, 1403 {APUSHFQ, ynone, Py, opBytes{0x9c}}, 1404 {APUSHFW, ynone, Pe, opBytes{0x9c}}, 1405 {APUSHL, ypushl, P32, opBytes{0x50, 0xff, 06, 0x6a, 0x68}}, 1406 {APUSHQ, ypushl, Py, opBytes{0x50, 0xff, 06, 0x6a, 0x68}}, 1407 {APUSHW, ypushl, Pe, opBytes{0x50, 0xff, 06, 0x6a, 0x68}}, 1408 {APXOR, ymm, Py1, opBytes{0xef, Pe, 0xef}}, 1409 {AQUAD, ybyte, Px, opBytes{8}}, 1410 {ARCLB, yshb, Pb, opBytes{0xd0, 02, 0xc0, 02, 0xd2, 02}}, 1411 {ARCLL, yshl, Px, opBytes{0xd1, 02, 0xc1, 02, 0xd3, 02, 0xd3, 02}}, 1412 {ARCLQ, yshl, Pw, opBytes{0xd1, 02, 0xc1, 02, 0xd3, 02, 0xd3, 02}}, 1413 {ARCLW, yshl, Pe, opBytes{0xd1, 02, 0xc1, 02, 0xd3, 02, 0xd3, 02}}, 1414 {ARCPPS, yxm, Pm, opBytes{0x53}}, 1415 {ARCPSS, yxm, Pf3, opBytes{0x53}}, 1416 {ARCRB, yshb, Pb, opBytes{0xd0, 03, 0xc0, 03, 0xd2, 03}}, 1417 {ARCRL, yshl, Px, opBytes{0xd1, 
03, 0xc1, 03, 0xd3, 03, 0xd3, 03}}, 1418 {ARCRQ, yshl, Pw, opBytes{0xd1, 03, 0xc1, 03, 0xd3, 03, 0xd3, 03}}, 1419 {ARCRW, yshl, Pe, opBytes{0xd1, 03, 0xc1, 03, 0xd3, 03, 0xd3, 03}}, 1420 {AREP, ynone, Px, opBytes{0xf3}}, 1421 {AREPN, ynone, Px, opBytes{0xf2}}, 1422 {obj.ARET, ynone, Px, opBytes{0xc3}}, 1423 {ARETFW, yret, Pe, opBytes{0xcb, 0xca}}, 1424 {ARETFL, yret, Px, opBytes{0xcb, 0xca}}, 1425 {ARETFQ, yret, Pw, opBytes{0xcb, 0xca}}, 1426 {AROLB, yshb, Pb, opBytes{0xd0, 00, 0xc0, 00, 0xd2, 00}}, 1427 {AROLL, yshl, Px, opBytes{0xd1, 00, 0xc1, 00, 0xd3, 00, 0xd3, 00}}, 1428 {AROLQ, yshl, Pw, opBytes{0xd1, 00, 0xc1, 00, 0xd3, 00, 0xd3, 00}}, 1429 {AROLW, yshl, Pe, opBytes{0xd1, 00, 0xc1, 00, 0xd3, 00, 0xd3, 00}}, 1430 {ARORB, yshb, Pb, opBytes{0xd0, 01, 0xc0, 01, 0xd2, 01}}, 1431 {ARORL, yshl, Px, opBytes{0xd1, 01, 0xc1, 01, 0xd3, 01, 0xd3, 01}}, 1432 {ARORQ, yshl, Pw, opBytes{0xd1, 01, 0xc1, 01, 0xd3, 01, 0xd3, 01}}, 1433 {ARORW, yshl, Pe, opBytes{0xd1, 01, 0xc1, 01, 0xd3, 01, 0xd3, 01}}, 1434 {ARSQRTPS, yxm, Pm, opBytes{0x52}}, 1435 {ARSQRTSS, yxm, Pf3, opBytes{0x52}}, 1436 {ASAHF, ynone, Px, opBytes{0x9e, 00, 0x86, 0xe0, 0x50, 0x9d}}, // XCHGB AH,AL; PUSH AX; POPFL 1437 {ASALB, yshb, Pb, opBytes{0xd0, 04, 0xc0, 04, 0xd2, 04}}, 1438 {ASALL, yshl, Px, opBytes{0xd1, 04, 0xc1, 04, 0xd3, 04, 0xd3, 04}}, 1439 {ASALQ, yshl, Pw, opBytes{0xd1, 04, 0xc1, 04, 0xd3, 04, 0xd3, 04}}, 1440 {ASALW, yshl, Pe, opBytes{0xd1, 04, 0xc1, 04, 0xd3, 04, 0xd3, 04}}, 1441 {ASARB, yshb, Pb, opBytes{0xd0, 07, 0xc0, 07, 0xd2, 07}}, 1442 {ASARL, yshl, Px, opBytes{0xd1, 07, 0xc1, 07, 0xd3, 07, 0xd3, 07}}, 1443 {ASARQ, yshl, Pw, opBytes{0xd1, 07, 0xc1, 07, 0xd3, 07, 0xd3, 07}}, 1444 {ASARW, yshl, Pe, opBytes{0xd1, 07, 0xc1, 07, 0xd3, 07, 0xd3, 07}}, 1445 {ASBBB, yxorb, Pb, opBytes{0x1c, 0x80, 03, 0x18, 0x1a}}, 1446 {ASBBL, yaddl, Px, opBytes{0x83, 03, 0x1d, 0x81, 03, 0x19, 0x1b}}, 1447 {ASBBQ, yaddl, Pw, opBytes{0x83, 03, 0x1d, 0x81, 03, 0x19, 0x1b}}, 1448 {ASBBW, yaddl, Pe, opBytes{0x83, 03, 
0x1d, 0x81, 03, 0x19, 0x1b}}, 1449 {ASCASB, ynone, Pb, opBytes{0xae}}, 1450 {ASCASL, ynone, Px, opBytes{0xaf}}, 1451 {ASCASQ, ynone, Pw, opBytes{0xaf}}, 1452 {ASCASW, ynone, Pe, opBytes{0xaf}}, 1453 {ASETCC, yscond, Pb, opBytes{0x0f, 0x93, 00}}, 1454 {ASETCS, yscond, Pb, opBytes{0x0f, 0x92, 00}}, 1455 {ASETEQ, yscond, Pb, opBytes{0x0f, 0x94, 00}}, 1456 {ASETGE, yscond, Pb, opBytes{0x0f, 0x9d, 00}}, 1457 {ASETGT, yscond, Pb, opBytes{0x0f, 0x9f, 00}}, 1458 {ASETHI, yscond, Pb, opBytes{0x0f, 0x97, 00}}, 1459 {ASETLE, yscond, Pb, opBytes{0x0f, 0x9e, 00}}, 1460 {ASETLS, yscond, Pb, opBytes{0x0f, 0x96, 00}}, 1461 {ASETLT, yscond, Pb, opBytes{0x0f, 0x9c, 00}}, 1462 {ASETMI, yscond, Pb, opBytes{0x0f, 0x98, 00}}, 1463 {ASETNE, yscond, Pb, opBytes{0x0f, 0x95, 00}}, 1464 {ASETOC, yscond, Pb, opBytes{0x0f, 0x91, 00}}, 1465 {ASETOS, yscond, Pb, opBytes{0x0f, 0x90, 00}}, 1466 {ASETPC, yscond, Pb, opBytes{0x0f, 0x9b, 00}}, 1467 {ASETPL, yscond, Pb, opBytes{0x0f, 0x99, 00}}, 1468 {ASETPS, yscond, Pb, opBytes{0x0f, 0x9a, 00}}, 1469 {ASHLB, yshb, Pb, opBytes{0xd0, 04, 0xc0, 04, 0xd2, 04}}, 1470 {ASHLL, yshl, Px, opBytes{0xd1, 04, 0xc1, 04, 0xd3, 04, 0xd3, 04}}, 1471 {ASHLQ, yshl, Pw, opBytes{0xd1, 04, 0xc1, 04, 0xd3, 04, 0xd3, 04}}, 1472 {ASHLW, yshl, Pe, opBytes{0xd1, 04, 0xc1, 04, 0xd3, 04, 0xd3, 04}}, 1473 {ASHRB, yshb, Pb, opBytes{0xd0, 05, 0xc0, 05, 0xd2, 05}}, 1474 {ASHRL, yshl, Px, opBytes{0xd1, 05, 0xc1, 05, 0xd3, 05, 0xd3, 05}}, 1475 {ASHRQ, yshl, Pw, opBytes{0xd1, 05, 0xc1, 05, 0xd3, 05, 0xd3, 05}}, 1476 {ASHRW, yshl, Pe, opBytes{0xd1, 05, 0xc1, 05, 0xd3, 05, 0xd3, 05}}, 1477 {ASHUFPD, yxshuf, Pq, opBytes{0xc6, 00}}, 1478 {ASHUFPS, yxshuf, Pm, opBytes{0xc6, 00}}, 1479 {ASQRTPD, yxm, Pe, opBytes{0x51}}, 1480 {ASQRTPS, yxm, Pm, opBytes{0x51}}, 1481 {ASQRTSD, yxm, Pf2, opBytes{0x51}}, 1482 {ASQRTSS, yxm, Pf3, opBytes{0x51}}, 1483 {ASTC, ynone, Px, opBytes{0xf9}}, 1484 {ASTD, ynone, Px, opBytes{0xfd}}, 1485 {ASTI, ynone, Px, opBytes{0xfb}}, 1486 {ASTMXCSR, ysvrs_om, Pm, 
opBytes{0xae, 03, 0xae, 03}}, 1487 {ASTOSB, ynone, Pb, opBytes{0xaa}}, 1488 {ASTOSL, ynone, Px, opBytes{0xab}}, 1489 {ASTOSQ, ynone, Pw, opBytes{0xab}}, 1490 {ASTOSW, ynone, Pe, opBytes{0xab}}, 1491 {ASUBB, yxorb, Pb, opBytes{0x2c, 0x80, 05, 0x28, 0x2a}}, 1492 {ASUBL, yaddl, Px, opBytes{0x83, 05, 0x2d, 0x81, 05, 0x29, 0x2b}}, 1493 {ASUBPD, yxm, Pe, opBytes{0x5c}}, 1494 {ASUBPS, yxm, Pm, opBytes{0x5c}}, 1495 {ASUBQ, yaddl, Pw, opBytes{0x83, 05, 0x2d, 0x81, 05, 0x29, 0x2b}}, 1496 {ASUBSD, yxm, Pf2, opBytes{0x5c}}, 1497 {ASUBSS, yxm, Pf3, opBytes{0x5c}}, 1498 {ASUBW, yaddl, Pe, opBytes{0x83, 05, 0x2d, 0x81, 05, 0x29, 0x2b}}, 1499 {ASWAPGS, ynone, Pm, opBytes{0x01, 0xf8}}, 1500 {ASYSCALL, ynone, Px, opBytes{0x0f, 0x05}}, // fast syscall 1501 {ATESTB, yxorb, Pb, opBytes{0xa8, 0xf6, 00, 0x84, 0x84}}, 1502 {ATESTL, ytestl, Px, opBytes{0xa9, 0xf7, 00, 0x85, 0x85}}, 1503 {ATESTQ, ytestl, Pw, opBytes{0xa9, 0xf7, 00, 0x85, 0x85}}, 1504 {ATESTW, ytestl, Pe, opBytes{0xa9, 0xf7, 00, 0x85, 0x85}}, 1505 {ATPAUSE, ywrfsbase, Pq, opBytes{0xae, 06}}, 1506 {obj.ATEXT, ytext, Px, opBytes{}}, 1507 {AUCOMISD, yxm, Pe, opBytes{0x2e}}, 1508 {AUCOMISS, yxm, Pm, opBytes{0x2e}}, 1509 {AUNPCKHPD, yxm, Pe, opBytes{0x15}}, 1510 {AUNPCKHPS, yxm, Pm, opBytes{0x15}}, 1511 {AUNPCKLPD, yxm, Pe, opBytes{0x14}}, 1512 {AUNPCKLPS, yxm, Pm, opBytes{0x14}}, 1513 {AUMONITOR, ywrfsbase, Pf3, opBytes{0xae, 06}}, 1514 {AVERR, ydivl, Pm, opBytes{0x00, 04}}, 1515 {AVERW, ydivl, Pm, opBytes{0x00, 05}}, 1516 {AWAIT, ynone, Px, opBytes{0x9b}}, 1517 {AWORD, ybyte, Px, opBytes{2}}, 1518 {AXCHGB, yml_mb, Pb, opBytes{0x86, 0x86}}, 1519 {AXCHGL, yxchg, Px, opBytes{0x90, 0x90, 0x87, 0x87}}, 1520 {AXCHGQ, yxchg, Pw, opBytes{0x90, 0x90, 0x87, 0x87}}, 1521 {AXCHGW, yxchg, Pe, opBytes{0x90, 0x90, 0x87, 0x87}}, 1522 {AXLAT, ynone, Px, opBytes{0xd7}}, 1523 {AXORB, yxorb, Pb, opBytes{0x34, 0x80, 06, 0x30, 0x32}}, 1524 {AXORL, yaddl, Px, opBytes{0x83, 06, 0x35, 0x81, 06, 0x31, 0x33}}, 1525 {AXORPD, yxm, Pe, opBytes{0x57}}, 1526 
{AXORPS, yxm, Pm, opBytes{0x57}}, 1527 {AXORQ, yaddl, Pw, opBytes{0x83, 06, 0x35, 0x81, 06, 0x31, 0x33}}, 1528 {AXORW, yaddl, Pe, opBytes{0x83, 06, 0x35, 0x81, 06, 0x31, 0x33}}, 1529 {AFMOVB, yfmvx, Px, opBytes{0xdf, 04}}, 1530 {AFMOVBP, yfmvp, Px, opBytes{0xdf, 06}}, 1531 {AFMOVD, yfmvd, Px, opBytes{0xdd, 00, 0xdd, 02, 0xd9, 00, 0xdd, 02}}, 1532 {AFMOVDP, yfmvdp, Px, opBytes{0xdd, 03, 0xdd, 03}}, 1533 {AFMOVF, yfmvf, Px, opBytes{0xd9, 00, 0xd9, 02}}, 1534 {AFMOVFP, yfmvp, Px, opBytes{0xd9, 03}}, 1535 {AFMOVL, yfmvf, Px, opBytes{0xdb, 00, 0xdb, 02}}, 1536 {AFMOVLP, yfmvp, Px, opBytes{0xdb, 03}}, 1537 {AFMOVV, yfmvx, Px, opBytes{0xdf, 05}}, 1538 {AFMOVVP, yfmvp, Px, opBytes{0xdf, 07}}, 1539 {AFMOVW, yfmvf, Px, opBytes{0xdf, 00, 0xdf, 02}}, 1540 {AFMOVWP, yfmvp, Px, opBytes{0xdf, 03}}, 1541 {AFMOVX, yfmvx, Px, opBytes{0xdb, 05}}, 1542 {AFMOVXP, yfmvp, Px, opBytes{0xdb, 07}}, 1543 {AFCMOVCC, yfcmv, Px, opBytes{0xdb, 00}}, 1544 {AFCMOVCS, yfcmv, Px, opBytes{0xda, 00}}, 1545 {AFCMOVEQ, yfcmv, Px, opBytes{0xda, 01}}, 1546 {AFCMOVHI, yfcmv, Px, opBytes{0xdb, 02}}, 1547 {AFCMOVLS, yfcmv, Px, opBytes{0xda, 02}}, 1548 {AFCMOVB, yfcmv, Px, opBytes{0xda, 00}}, 1549 {AFCMOVBE, yfcmv, Px, opBytes{0xda, 02}}, 1550 {AFCMOVNB, yfcmv, Px, opBytes{0xdb, 00}}, 1551 {AFCMOVNBE, yfcmv, Px, opBytes{0xdb, 02}}, 1552 {AFCMOVE, yfcmv, Px, opBytes{0xda, 01}}, 1553 {AFCMOVNE, yfcmv, Px, opBytes{0xdb, 01}}, 1554 {AFCMOVNU, yfcmv, Px, opBytes{0xdb, 03}}, 1555 {AFCMOVU, yfcmv, Px, opBytes{0xda, 03}}, 1556 {AFCMOVUN, yfcmv, Px, opBytes{0xda, 03}}, 1557 {AFCOMD, yfadd, Px, opBytes{0xdc, 02, 0xd8, 02, 0xdc, 02}}, // botch 1558 {AFCOMDP, yfadd, Px, opBytes{0xdc, 03, 0xd8, 03, 0xdc, 03}}, // botch 1559 {AFCOMDPP, ycompp, Px, opBytes{0xde, 03}}, 1560 {AFCOMF, yfmvx, Px, opBytes{0xd8, 02}}, 1561 {AFCOMFP, yfmvx, Px, opBytes{0xd8, 03}}, 1562 {AFCOMI, yfcmv, Px, opBytes{0xdb, 06}}, 1563 {AFCOMIP, yfcmv, Px, opBytes{0xdf, 06}}, 1564 {AFCOML, yfmvx, Px, opBytes{0xda, 02}}, 1565 {AFCOMLP, yfmvx, Px, 
opBytes{0xda, 03}}, 1566 {AFCOMW, yfmvx, Px, opBytes{0xde, 02}}, 1567 {AFCOMWP, yfmvx, Px, opBytes{0xde, 03}}, 1568 {AFUCOM, ycompp, Px, opBytes{0xdd, 04}}, 1569 {AFUCOMI, ycompp, Px, opBytes{0xdb, 05}}, 1570 {AFUCOMIP, ycompp, Px, opBytes{0xdf, 05}}, 1571 {AFUCOMP, ycompp, Px, opBytes{0xdd, 05}}, 1572 {AFUCOMPP, ycompp, Px, opBytes{0xda, 13}}, 1573 {AFADDDP, ycompp, Px, opBytes{0xde, 00}}, 1574 {AFADDW, yfmvx, Px, opBytes{0xde, 00}}, 1575 {AFADDL, yfmvx, Px, opBytes{0xda, 00}}, 1576 {AFADDF, yfmvx, Px, opBytes{0xd8, 00}}, 1577 {AFADDD, yfadd, Px, opBytes{0xdc, 00, 0xd8, 00, 0xdc, 00}}, 1578 {AFMULDP, ycompp, Px, opBytes{0xde, 01}}, 1579 {AFMULW, yfmvx, Px, opBytes{0xde, 01}}, 1580 {AFMULL, yfmvx, Px, opBytes{0xda, 01}}, 1581 {AFMULF, yfmvx, Px, opBytes{0xd8, 01}}, 1582 {AFMULD, yfadd, Px, opBytes{0xdc, 01, 0xd8, 01, 0xdc, 01}}, 1583 {AFSUBDP, ycompp, Px, opBytes{0xde, 05}}, 1584 {AFSUBW, yfmvx, Px, opBytes{0xde, 04}}, 1585 {AFSUBL, yfmvx, Px, opBytes{0xda, 04}}, 1586 {AFSUBF, yfmvx, Px, opBytes{0xd8, 04}}, 1587 {AFSUBD, yfadd, Px, opBytes{0xdc, 04, 0xd8, 04, 0xdc, 05}}, 1588 {AFSUBRDP, ycompp, Px, opBytes{0xde, 04}}, 1589 {AFSUBRW, yfmvx, Px, opBytes{0xde, 05}}, 1590 {AFSUBRL, yfmvx, Px, opBytes{0xda, 05}}, 1591 {AFSUBRF, yfmvx, Px, opBytes{0xd8, 05}}, 1592 {AFSUBRD, yfadd, Px, opBytes{0xdc, 05, 0xd8, 05, 0xdc, 04}}, 1593 {AFDIVDP, ycompp, Px, opBytes{0xde, 07}}, 1594 {AFDIVW, yfmvx, Px, opBytes{0xde, 06}}, 1595 {AFDIVL, yfmvx, Px, opBytes{0xda, 06}}, 1596 {AFDIVF, yfmvx, Px, opBytes{0xd8, 06}}, 1597 {AFDIVD, yfadd, Px, opBytes{0xdc, 06, 0xd8, 06, 0xdc, 07}}, 1598 {AFDIVRDP, ycompp, Px, opBytes{0xde, 06}}, 1599 {AFDIVRW, yfmvx, Px, opBytes{0xde, 07}}, 1600 {AFDIVRL, yfmvx, Px, opBytes{0xda, 07}}, 1601 {AFDIVRF, yfmvx, Px, opBytes{0xd8, 07}}, 1602 {AFDIVRD, yfadd, Px, opBytes{0xdc, 07, 0xd8, 07, 0xdc, 06}}, 1603 {AFXCHD, yfxch, Px, opBytes{0xd9, 01, 0xd9, 01}}, 1604 {AFFREE, nil, 0, opBytes{}}, 1605 {AFLDCW, ysvrs_mo, Px, opBytes{0xd9, 05, 0xd9, 05}}, 1606 
{AFLDENV, ysvrs_mo, Px, opBytes{0xd9, 04, 0xd9, 04}}, 1607 {AFRSTOR, ysvrs_mo, Px, opBytes{0xdd, 04, 0xdd, 04}}, 1608 {AFSAVE, ysvrs_om, Px, opBytes{0xdd, 06, 0xdd, 06}}, 1609 {AFSTCW, ysvrs_om, Px, opBytes{0xd9, 07, 0xd9, 07}}, 1610 {AFSTENV, ysvrs_om, Px, opBytes{0xd9, 06, 0xd9, 06}}, 1611 {AFSTSW, ystsw, Px, opBytes{0xdd, 07, 0xdf, 0xe0}}, 1612 {AF2XM1, ynone, Px, opBytes{0xd9, 0xf0}}, 1613 {AFABS, ynone, Px, opBytes{0xd9, 0xe1}}, 1614 {AFBLD, ysvrs_mo, Px, opBytes{0xdf, 04}}, 1615 {AFBSTP, yclflush, Px, opBytes{0xdf, 06}}, 1616 {AFCHS, ynone, Px, opBytes{0xd9, 0xe0}}, 1617 {AFCLEX, ynone, Px, opBytes{0xdb, 0xe2}}, 1618 {AFCOS, ynone, Px, opBytes{0xd9, 0xff}}, 1619 {AFDECSTP, ynone, Px, opBytes{0xd9, 0xf6}}, 1620 {AFINCSTP, ynone, Px, opBytes{0xd9, 0xf7}}, 1621 {AFINIT, ynone, Px, opBytes{0xdb, 0xe3}}, 1622 {AFLD1, ynone, Px, opBytes{0xd9, 0xe8}}, 1623 {AFLDL2E, ynone, Px, opBytes{0xd9, 0xea}}, 1624 {AFLDL2T, ynone, Px, opBytes{0xd9, 0xe9}}, 1625 {AFLDLG2, ynone, Px, opBytes{0xd9, 0xec}}, 1626 {AFLDLN2, ynone, Px, opBytes{0xd9, 0xed}}, 1627 {AFLDPI, ynone, Px, opBytes{0xd9, 0xeb}}, 1628 {AFLDZ, ynone, Px, opBytes{0xd9, 0xee}}, 1629 {AFNOP, ynone, Px, opBytes{0xd9, 0xd0}}, 1630 {AFPATAN, ynone, Px, opBytes{0xd9, 0xf3}}, 1631 {AFPREM, ynone, Px, opBytes{0xd9, 0xf8}}, 1632 {AFPREM1, ynone, Px, opBytes{0xd9, 0xf5}}, 1633 {AFPTAN, ynone, Px, opBytes{0xd9, 0xf2}}, 1634 {AFRNDINT, ynone, Px, opBytes{0xd9, 0xfc}}, 1635 {AFSCALE, ynone, Px, opBytes{0xd9, 0xfd}}, 1636 {AFSIN, ynone, Px, opBytes{0xd9, 0xfe}}, 1637 {AFSINCOS, ynone, Px, opBytes{0xd9, 0xfb}}, 1638 {AFSQRT, ynone, Px, opBytes{0xd9, 0xfa}}, 1639 {AFTST, ynone, Px, opBytes{0xd9, 0xe4}}, 1640 {AFXAM, ynone, Px, opBytes{0xd9, 0xe5}}, 1641 {AFXTRACT, ynone, Px, opBytes{0xd9, 0xf4}}, 1642 {AFYL2X, ynone, Px, opBytes{0xd9, 0xf1}}, 1643 {AFYL2XP1, ynone, Px, opBytes{0xd9, 0xf9}}, 1644 {ACMPXCHGB, yrb_mb, Pb, opBytes{0x0f, 0xb0}}, 1645 {ACMPXCHGL, yrl_ml, Px, opBytes{0x0f, 0xb1}}, 1646 {ACMPXCHGW, yrl_ml, Pe, 
opBytes{0x0f, 0xb1}}, 1647 {ACMPXCHGQ, yrl_ml, Pw, opBytes{0x0f, 0xb1}}, 1648 {ACMPXCHG8B, yscond, Pm, opBytes{0xc7, 01}}, 1649 {ACMPXCHG16B, yscond, Pw, opBytes{0x0f, 0xc7, 01}}, 1650 {AINVD, ynone, Pm, opBytes{0x08}}, 1651 {AINVLPG, ydivb, Pm, opBytes{0x01, 07}}, 1652 {AINVPCID, ycrc32l, Pe, opBytes{0x0f, 0x38, 0x82, 0}}, 1653 {ALFENCE, ynone, Pm, opBytes{0xae, 0xe8}}, 1654 {AMFENCE, ynone, Pm, opBytes{0xae, 0xf0}}, 1655 {AMOVNTIL, yrl_ml, Pm, opBytes{0xc3}}, 1656 {AMOVNTIQ, yrl_ml, Pw, opBytes{0x0f, 0xc3}}, 1657 {ARDPKRU, ynone, Pm, opBytes{0x01, 0xee, 0}}, 1658 {ARDMSR, ynone, Pm, opBytes{0x32}}, 1659 {ARDPMC, ynone, Pm, opBytes{0x33}}, 1660 {ARDTSC, ynone, Pm, opBytes{0x31}}, 1661 {ARSM, ynone, Pm, opBytes{0xaa}}, 1662 {ASFENCE, ynone, Pm, opBytes{0xae, 0xf8}}, 1663 {ASYSRET, ynone, Pm, opBytes{0x07}}, 1664 {AWBINVD, ynone, Pm, opBytes{0x09}}, 1665 {AWRMSR, ynone, Pm, opBytes{0x30}}, 1666 {AWRPKRU, ynone, Pm, opBytes{0x01, 0xef, 0}}, 1667 {AXADDB, yrb_mb, Pb, opBytes{0x0f, 0xc0}}, 1668 {AXADDL, yrl_ml, Px, opBytes{0x0f, 0xc1}}, 1669 {AXADDQ, yrl_ml, Pw, opBytes{0x0f, 0xc1}}, 1670 {AXADDW, yrl_ml, Pe, opBytes{0x0f, 0xc1}}, 1671 {ACRC32B, ycrc32b, Px, opBytes{0xf2, 0x0f, 0x38, 0xf0, 0}}, 1672 {ACRC32L, ycrc32l, Px, opBytes{0xf2, 0x0f, 0x38, 0xf1, 0}}, 1673 {ACRC32Q, ycrc32l, Pw, opBytes{0xf2, 0x0f, 0x38, 0xf1, 0}}, 1674 {ACRC32W, ycrc32l, Pe, opBytes{0xf2, 0x0f, 0x38, 0xf1, 0}}, 1675 {APREFETCHT0, yprefetch, Pm, opBytes{0x18, 01}}, 1676 {APREFETCHT1, yprefetch, Pm, opBytes{0x18, 02}}, 1677 {APREFETCHT2, yprefetch, Pm, opBytes{0x18, 03}}, 1678 {APREFETCHNTA, yprefetch, Pm, opBytes{0x18, 00}}, 1679 {AMOVQL, yrl_ml, Px, opBytes{0x89}}, 1680 {obj.AUNDEF, ynone, Px, opBytes{0x0f, 0x0b}}, 1681 {AAESENC, yaes, Pq, opBytes{0x38, 0xdc, 0}}, 1682 {AAESENCLAST, yaes, Pq, opBytes{0x38, 0xdd, 0}}, 1683 {AAESDEC, yaes, Pq, opBytes{0x38, 0xde, 0}}, 1684 {AAESDECLAST, yaes, Pq, opBytes{0x38, 0xdf, 0}}, 1685 {AAESIMC, yaes, Pq, opBytes{0x38, 0xdb, 0}}, 1686 {AAESKEYGENASSIST, 
yxshuf, Pq, opBytes{0x3a, 0xdf, 0}}, 1687 {AROUNDPD, yxshuf, Pq, opBytes{0x3a, 0x09, 0}}, 1688 {AROUNDPS, yxshuf, Pq, opBytes{0x3a, 0x08, 0}}, 1689 {AROUNDSD, yxshuf, Pq, opBytes{0x3a, 0x0b, 0}}, 1690 {AROUNDSS, yxshuf, Pq, opBytes{0x3a, 0x0a, 0}}, 1691 {APSHUFD, yxshuf, Pq, opBytes{0x70, 0}}, 1692 {APCLMULQDQ, yxshuf, Pq, opBytes{0x3a, 0x44, 0}}, 1693 {APCMPESTRI, yxshuf, Pq, opBytes{0x3a, 0x61, 0}}, 1694 {APCMPESTRM, yxshuf, Pq, opBytes{0x3a, 0x60, 0}}, 1695 {AMOVDDUP, yxm, Pf2, opBytes{0x12}}, 1696 {AMOVSHDUP, yxm, Pf3, opBytes{0x16}}, 1697 {AMOVSLDUP, yxm, Pf3, opBytes{0x12}}, 1698 {ARDTSCP, ynone, Pm, opBytes{0x01, 0xf9, 0}}, 1699 {ASTAC, ynone, Pm, opBytes{0x01, 0xcb, 0}}, 1700 {AUD1, ynone, Pm, opBytes{0xb9, 0}}, 1701 {AUD2, ynone, Pm, opBytes{0x0b, 0}}, 1702 {AUMWAIT, ywrfsbase, Pf2, opBytes{0xae, 06}}, 1703 {ASYSENTER, ynone, Px, opBytes{0x0f, 0x34, 0}}, 1704 {ASYSENTER64, ynone, Pw, opBytes{0x0f, 0x34, 0}}, 1705 {ASYSEXIT, ynone, Px, opBytes{0x0f, 0x35, 0}}, 1706 {ASYSEXIT64, ynone, Pw, opBytes{0x0f, 0x35, 0}}, 1707 {ALMSW, ydivl, Pm, opBytes{0x01, 06}}, 1708 {ALLDT, ydivl, Pm, opBytes{0x00, 02}}, 1709 {ALIDT, ysvrs_mo, Pm, opBytes{0x01, 03}}, 1710 {ALGDT, ysvrs_mo, Pm, opBytes{0x01, 02}}, 1711 {ATZCNTW, ycrc32l, Pe, opBytes{0xf3, 0x0f, 0xbc, 0}}, 1712 {ATZCNTL, ycrc32l, Px, opBytes{0xf3, 0x0f, 0xbc, 0}}, 1713 {ATZCNTQ, ycrc32l, Pw, opBytes{0xf3, 0x0f, 0xbc, 0}}, 1714 {AXRSTOR, ydivl, Px, opBytes{0x0f, 0xae, 05}}, 1715 {AXRSTOR64, ydivl, Pw, opBytes{0x0f, 0xae, 05}}, 1716 {AXRSTORS, ydivl, Px, opBytes{0x0f, 0xc7, 03}}, 1717 {AXRSTORS64, ydivl, Pw, opBytes{0x0f, 0xc7, 03}}, 1718 {AXSAVE, yclflush, Px, opBytes{0x0f, 0xae, 04}}, 1719 {AXSAVE64, yclflush, Pw, opBytes{0x0f, 0xae, 04}}, 1720 {AXSAVEOPT, yclflush, Px, opBytes{0x0f, 0xae, 06}}, 1721 {AXSAVEOPT64, yclflush, Pw, opBytes{0x0f, 0xae, 06}}, 1722 {AXSAVEC, yclflush, Px, opBytes{0x0f, 0xc7, 04}}, 1723 {AXSAVEC64, yclflush, Pw, opBytes{0x0f, 0xc7, 04}}, 1724 {AXSAVES, yclflush, Px, opBytes{0x0f, 0xc7, 
05}}, 1725 {AXSAVES64, yclflush, Pw, opBytes{0x0f, 0xc7, 05}}, 1726 {ASGDT, yclflush, Pm, opBytes{0x01, 00}}, 1727 {ASIDT, yclflush, Pm, opBytes{0x01, 01}}, 1728 {ARDRANDW, yrdrand, Pe, opBytes{0x0f, 0xc7, 06}}, 1729 {ARDRANDL, yrdrand, Px, opBytes{0x0f, 0xc7, 06}}, 1730 {ARDRANDQ, yrdrand, Pw, opBytes{0x0f, 0xc7, 06}}, 1731 {ARDSEEDW, yrdrand, Pe, opBytes{0x0f, 0xc7, 07}}, 1732 {ARDSEEDL, yrdrand, Px, opBytes{0x0f, 0xc7, 07}}, 1733 {ARDSEEDQ, yrdrand, Pw, opBytes{0x0f, 0xc7, 07}}, 1734 {ASTRW, yincq, Pe, opBytes{0x0f, 0x00, 01}}, 1735 {ASTRL, yincq, Px, opBytes{0x0f, 0x00, 01}}, 1736 {ASTRQ, yincq, Pw, opBytes{0x0f, 0x00, 01}}, 1737 {AXSETBV, ynone, Pm, opBytes{0x01, 0xd1, 0}}, 1738 {AMOVBEW, ymovbe, Pq, opBytes{0x38, 0xf0, 0, 0x38, 0xf1, 0}}, 1739 {AMOVBEL, ymovbe, Pm, opBytes{0x38, 0xf0, 0, 0x38, 0xf1, 0}}, 1740 {AMOVBEQ, ymovbe, Pw, opBytes{0x0f, 0x38, 0xf0, 0, 0x0f, 0x38, 0xf1, 0}}, 1741 {ANOPW, ydivl, Pe, opBytes{0x0f, 0x1f, 00}}, 1742 {ANOPL, ydivl, Px, opBytes{0x0f, 0x1f, 00}}, 1743 {ASLDTW, yincq, Pe, opBytes{0x0f, 0x00, 00}}, 1744 {ASLDTL, yincq, Px, opBytes{0x0f, 0x00, 00}}, 1745 {ASLDTQ, yincq, Pw, opBytes{0x0f, 0x00, 00}}, 1746 {ASMSWW, yincq, Pe, opBytes{0x0f, 0x01, 04}}, 1747 {ASMSWL, yincq, Px, opBytes{0x0f, 0x01, 04}}, 1748 {ASMSWQ, yincq, Pw, opBytes{0x0f, 0x01, 04}}, 1749 {ABLENDVPS, yblendvpd, Pq4, opBytes{0x14}}, 1750 {ABLENDVPD, yblendvpd, Pq4, opBytes{0x15}}, 1751 {APBLENDVB, yblendvpd, Pq4, opBytes{0x10}}, 1752 {ASHA1MSG1, yaes, Px, opBytes{0x0f, 0x38, 0xc9, 0}}, 1753 {ASHA1MSG2, yaes, Px, opBytes{0x0f, 0x38, 0xca, 0}}, 1754 {ASHA1NEXTE, yaes, Px, opBytes{0x0f, 0x38, 0xc8, 0}}, 1755 {ASHA256MSG1, yaes, Px, opBytes{0x0f, 0x38, 0xcc, 0}}, 1756 {ASHA256MSG2, yaes, Px, opBytes{0x0f, 0x38, 0xcd, 0}}, 1757 {ASHA1RNDS4, ysha1rnds4, Pm, opBytes{0x3a, 0xcc, 0}}, 1758 {ASHA256RNDS2, ysha256rnds2, Px, opBytes{0x0f, 0x38, 0xcb, 0}}, 1759 {ARDFSBASEL, yrdrand, Pf3, opBytes{0xae, 00}}, 1760 {ARDFSBASEQ, yrdrand, Pfw, opBytes{0xae, 00}}, 1761 {ARDGSBASEL, 
yrdrand, Pf3, opBytes{0xae, 01}}, 1762 {ARDGSBASEQ, yrdrand, Pfw, opBytes{0xae, 01}}, 1763 {AWRFSBASEL, ywrfsbase, Pf3, opBytes{0xae, 02}}, 1764 {AWRFSBASEQ, ywrfsbase, Pfw, opBytes{0xae, 02}}, 1765 {AWRGSBASEL, ywrfsbase, Pf3, opBytes{0xae, 03}}, 1766 {AWRGSBASEQ, ywrfsbase, Pfw, opBytes{0xae, 03}}, 1767 {ALFSW, ym_rl, Pe, opBytes{0x0f, 0xb4}}, 1768 {ALFSL, ym_rl, Px, opBytes{0x0f, 0xb4}}, 1769 {ALFSQ, ym_rl, Pw, opBytes{0x0f, 0xb4}}, 1770 {ALGSW, ym_rl, Pe, opBytes{0x0f, 0xb5}}, 1771 {ALGSL, ym_rl, Px, opBytes{0x0f, 0xb5}}, 1772 {ALGSQ, ym_rl, Pw, opBytes{0x0f, 0xb5}}, 1773 {ALSSW, ym_rl, Pe, opBytes{0x0f, 0xb2}}, 1774 {ALSSL, ym_rl, Px, opBytes{0x0f, 0xb2}}, 1775 {ALSSQ, ym_rl, Pw, opBytes{0x0f, 0xb2}}, 1776 1777 {ABLENDPD, yxshuf, Pq, opBytes{0x3a, 0x0d, 0}}, 1778 {ABLENDPS, yxshuf, Pq, opBytes{0x3a, 0x0c, 0}}, 1779 {AXACQUIRE, ynone, Px, opBytes{0xf2}}, 1780 {AXRELEASE, ynone, Px, opBytes{0xf3}}, 1781 {AXBEGIN, yxbegin, Px, opBytes{0xc7, 0xf8}}, 1782 {AXABORT, yxabort, Px, opBytes{0xc6, 0xf8}}, 1783 {AXEND, ynone, Px, opBytes{0x0f, 01, 0xd5}}, 1784 {AXTEST, ynone, Px, opBytes{0x0f, 01, 0xd6}}, 1785 {AXGETBV, ynone, Pm, opBytes{01, 0xd0}}, 1786 {obj.AFUNCDATA, yfuncdata, Px, opBytes{0, 0}}, 1787 {obj.APCDATA, ypcdata, Px, opBytes{0, 0}}, 1788 {obj.ADUFFCOPY, yduff, Px, opBytes{0xe8}}, 1789 {obj.ADUFFZERO, yduff, Px, opBytes{0xe8}}, 1790 1791 {obj.AEND, nil, 0, opBytes{}}, 1792 {0, nil, 0, opBytes{}}, 1793} 1794 1795var opindex [(ALAST + 1) & obj.AMask]*Optab 1796 1797// useAbs reports whether s describes a symbol that must avoid pc-relative addressing. 1798// This happens on systems like Solaris that call .so functions instead of system calls. 1799// It does not seem to be necessary for any other systems. This is probably working 1800// around a Solaris-specific bug that should be fixed differently, but we don't know 1801// what that bug is. And this does fix it. 
1802func useAbs(ctxt *obj.Link, s *obj.LSym) bool { 1803 if ctxt.Headtype == objabi.Hsolaris { 1804 // All the Solaris dynamic imports from libc.so begin with "libc_". 1805 return strings.HasPrefix(s.Name, "libc_") 1806 } 1807 return ctxt.Arch.Family == sys.I386 && !ctxt.Flag_shared 1808} 1809 1810// single-instruction no-ops of various lengths. 1811// constructed by hand and disassembled with gdb to verify. 1812// see http://www.agner.org/optimize/optimizing_assembly.pdf for discussion. 1813var nop = [][16]uint8{ 1814 {0x90}, 1815 {0x66, 0x90}, 1816 {0x0F, 0x1F, 0x00}, 1817 {0x0F, 0x1F, 0x40, 0x00}, 1818 {0x0F, 0x1F, 0x44, 0x00, 0x00}, 1819 {0x66, 0x0F, 0x1F, 0x44, 0x00, 0x00}, 1820 {0x0F, 0x1F, 0x80, 0x00, 0x00, 0x00, 0x00}, 1821 {0x0F, 0x1F, 0x84, 0x00, 0x00, 0x00, 0x00, 0x00}, 1822 {0x66, 0x0F, 0x1F, 0x84, 0x00, 0x00, 0x00, 0x00, 0x00}, 1823} 1824 1825// Native Client rejects the repeated 0x66 prefix. 1826// {0x66, 0x66, 0x0F, 0x1F, 0x84, 0x00, 0x00, 0x00, 0x00, 0x00}, 1827func fillnop(p []byte, n int) { 1828 var m int 1829 1830 for n > 0 { 1831 m = n 1832 if m > len(nop) { 1833 m = len(nop) 1834 } 1835 copy(p[:m], nop[m-1][:m]) 1836 p = p[m:] 1837 n -= m 1838 } 1839} 1840 1841func noppad(ctxt *obj.Link, s *obj.LSym, c int32, pad int32) int32 { 1842 s.Grow(int64(c) + int64(pad)) 1843 fillnop(s.P[c:], int(pad)) 1844 return c + pad 1845} 1846 1847func spadjop(ctxt *obj.Link, l, q obj.As) obj.As { 1848 if ctxt.Arch.Family != sys.AMD64 || ctxt.Arch.PtrSize == 4 { 1849 return l 1850 } 1851 return q 1852} 1853 1854// isJump returns whether p is a jump instruction. 1855// It is used to ensure that no standalone or macro-fused jump will straddle 1856// or end on a 32 byte boundary by inserting NOPs before the jumps. 
1857func isJump(p *obj.Prog) bool { 1858 return p.To.Target() != nil || p.As == obj.AJMP || p.As == obj.ACALL || 1859 p.As == obj.ARET || p.As == obj.ADUFFCOPY || p.As == obj.ADUFFZERO 1860} 1861 1862// lookForJCC returns the first real instruction starting from p, if that instruction is a conditional 1863// jump. Otherwise, nil is returned. 1864func lookForJCC(p *obj.Prog) *obj.Prog { 1865 // Skip any PCDATA, FUNCDATA or NOP instructions 1866 var q *obj.Prog 1867 for q = p.Link; q != nil && (q.As == obj.APCDATA || q.As == obj.AFUNCDATA || q.As == obj.ANOP); q = q.Link { 1868 } 1869 1870 if q == nil || q.To.Target() == nil || p.As == obj.AJMP || p.As == obj.ACALL { 1871 return nil 1872 } 1873 1874 switch q.As { 1875 case AJOS, AJOC, AJCS, AJCC, AJEQ, AJNE, AJLS, AJHI, 1876 AJMI, AJPL, AJPS, AJPC, AJLT, AJGE, AJLE, AJGT: 1877 default: 1878 return nil 1879 } 1880 1881 return q 1882} 1883 1884// fusedJump determines whether p can be fused with a subsequent conditional jump instruction. 1885// If it can, we return true followed by the total size of the fused jump. If it can't, we return false. 1886// Macro fusion rules are derived from the Intel Optimization Manual (April 2019) section 3.4.2.2. 1887func fusedJump(p *obj.Prog) (bool, uint8) { 1888 var fusedSize uint8 1889 1890 // The first instruction in a macro fused pair may be preceded by the LOCK prefix, 1891 // or possibly an XACQUIRE/XRELEASE prefix followed by a LOCK prefix. If it is, we 1892 // need to be careful to insert any padding before the locks rather than directly after them. 
1893 1894 if p.As == AXRELEASE || p.As == AXACQUIRE { 1895 fusedSize += p.Isize 1896 for p = p.Link; p != nil && (p.As == obj.APCDATA || p.As == obj.AFUNCDATA); p = p.Link { 1897 } 1898 if p == nil { 1899 return false, 0 1900 } 1901 } 1902 if p.As == ALOCK { 1903 fusedSize += p.Isize 1904 for p = p.Link; p != nil && (p.As == obj.APCDATA || p.As == obj.AFUNCDATA); p = p.Link { 1905 } 1906 if p == nil { 1907 return false, 0 1908 } 1909 } 1910 cmp := p.As == ACMPB || p.As == ACMPL || p.As == ACMPQ || p.As == ACMPW 1911 1912 cmpAddSub := p.As == AADDB || p.As == AADDL || p.As == AADDW || p.As == AADDQ || 1913 p.As == ASUBB || p.As == ASUBL || p.As == ASUBW || p.As == ASUBQ || cmp 1914 1915 testAnd := p.As == ATESTB || p.As == ATESTL || p.As == ATESTQ || p.As == ATESTW || 1916 p.As == AANDB || p.As == AANDL || p.As == AANDQ || p.As == AANDW 1917 1918 incDec := p.As == AINCB || p.As == AINCL || p.As == AINCQ || p.As == AINCW || 1919 p.As == ADECB || p.As == ADECL || p.As == ADECQ || p.As == ADECW 1920 1921 if !cmpAddSub && !testAnd && !incDec { 1922 return false, 0 1923 } 1924 1925 if !incDec { 1926 var argOne obj.AddrType 1927 var argTwo obj.AddrType 1928 if cmp { 1929 argOne = p.From.Type 1930 argTwo = p.To.Type 1931 } else { 1932 argOne = p.To.Type 1933 argTwo = p.From.Type 1934 } 1935 if argOne == obj.TYPE_REG { 1936 if argTwo != obj.TYPE_REG && argTwo != obj.TYPE_CONST && argTwo != obj.TYPE_MEM { 1937 return false, 0 1938 } 1939 } else if argOne == obj.TYPE_MEM { 1940 if argTwo != obj.TYPE_REG { 1941 return false, 0 1942 } 1943 } else { 1944 return false, 0 1945 } 1946 } 1947 1948 fusedSize += p.Isize 1949 jmp := lookForJCC(p) 1950 if jmp == nil { 1951 return false, 0 1952 } 1953 1954 fusedSize += jmp.Isize 1955 1956 if testAnd { 1957 return true, fusedSize 1958 } 1959 1960 if jmp.As == AJOC || jmp.As == AJOS || jmp.As == AJMI || 1961 jmp.As == AJPL || jmp.As == AJPS || jmp.As == AJPC { 1962 return false, 0 1963 } 1964 1965 if cmpAddSub { 1966 return true, fusedSize 
1967 } 1968 1969 if jmp.As == AJCS || jmp.As == AJCC || jmp.As == AJHI || jmp.As == AJLS { 1970 return false, 0 1971 } 1972 1973 return true, fusedSize 1974} 1975 1976type padJumpsCtx int32 1977 1978func makePjcCtx(ctxt *obj.Link) padJumpsCtx { 1979 // Disable jump padding on 32 bit builds by settting 1980 // padJumps to 0. 1981 if ctxt.Arch.Family == sys.I386 { 1982 return padJumpsCtx(0) 1983 } 1984 1985 // Disable jump padding for hand written assembly code. 1986 if ctxt.IsAsm { 1987 return padJumpsCtx(0) 1988 } 1989 1990 return padJumpsCtx(32) 1991} 1992 1993// padJump detects whether the instruction being assembled is a standalone or a macro-fused 1994// jump that needs to be padded. If it is, NOPs are inserted to ensure that the jump does 1995// not cross or end on a 32 byte boundary. 1996func (pjc padJumpsCtx) padJump(ctxt *obj.Link, s *obj.LSym, p *obj.Prog, c int32) int32 { 1997 if pjc == 0 { 1998 return c 1999 } 2000 2001 var toPad int32 2002 fj, fjSize := fusedJump(p) 2003 mask := int32(pjc - 1) 2004 if fj { 2005 if (c&mask)+int32(fjSize) >= int32(pjc) { 2006 toPad = int32(pjc) - (c & mask) 2007 } 2008 } else if isJump(p) { 2009 if (c&mask)+int32(p.Isize) >= int32(pjc) { 2010 toPad = int32(pjc) - (c & mask) 2011 } 2012 } 2013 if toPad <= 0 { 2014 return c 2015 } 2016 2017 return noppad(ctxt, s, c, toPad) 2018} 2019 2020// reAssemble is called if an instruction's size changes during assembly. If 2021// it does and the instruction is a standalone or a macro-fused jump we need to 2022// reassemble. 
2023func (pjc padJumpsCtx) reAssemble(p *obj.Prog) bool { 2024 if pjc == 0 { 2025 return false 2026 } 2027 2028 fj, _ := fusedJump(p) 2029 return fj || isJump(p) 2030} 2031 2032type nopPad struct { 2033 p *obj.Prog // Instruction before the pad 2034 n int32 // Size of the pad 2035} 2036 2037func span6(ctxt *obj.Link, s *obj.LSym, newprog obj.ProgAlloc) { 2038 if ctxt.Retpoline && ctxt.Arch.Family == sys.I386 { 2039 ctxt.Diag("-spectre=ret not supported on 386") 2040 ctxt.Retpoline = false // don't keep printing 2041 } 2042 2043 pjc := makePjcCtx(ctxt) 2044 2045 if s.P != nil { 2046 return 2047 } 2048 2049 if ycover[0] == 0 { 2050 ctxt.Diag("x86 tables not initialized, call x86.instinit first") 2051 } 2052 2053 for p := s.Func().Text; p != nil; p = p.Link { 2054 if p.To.Type == obj.TYPE_BRANCH && p.To.Target() == nil { 2055 p.To.SetTarget(p) 2056 } 2057 if p.As == AADJSP { 2058 p.To.Type = obj.TYPE_REG 2059 p.To.Reg = REG_SP 2060 // Generate 'ADDQ $x, SP' or 'SUBQ $x, SP', with x positive. 2061 // One exception: It is smaller to encode $-0x80 than $0x80. 2062 // For that case, flip the sign and the op: 2063 // Instead of 'ADDQ $0x80, SP', generate 'SUBQ $-0x80, SP'. 
2064 switch v := p.From.Offset; { 2065 case v == 0: 2066 p.As = obj.ANOP 2067 case v == 0x80 || (v < 0 && v != -0x80): 2068 p.As = spadjop(ctxt, AADDL, AADDQ) 2069 p.From.Offset *= -1 2070 default: 2071 p.As = spadjop(ctxt, ASUBL, ASUBQ) 2072 } 2073 } 2074 if ctxt.Retpoline && (p.As == obj.ACALL || p.As == obj.AJMP) && (p.To.Type == obj.TYPE_REG || p.To.Type == obj.TYPE_MEM) { 2075 if p.To.Type != obj.TYPE_REG { 2076 ctxt.Diag("non-retpoline-compatible: %v", p) 2077 continue 2078 } 2079 p.To.Type = obj.TYPE_BRANCH 2080 p.To.Name = obj.NAME_EXTERN 2081 p.To.Sym = ctxt.Lookup("runtime.retpoline" + obj.Rconv(int(p.To.Reg))) 2082 p.To.Reg = 0 2083 p.To.Offset = 0 2084 } 2085 } 2086 2087 var count int64 // rough count of number of instructions 2088 for p := s.Func().Text; p != nil; p = p.Link { 2089 count++ 2090 p.Back = branchShort // use short branches first time through 2091 if q := p.To.Target(); q != nil && (q.Back&branchShort != 0) { 2092 p.Back |= branchBackwards 2093 q.Back |= branchLoopHead 2094 } 2095 } 2096 s.GrowCap(count * 5) // preallocate roughly 5 bytes per instruction 2097 2098 var ab AsmBuf 2099 var n int 2100 var c int32 2101 errors := ctxt.Errors 2102 var nops []nopPad // Padding for a particular assembly (reuse slice storage if multiple assemblies) 2103 nrelocs0 := len(s.R) 2104 for { 2105 // This loop continues while there are reasons to re-assemble 2106 // whole block, like the presence of long forward jumps. 
2107 reAssemble := false 2108 for i := range s.R[nrelocs0:] { 2109 s.R[nrelocs0+i] = obj.Reloc{} 2110 } 2111 s.R = s.R[:nrelocs0] // preserve marker relocations generated by the compiler 2112 s.P = s.P[:0] 2113 c = 0 2114 var pPrev *obj.Prog 2115 nops = nops[:0] 2116 for p := s.Func().Text; p != nil; p = p.Link { 2117 c0 := c 2118 c = pjc.padJump(ctxt, s, p, c) 2119 2120 if maxLoopPad > 0 && p.Back&branchLoopHead != 0 && c&(loopAlign-1) != 0 { 2121 // pad with NOPs 2122 v := -c & (loopAlign - 1) 2123 2124 if v <= maxLoopPad { 2125 s.Grow(int64(c) + int64(v)) 2126 fillnop(s.P[c:], int(v)) 2127 c += v 2128 } 2129 } 2130 2131 p.Pc = int64(c) 2132 2133 // process forward jumps to p 2134 for q := p.Rel; q != nil; q = q.Forwd { 2135 v := int32(p.Pc - (q.Pc + int64(q.Isize))) 2136 if q.Back&branchShort != 0 { 2137 if v > 127 { 2138 reAssemble = true 2139 q.Back ^= branchShort 2140 } 2141 2142 if q.As == AJCXZL || q.As == AXBEGIN { 2143 s.P[q.Pc+2] = byte(v) 2144 } else { 2145 s.P[q.Pc+1] = byte(v) 2146 } 2147 } else { 2148 binary.LittleEndian.PutUint32(s.P[q.Pc+int64(q.Isize)-4:], uint32(v)) 2149 } 2150 } 2151 2152 p.Rel = nil 2153 2154 p.Pc = int64(c) 2155 ab.asmins(ctxt, s, p) 2156 m := ab.Len() 2157 if int(p.Isize) != m { 2158 p.Isize = uint8(m) 2159 if pjc.reAssemble(p) { 2160 // We need to re-assemble here to check for jumps and fused jumps 2161 // that span or end on 32 byte boundaries. 2162 reAssemble = true 2163 } 2164 } 2165 2166 s.Grow(p.Pc + int64(m)) 2167 copy(s.P[p.Pc:], ab.Bytes()) 2168 // If there was padding, remember it. 
2169 if pPrev != nil && !ctxt.IsAsm && c > c0 { 2170 nops = append(nops, nopPad{p: pPrev, n: c - c0}) 2171 } 2172 c += int32(m) 2173 pPrev = p 2174 } 2175 2176 n++ 2177 if n > 1000 { 2178 ctxt.Diag("span must be looping") 2179 log.Fatalf("loop") 2180 } 2181 if !reAssemble { 2182 break 2183 } 2184 if ctxt.Errors > errors { 2185 return 2186 } 2187 } 2188 // splice padding nops into Progs 2189 for _, n := range nops { 2190 pp := n.p 2191 np := &obj.Prog{Link: pp.Link, Ctxt: pp.Ctxt, As: obj.ANOP, Pos: pp.Pos.WithNotStmt(), Pc: pp.Pc + int64(pp.Isize), Isize: uint8(n.n)} 2192 pp.Link = np 2193 } 2194 2195 s.Size = int64(c) 2196 2197 if false { /* debug['a'] > 1 */ 2198 fmt.Printf("span1 %s %d (%d tries)\n %.6x", s.Name, s.Size, n, 0) 2199 var i int 2200 for i = 0; i < len(s.P); i++ { 2201 fmt.Printf(" %.2x", s.P[i]) 2202 if i%16 == 15 { 2203 fmt.Printf("\n %.6x", uint(i+1)) 2204 } 2205 } 2206 2207 if i%16 != 0 { 2208 fmt.Printf("\n") 2209 } 2210 2211 for i := 0; i < len(s.R); i++ { 2212 r := &s.R[i] 2213 fmt.Printf(" rel %#.4x/%d %s%+d\n", uint32(r.Off), r.Siz, r.Sym.Name, r.Add) 2214 } 2215 } 2216 2217 // Mark nonpreemptible instruction sequences. 2218 // The 2-instruction TLS access sequence 2219 // MOVQ TLS, BX 2220 // MOVQ 0(BX)(TLS*1), BX 2221 // is not async preemptible, as if it is preempted and resumed on 2222 // a different thread, the TLS address may become invalid. 2223 if !CanUse1InsnTLS(ctxt) { 2224 useTLS := func(p *obj.Prog) bool { 2225 // Only need to mark the second instruction, which has 2226 // REG_TLS as Index. (It is okay to interrupt and restart 2227 // the first instruction.) 2228 return p.From.Index == REG_TLS 2229 } 2230 obj.MarkUnsafePoints(ctxt, s.Func().Text, newprog, useTLS, nil) 2231 } 2232} 2233 2234func instinit(ctxt *obj.Link) { 2235 if ycover[0] != 0 { 2236 // Already initialized; stop now. 2237 // This happens in the cmd/asm tests, 2238 // each of which re-initializes the arch. 
2239 return 2240 } 2241 2242 switch ctxt.Headtype { 2243 case objabi.Hplan9: 2244 plan9privates = ctxt.Lookup("_privates") 2245 } 2246 2247 for i := range avxOptab { 2248 c := avxOptab[i].as 2249 if opindex[c&obj.AMask] != nil { 2250 ctxt.Diag("phase error in avxOptab: %d (%v)", i, c) 2251 } 2252 opindex[c&obj.AMask] = &avxOptab[i] 2253 } 2254 for i := 1; optab[i].as != 0; i++ { 2255 c := optab[i].as 2256 if opindex[c&obj.AMask] != nil { 2257 ctxt.Diag("phase error in optab: %d (%v)", i, c) 2258 } 2259 opindex[c&obj.AMask] = &optab[i] 2260 } 2261 2262 for i := 0; i < Ymax; i++ { 2263 ycover[i*Ymax+i] = 1 2264 } 2265 2266 ycover[Yi0*Ymax+Yu2] = 1 2267 ycover[Yi1*Ymax+Yu2] = 1 2268 2269 ycover[Yi0*Ymax+Yi8] = 1 2270 ycover[Yi1*Ymax+Yi8] = 1 2271 ycover[Yu2*Ymax+Yi8] = 1 2272 ycover[Yu7*Ymax+Yi8] = 1 2273 2274 ycover[Yi0*Ymax+Yu7] = 1 2275 ycover[Yi1*Ymax+Yu7] = 1 2276 ycover[Yu2*Ymax+Yu7] = 1 2277 2278 ycover[Yi0*Ymax+Yu8] = 1 2279 ycover[Yi1*Ymax+Yu8] = 1 2280 ycover[Yu2*Ymax+Yu8] = 1 2281 ycover[Yu7*Ymax+Yu8] = 1 2282 2283 ycover[Yi0*Ymax+Ys32] = 1 2284 ycover[Yi1*Ymax+Ys32] = 1 2285 ycover[Yu2*Ymax+Ys32] = 1 2286 ycover[Yu7*Ymax+Ys32] = 1 2287 ycover[Yu8*Ymax+Ys32] = 1 2288 ycover[Yi8*Ymax+Ys32] = 1 2289 2290 ycover[Yi0*Ymax+Yi32] = 1 2291 ycover[Yi1*Ymax+Yi32] = 1 2292 ycover[Yu2*Ymax+Yi32] = 1 2293 ycover[Yu7*Ymax+Yi32] = 1 2294 ycover[Yu8*Ymax+Yi32] = 1 2295 ycover[Yi8*Ymax+Yi32] = 1 2296 ycover[Ys32*Ymax+Yi32] = 1 2297 2298 ycover[Yi0*Ymax+Yi64] = 1 2299 ycover[Yi1*Ymax+Yi64] = 1 2300 ycover[Yu7*Ymax+Yi64] = 1 2301 ycover[Yu2*Ymax+Yi64] = 1 2302 ycover[Yu8*Ymax+Yi64] = 1 2303 ycover[Yi8*Ymax+Yi64] = 1 2304 ycover[Ys32*Ymax+Yi64] = 1 2305 ycover[Yi32*Ymax+Yi64] = 1 2306 2307 ycover[Yal*Ymax+Yrb] = 1 2308 ycover[Ycl*Ymax+Yrb] = 1 2309 ycover[Yax*Ymax+Yrb] = 1 2310 ycover[Ycx*Ymax+Yrb] = 1 2311 ycover[Yrx*Ymax+Yrb] = 1 2312 ycover[Yrl*Ymax+Yrb] = 1 // but not Yrl32 2313 2314 ycover[Ycl*Ymax+Ycx] = 1 2315 2316 ycover[Yax*Ymax+Yrx] = 1 2317 ycover[Ycx*Ymax+Yrx] = 1 
2318 2319 ycover[Yax*Ymax+Yrl] = 1 2320 ycover[Ycx*Ymax+Yrl] = 1 2321 ycover[Yrx*Ymax+Yrl] = 1 2322 ycover[Yrl32*Ymax+Yrl] = 1 2323 2324 ycover[Yf0*Ymax+Yrf] = 1 2325 2326 ycover[Yal*Ymax+Ymb] = 1 2327 ycover[Ycl*Ymax+Ymb] = 1 2328 ycover[Yax*Ymax+Ymb] = 1 2329 ycover[Ycx*Ymax+Ymb] = 1 2330 ycover[Yrx*Ymax+Ymb] = 1 2331 ycover[Yrb*Ymax+Ymb] = 1 2332 ycover[Yrl*Ymax+Ymb] = 1 // but not Yrl32 2333 ycover[Ym*Ymax+Ymb] = 1 2334 2335 ycover[Yax*Ymax+Yml] = 1 2336 ycover[Ycx*Ymax+Yml] = 1 2337 ycover[Yrx*Ymax+Yml] = 1 2338 ycover[Yrl*Ymax+Yml] = 1 2339 ycover[Yrl32*Ymax+Yml] = 1 2340 ycover[Ym*Ymax+Yml] = 1 2341 2342 ycover[Yax*Ymax+Ymm] = 1 2343 ycover[Ycx*Ymax+Ymm] = 1 2344 ycover[Yrx*Ymax+Ymm] = 1 2345 ycover[Yrl*Ymax+Ymm] = 1 2346 ycover[Yrl32*Ymax+Ymm] = 1 2347 ycover[Ym*Ymax+Ymm] = 1 2348 ycover[Ymr*Ymax+Ymm] = 1 2349 2350 ycover[Yxr0*Ymax+Yxr] = 1 2351 2352 ycover[Ym*Ymax+Yxm] = 1 2353 ycover[Yxr0*Ymax+Yxm] = 1 2354 ycover[Yxr*Ymax+Yxm] = 1 2355 2356 ycover[Ym*Ymax+Yym] = 1 2357 ycover[Yyr*Ymax+Yym] = 1 2358 2359 ycover[Yxr0*Ymax+YxrEvex] = 1 2360 ycover[Yxr*Ymax+YxrEvex] = 1 2361 2362 ycover[Ym*Ymax+YxmEvex] = 1 2363 ycover[Yxr0*Ymax+YxmEvex] = 1 2364 ycover[Yxr*Ymax+YxmEvex] = 1 2365 ycover[YxrEvex*Ymax+YxmEvex] = 1 2366 2367 ycover[Yyr*Ymax+YyrEvex] = 1 2368 2369 ycover[Ym*Ymax+YymEvex] = 1 2370 ycover[Yyr*Ymax+YymEvex] = 1 2371 ycover[YyrEvex*Ymax+YymEvex] = 1 2372 2373 ycover[Ym*Ymax+Yzm] = 1 2374 ycover[Yzr*Ymax+Yzm] = 1 2375 2376 ycover[Yk0*Ymax+Yk] = 1 2377 ycover[Yknot0*Ymax+Yk] = 1 2378 2379 ycover[Yk0*Ymax+Ykm] = 1 2380 ycover[Yknot0*Ymax+Ykm] = 1 2381 ycover[Yk*Ymax+Ykm] = 1 2382 ycover[Ym*Ymax+Ykm] = 1 2383 2384 ycover[Yxvm*Ymax+YxvmEvex] = 1 2385 2386 ycover[Yyvm*Ymax+YyvmEvex] = 1 2387 2388 for i := 0; i < MAXREG; i++ { 2389 reg[i] = -1 2390 if i >= REG_AL && i <= REG_R15B { 2391 reg[i] = (i - REG_AL) & 7 2392 if i >= REG_SPB && i <= REG_DIB { 2393 regrex[i] = 0x40 2394 } 2395 if i >= REG_R8B && i <= REG_R15B { 2396 regrex[i] = Rxr | Rxx | Rxb 2397 
} 2398 } 2399 2400 if i >= REG_AH && i <= REG_BH { 2401 reg[i] = 4 + ((i - REG_AH) & 7) 2402 } 2403 if i >= REG_AX && i <= REG_R15 { 2404 reg[i] = (i - REG_AX) & 7 2405 if i >= REG_R8 { 2406 regrex[i] = Rxr | Rxx | Rxb 2407 } 2408 } 2409 2410 if i >= REG_F0 && i <= REG_F0+7 { 2411 reg[i] = (i - REG_F0) & 7 2412 } 2413 if i >= REG_M0 && i <= REG_M0+7 { 2414 reg[i] = (i - REG_M0) & 7 2415 } 2416 if i >= REG_K0 && i <= REG_K0+7 { 2417 reg[i] = (i - REG_K0) & 7 2418 } 2419 if i >= REG_X0 && i <= REG_X0+15 { 2420 reg[i] = (i - REG_X0) & 7 2421 if i >= REG_X0+8 { 2422 regrex[i] = Rxr | Rxx | Rxb 2423 } 2424 } 2425 if i >= REG_X16 && i <= REG_X16+15 { 2426 reg[i] = (i - REG_X16) & 7 2427 if i >= REG_X16+8 { 2428 regrex[i] = Rxr | Rxx | Rxb | RxrEvex 2429 } else { 2430 regrex[i] = RxrEvex 2431 } 2432 } 2433 if i >= REG_Y0 && i <= REG_Y0+15 { 2434 reg[i] = (i - REG_Y0) & 7 2435 if i >= REG_Y0+8 { 2436 regrex[i] = Rxr | Rxx | Rxb 2437 } 2438 } 2439 if i >= REG_Y16 && i <= REG_Y16+15 { 2440 reg[i] = (i - REG_Y16) & 7 2441 if i >= REG_Y16+8 { 2442 regrex[i] = Rxr | Rxx | Rxb | RxrEvex 2443 } else { 2444 regrex[i] = RxrEvex 2445 } 2446 } 2447 if i >= REG_Z0 && i <= REG_Z0+15 { 2448 reg[i] = (i - REG_Z0) & 7 2449 if i > REG_Z0+7 { 2450 regrex[i] = Rxr | Rxx | Rxb 2451 } 2452 } 2453 if i >= REG_Z16 && i <= REG_Z16+15 { 2454 reg[i] = (i - REG_Z16) & 7 2455 if i >= REG_Z16+8 { 2456 regrex[i] = Rxr | Rxx | Rxb | RxrEvex 2457 } else { 2458 regrex[i] = RxrEvex 2459 } 2460 } 2461 2462 if i >= REG_CR+8 && i <= REG_CR+15 { 2463 regrex[i] = Rxr 2464 } 2465 } 2466} 2467 2468var isAndroid = buildcfg.GOOS == "android" 2469 2470func prefixof(ctxt *obj.Link, a *obj.Addr) int { 2471 if a.Reg < REG_CS && a.Index < REG_CS { // fast path 2472 return 0 2473 } 2474 if a.Type == obj.TYPE_MEM && a.Name == obj.NAME_NONE { 2475 switch a.Reg { 2476 case REG_CS: 2477 return 0x2e 2478 2479 case REG_DS: 2480 return 0x3e 2481 2482 case REG_ES: 2483 return 0x26 2484 2485 case REG_FS: 2486 return 0x64 2487 
2488 case REG_GS: 2489 return 0x65 2490 2491 case REG_TLS: 2492 // NOTE: Systems listed here should be only systems that 2493 // support direct TLS references like 8(TLS) implemented as 2494 // direct references from FS or GS. Systems that require 2495 // the initial-exec model, where you load the TLS base into 2496 // a register and then index from that register, do not reach 2497 // this code and should not be listed. 2498 if ctxt.Arch.Family == sys.I386 { 2499 switch ctxt.Headtype { 2500 default: 2501 if isAndroid { 2502 return 0x65 // GS 2503 } 2504 log.Fatalf("unknown TLS base register for %v", ctxt.Headtype) 2505 2506 case objabi.Hdarwin, 2507 objabi.Hdragonfly, 2508 objabi.Hfreebsd, 2509 objabi.Hnetbsd, 2510 objabi.Hopenbsd: 2511 return 0x65 // GS 2512 } 2513 } 2514 2515 switch ctxt.Headtype { 2516 default: 2517 log.Fatalf("unknown TLS base register for %v", ctxt.Headtype) 2518 2519 case objabi.Hlinux: 2520 if isAndroid { 2521 return 0x64 // FS 2522 } 2523 2524 if ctxt.Flag_shared { 2525 log.Fatalf("unknown TLS base register for linux with -shared") 2526 } else { 2527 return 0x64 // FS 2528 } 2529 2530 case objabi.Hdragonfly, 2531 objabi.Hfreebsd, 2532 objabi.Hnetbsd, 2533 objabi.Hopenbsd, 2534 objabi.Hsolaris: 2535 return 0x64 // FS 2536 2537 case objabi.Hdarwin: 2538 return 0x65 // GS 2539 } 2540 } 2541 } 2542 2543 if ctxt.Arch.Family == sys.I386 { 2544 if a.Index == REG_TLS && ctxt.Flag_shared { 2545 // When building for inclusion into a shared library, an instruction of the form 2546 // MOVL off(CX)(TLS*1), AX 2547 // becomes 2548 // mov %gs:off(%ecx), %eax 2549 // which assumes that the correct TLS offset has been loaded into %ecx (today 2550 // there is only one TLS variable -- g -- so this is OK). When not building for 2551 // a shared library the instruction it becomes 2552 // mov 0x0(%ecx), %eax 2553 // and a R_TLS_LE relocation, and so does not require a prefix. 
2554 return 0x65 // GS 2555 } 2556 return 0 2557 } 2558 2559 switch a.Index { 2560 case REG_CS: 2561 return 0x2e 2562 2563 case REG_DS: 2564 return 0x3e 2565 2566 case REG_ES: 2567 return 0x26 2568 2569 case REG_TLS: 2570 if ctxt.Flag_shared && ctxt.Headtype != objabi.Hwindows { 2571 // When building for inclusion into a shared library, an instruction of the form 2572 // MOV off(CX)(TLS*1), AX 2573 // becomes 2574 // mov %fs:off(%rcx), %rax 2575 // which assumes that the correct TLS offset has been loaded into %rcx (today 2576 // there is only one TLS variable -- g -- so this is OK). When not building for 2577 // a shared library the instruction does not require a prefix. 2578 return 0x64 2579 } 2580 2581 case REG_FS: 2582 return 0x64 2583 2584 case REG_GS: 2585 return 0x65 2586 } 2587 2588 return 0 2589} 2590 2591// oclassRegList returns multisource operand class for addr. 2592func oclassRegList(ctxt *obj.Link, addr *obj.Addr) int { 2593 // TODO(quasilyte): when oclass register case is refactored into 2594 // lookup table, use it here to get register kind more easily. 2595 // Helper functions like regIsXmm should go away too (they will become redundant). 
2596 2597 regIsXmm := func(r int) bool { return r >= REG_X0 && r <= REG_X31 } 2598 regIsYmm := func(r int) bool { return r >= REG_Y0 && r <= REG_Y31 } 2599 regIsZmm := func(r int) bool { return r >= REG_Z0 && r <= REG_Z31 } 2600 2601 reg0, reg1 := decodeRegisterRange(addr.Offset) 2602 low := regIndex(int16(reg0)) 2603 high := regIndex(int16(reg1)) 2604 2605 if ctxt.Arch.Family == sys.I386 { 2606 if low >= 8 || high >= 8 { 2607 return Yxxx 2608 } 2609 } 2610 2611 switch high - low { 2612 case 3: 2613 switch { 2614 case regIsXmm(reg0) && regIsXmm(reg1): 2615 return YxrEvexMulti4 2616 case regIsYmm(reg0) && regIsYmm(reg1): 2617 return YyrEvexMulti4 2618 case regIsZmm(reg0) && regIsZmm(reg1): 2619 return YzrMulti4 2620 default: 2621 return Yxxx 2622 } 2623 default: 2624 return Yxxx 2625 } 2626} 2627 2628// oclassVMem returns V-mem (vector memory with VSIB) operand class. 2629// For addr that is not V-mem returns (Yxxx, false). 2630func oclassVMem(ctxt *obj.Link, addr *obj.Addr) (int, bool) { 2631 switch addr.Index { 2632 case REG_X0 + 0, 2633 REG_X0 + 1, 2634 REG_X0 + 2, 2635 REG_X0 + 3, 2636 REG_X0 + 4, 2637 REG_X0 + 5, 2638 REG_X0 + 6, 2639 REG_X0 + 7: 2640 return Yxvm, true 2641 case REG_X8 + 0, 2642 REG_X8 + 1, 2643 REG_X8 + 2, 2644 REG_X8 + 3, 2645 REG_X8 + 4, 2646 REG_X8 + 5, 2647 REG_X8 + 6, 2648 REG_X8 + 7: 2649 if ctxt.Arch.Family == sys.I386 { 2650 return Yxxx, true 2651 } 2652 return Yxvm, true 2653 case REG_X16 + 0, 2654 REG_X16 + 1, 2655 REG_X16 + 2, 2656 REG_X16 + 3, 2657 REG_X16 + 4, 2658 REG_X16 + 5, 2659 REG_X16 + 6, 2660 REG_X16 + 7, 2661 REG_X16 + 8, 2662 REG_X16 + 9, 2663 REG_X16 + 10, 2664 REG_X16 + 11, 2665 REG_X16 + 12, 2666 REG_X16 + 13, 2667 REG_X16 + 14, 2668 REG_X16 + 15: 2669 if ctxt.Arch.Family == sys.I386 { 2670 return Yxxx, true 2671 } 2672 return YxvmEvex, true 2673 2674 case REG_Y0 + 0, 2675 REG_Y0 + 1, 2676 REG_Y0 + 2, 2677 REG_Y0 + 3, 2678 REG_Y0 + 4, 2679 REG_Y0 + 5, 2680 REG_Y0 + 6, 2681 REG_Y0 + 7: 2682 return Yyvm, true 2683 case 
REG_Y8 + 0, 2684 REG_Y8 + 1, 2685 REG_Y8 + 2, 2686 REG_Y8 + 3, 2687 REG_Y8 + 4, 2688 REG_Y8 + 5, 2689 REG_Y8 + 6, 2690 REG_Y8 + 7: 2691 if ctxt.Arch.Family == sys.I386 { 2692 return Yxxx, true 2693 } 2694 return Yyvm, true 2695 case REG_Y16 + 0, 2696 REG_Y16 + 1, 2697 REG_Y16 + 2, 2698 REG_Y16 + 3, 2699 REG_Y16 + 4, 2700 REG_Y16 + 5, 2701 REG_Y16 + 6, 2702 REG_Y16 + 7, 2703 REG_Y16 + 8, 2704 REG_Y16 + 9, 2705 REG_Y16 + 10, 2706 REG_Y16 + 11, 2707 REG_Y16 + 12, 2708 REG_Y16 + 13, 2709 REG_Y16 + 14, 2710 REG_Y16 + 15: 2711 if ctxt.Arch.Family == sys.I386 { 2712 return Yxxx, true 2713 } 2714 return YyvmEvex, true 2715 2716 case REG_Z0 + 0, 2717 REG_Z0 + 1, 2718 REG_Z0 + 2, 2719 REG_Z0 + 3, 2720 REG_Z0 + 4, 2721 REG_Z0 + 5, 2722 REG_Z0 + 6, 2723 REG_Z0 + 7: 2724 return Yzvm, true 2725 case REG_Z8 + 0, 2726 REG_Z8 + 1, 2727 REG_Z8 + 2, 2728 REG_Z8 + 3, 2729 REG_Z8 + 4, 2730 REG_Z8 + 5, 2731 REG_Z8 + 6, 2732 REG_Z8 + 7, 2733 REG_Z8 + 8, 2734 REG_Z8 + 9, 2735 REG_Z8 + 10, 2736 REG_Z8 + 11, 2737 REG_Z8 + 12, 2738 REG_Z8 + 13, 2739 REG_Z8 + 14, 2740 REG_Z8 + 15, 2741 REG_Z8 + 16, 2742 REG_Z8 + 17, 2743 REG_Z8 + 18, 2744 REG_Z8 + 19, 2745 REG_Z8 + 20, 2746 REG_Z8 + 21, 2747 REG_Z8 + 22, 2748 REG_Z8 + 23: 2749 if ctxt.Arch.Family == sys.I386 { 2750 return Yxxx, true 2751 } 2752 return Yzvm, true 2753 } 2754 2755 return Yxxx, false 2756} 2757 2758func oclass(ctxt *obj.Link, p *obj.Prog, a *obj.Addr) int { 2759 switch a.Type { 2760 case obj.TYPE_REGLIST: 2761 return oclassRegList(ctxt, a) 2762 2763 case obj.TYPE_NONE: 2764 return Ynone 2765 2766 case obj.TYPE_BRANCH: 2767 return Ybr 2768 2769 case obj.TYPE_INDIR: 2770 if a.Name != obj.NAME_NONE && a.Reg == REG_NONE && a.Index == REG_NONE && a.Scale == 0 { 2771 return Yindir 2772 } 2773 return Yxxx 2774 2775 case obj.TYPE_MEM: 2776 // Pseudo registers have negative index, but SP is 2777 // not pseudo on x86, hence REG_SP check is not redundant. 
2778 if a.Index == REG_SP || a.Index < 0 { 2779 // Can't use FP/SB/PC/SP as the index register. 2780 return Yxxx 2781 } 2782 2783 if vmem, ok := oclassVMem(ctxt, a); ok { 2784 return vmem 2785 } 2786 2787 if ctxt.Arch.Family == sys.AMD64 { 2788 switch a.Name { 2789 case obj.NAME_EXTERN, obj.NAME_STATIC, obj.NAME_GOTREF: 2790 // Global variables can't use index registers and their 2791 // base register is %rip (%rip is encoded as REG_NONE). 2792 if a.Reg != REG_NONE || a.Index != REG_NONE || a.Scale != 0 { 2793 return Yxxx 2794 } 2795 case obj.NAME_AUTO, obj.NAME_PARAM: 2796 // These names must have a base of SP. The old compiler 2797 // uses 0 for the base register. SSA uses REG_SP. 2798 if a.Reg != REG_SP && a.Reg != 0 { 2799 return Yxxx 2800 } 2801 case obj.NAME_NONE: 2802 // everything is ok 2803 default: 2804 // unknown name 2805 return Yxxx 2806 } 2807 } 2808 return Ym 2809 2810 case obj.TYPE_ADDR: 2811 switch a.Name { 2812 case obj.NAME_GOTREF: 2813 ctxt.Diag("unexpected TYPE_ADDR with NAME_GOTREF") 2814 return Yxxx 2815 2816 case obj.NAME_EXTERN, 2817 obj.NAME_STATIC: 2818 if a.Sym != nil && useAbs(ctxt, a.Sym) { 2819 return Yi32 2820 } 2821 return Yiauto // use pc-relative addressing 2822 2823 case obj.NAME_AUTO, 2824 obj.NAME_PARAM: 2825 return Yiauto 2826 } 2827 2828 // TODO(rsc): DUFFZERO/DUFFCOPY encoding forgot to set a->index 2829 // and got Yi32 in an earlier version of this code. 2830 // Keep doing that until we fix yduff etc. 
2831 if a.Sym != nil && strings.HasPrefix(a.Sym.Name, "runtime.duff") { 2832 return Yi32 2833 } 2834 2835 if a.Sym != nil || a.Name != obj.NAME_NONE { 2836 ctxt.Diag("unexpected addr: %v", obj.Dconv(p, a)) 2837 } 2838 fallthrough 2839 2840 case obj.TYPE_CONST: 2841 if a.Sym != nil { 2842 ctxt.Diag("TYPE_CONST with symbol: %v", obj.Dconv(p, a)) 2843 } 2844 2845 v := a.Offset 2846 if ctxt.Arch.Family == sys.I386 { 2847 v = int64(int32(v)) 2848 } 2849 switch { 2850 case v == 0: 2851 return Yi0 2852 case v == 1: 2853 return Yi1 2854 case v >= 0 && v <= 3: 2855 return Yu2 2856 case v >= 0 && v <= 127: 2857 return Yu7 2858 case v >= 0 && v <= 255: 2859 return Yu8 2860 case v >= -128 && v <= 127: 2861 return Yi8 2862 } 2863 if ctxt.Arch.Family == sys.I386 { 2864 return Yi32 2865 } 2866 l := int32(v) 2867 if int64(l) == v { 2868 return Ys32 // can sign extend 2869 } 2870 if v>>32 == 0 { 2871 return Yi32 // unsigned 2872 } 2873 return Yi64 2874 2875 case obj.TYPE_TEXTSIZE: 2876 return Ytextsize 2877 } 2878 2879 if a.Type != obj.TYPE_REG { 2880 ctxt.Diag("unexpected addr1: type=%d %v", a.Type, obj.Dconv(p, a)) 2881 return Yxxx 2882 } 2883 2884 switch a.Reg { 2885 case REG_AL: 2886 return Yal 2887 2888 case REG_AX: 2889 return Yax 2890 2891 /* 2892 case REG_SPB: 2893 */ 2894 case REG_BPB, 2895 REG_SIB, 2896 REG_DIB, 2897 REG_R8B, 2898 REG_R9B, 2899 REG_R10B, 2900 REG_R11B, 2901 REG_R12B, 2902 REG_R13B, 2903 REG_R14B, 2904 REG_R15B: 2905 if ctxt.Arch.Family == sys.I386 { 2906 return Yxxx 2907 } 2908 fallthrough 2909 2910 case REG_DL, 2911 REG_BL, 2912 REG_AH, 2913 REG_CH, 2914 REG_DH, 2915 REG_BH: 2916 return Yrb 2917 2918 case REG_CL: 2919 return Ycl 2920 2921 case REG_CX: 2922 return Ycx 2923 2924 case REG_DX, REG_BX: 2925 return Yrx 2926 2927 case REG_R8, // not really Yrl 2928 REG_R9, 2929 REG_R10, 2930 REG_R11, 2931 REG_R12, 2932 REG_R13, 2933 REG_R14, 2934 REG_R15: 2935 if ctxt.Arch.Family == sys.I386 { 2936 return Yxxx 2937 } 2938 fallthrough 2939 2940 case REG_SP, 
REG_BP, REG_SI, REG_DI: 2941 if ctxt.Arch.Family == sys.I386 { 2942 return Yrl32 2943 } 2944 return Yrl 2945 2946 case REG_F0 + 0: 2947 return Yf0 2948 2949 case REG_F0 + 1, 2950 REG_F0 + 2, 2951 REG_F0 + 3, 2952 REG_F0 + 4, 2953 REG_F0 + 5, 2954 REG_F0 + 6, 2955 REG_F0 + 7: 2956 return Yrf 2957 2958 case REG_M0 + 0, 2959 REG_M0 + 1, 2960 REG_M0 + 2, 2961 REG_M0 + 3, 2962 REG_M0 + 4, 2963 REG_M0 + 5, 2964 REG_M0 + 6, 2965 REG_M0 + 7: 2966 return Ymr 2967 2968 case REG_X0: 2969 return Yxr0 2970 2971 case REG_X0 + 1, 2972 REG_X0 + 2, 2973 REG_X0 + 3, 2974 REG_X0 + 4, 2975 REG_X0 + 5, 2976 REG_X0 + 6, 2977 REG_X0 + 7, 2978 REG_X0 + 8, 2979 REG_X0 + 9, 2980 REG_X0 + 10, 2981 REG_X0 + 11, 2982 REG_X0 + 12, 2983 REG_X0 + 13, 2984 REG_X0 + 14, 2985 REG_X0 + 15: 2986 return Yxr 2987 2988 case REG_X0 + 16, 2989 REG_X0 + 17, 2990 REG_X0 + 18, 2991 REG_X0 + 19, 2992 REG_X0 + 20, 2993 REG_X0 + 21, 2994 REG_X0 + 22, 2995 REG_X0 + 23, 2996 REG_X0 + 24, 2997 REG_X0 + 25, 2998 REG_X0 + 26, 2999 REG_X0 + 27, 3000 REG_X0 + 28, 3001 REG_X0 + 29, 3002 REG_X0 + 30, 3003 REG_X0 + 31: 3004 return YxrEvex 3005 3006 case REG_Y0 + 0, 3007 REG_Y0 + 1, 3008 REG_Y0 + 2, 3009 REG_Y0 + 3, 3010 REG_Y0 + 4, 3011 REG_Y0 + 5, 3012 REG_Y0 + 6, 3013 REG_Y0 + 7, 3014 REG_Y0 + 8, 3015 REG_Y0 + 9, 3016 REG_Y0 + 10, 3017 REG_Y0 + 11, 3018 REG_Y0 + 12, 3019 REG_Y0 + 13, 3020 REG_Y0 + 14, 3021 REG_Y0 + 15: 3022 return Yyr 3023 3024 case REG_Y0 + 16, 3025 REG_Y0 + 17, 3026 REG_Y0 + 18, 3027 REG_Y0 + 19, 3028 REG_Y0 + 20, 3029 REG_Y0 + 21, 3030 REG_Y0 + 22, 3031 REG_Y0 + 23, 3032 REG_Y0 + 24, 3033 REG_Y0 + 25, 3034 REG_Y0 + 26, 3035 REG_Y0 + 27, 3036 REG_Y0 + 28, 3037 REG_Y0 + 29, 3038 REG_Y0 + 30, 3039 REG_Y0 + 31: 3040 return YyrEvex 3041 3042 case REG_Z0 + 0, 3043 REG_Z0 + 1, 3044 REG_Z0 + 2, 3045 REG_Z0 + 3, 3046 REG_Z0 + 4, 3047 REG_Z0 + 5, 3048 REG_Z0 + 6, 3049 REG_Z0 + 7: 3050 return Yzr 3051 3052 case REG_Z0 + 8, 3053 REG_Z0 + 9, 3054 REG_Z0 + 10, 3055 REG_Z0 + 11, 3056 REG_Z0 + 12, 3057 REG_Z0 + 13, 
3058 REG_Z0 + 14, 3059 REG_Z0 + 15, 3060 REG_Z0 + 16, 3061 REG_Z0 + 17, 3062 REG_Z0 + 18, 3063 REG_Z0 + 19, 3064 REG_Z0 + 20, 3065 REG_Z0 + 21, 3066 REG_Z0 + 22, 3067 REG_Z0 + 23, 3068 REG_Z0 + 24, 3069 REG_Z0 + 25, 3070 REG_Z0 + 26, 3071 REG_Z0 + 27, 3072 REG_Z0 + 28, 3073 REG_Z0 + 29, 3074 REG_Z0 + 30, 3075 REG_Z0 + 31: 3076 if ctxt.Arch.Family == sys.I386 { 3077 return Yxxx 3078 } 3079 return Yzr 3080 3081 case REG_K0: 3082 return Yk0 3083 3084 case REG_K0 + 1, 3085 REG_K0 + 2, 3086 REG_K0 + 3, 3087 REG_K0 + 4, 3088 REG_K0 + 5, 3089 REG_K0 + 6, 3090 REG_K0 + 7: 3091 return Yknot0 3092 3093 case REG_CS: 3094 return Ycs 3095 case REG_SS: 3096 return Yss 3097 case REG_DS: 3098 return Yds 3099 case REG_ES: 3100 return Yes 3101 case REG_FS: 3102 return Yfs 3103 case REG_GS: 3104 return Ygs 3105 case REG_TLS: 3106 return Ytls 3107 3108 case REG_GDTR: 3109 return Ygdtr 3110 case REG_IDTR: 3111 return Yidtr 3112 case REG_LDTR: 3113 return Yldtr 3114 case REG_MSW: 3115 return Ymsw 3116 case REG_TASK: 3117 return Ytask 3118 3119 case REG_CR + 0: 3120 return Ycr0 3121 case REG_CR + 1: 3122 return Ycr1 3123 case REG_CR + 2: 3124 return Ycr2 3125 case REG_CR + 3: 3126 return Ycr3 3127 case REG_CR + 4: 3128 return Ycr4 3129 case REG_CR + 5: 3130 return Ycr5 3131 case REG_CR + 6: 3132 return Ycr6 3133 case REG_CR + 7: 3134 return Ycr7 3135 case REG_CR + 8: 3136 return Ycr8 3137 3138 case REG_DR + 0: 3139 return Ydr0 3140 case REG_DR + 1: 3141 return Ydr1 3142 case REG_DR + 2: 3143 return Ydr2 3144 case REG_DR + 3: 3145 return Ydr3 3146 case REG_DR + 4: 3147 return Ydr4 3148 case REG_DR + 5: 3149 return Ydr5 3150 case REG_DR + 6: 3151 return Ydr6 3152 case REG_DR + 7: 3153 return Ydr7 3154 3155 case REG_TR + 0: 3156 return Ytr0 3157 case REG_TR + 1: 3158 return Ytr1 3159 case REG_TR + 2: 3160 return Ytr2 3161 case REG_TR + 3: 3162 return Ytr3 3163 case REG_TR + 4: 3164 return Ytr4 3165 case REG_TR + 5: 3166 return Ytr5 3167 case REG_TR + 6: 3168 return Ytr6 3169 case REG_TR + 
7:
		return Ytr7
	}

	return Yxxx
}

// AsmBuf collects the encoded bytes of a variable-length x86 instruction
// and carries the encoder state that goes with them.
//
// Storage is a fixed-size array. None of the Put methods below check the
// remaining capacity, so a write past the end of the array panics.
type AsmBuf struct {
	buf      [100]byte // encoded bytes; only buf[:off] has been written
	off      int       // number of bytes written so far (next write index)
	rexflag  int       // REX prefix bits gathered while encoding operands
	vexflag  bool      // Per inst: true for VEX-encoded
	evexflag bool      // Per inst: true for EVEX-encoded
	rep      bool      // NOTE(review): presumably a pending REP prefix; set outside this section
	repn     bool      // NOTE(review): presumably a pending REPN prefix; set outside this section
	lock     bool      // NOTE(review): presumably a pending LOCK prefix; set outside this section

	evex evexBits // Initialized when evexflag is true
}

// Put1 appends the single byte x and advances the write offset by one.
func (ab *AsmBuf) Put1(x byte) {
	start := ab.off
	ab.buf[start] = x
	ab.off = start + 1
}

// Put2 appends x then y and advances the write offset by two.
func (ab *AsmBuf) Put2(x, y byte) {
	start := ab.off
	for i, b := range [...]byte{x, y} {
		ab.buf[start+i] = b
	}
	ab.off = start + 2
}

// Put3 appends x, y, z in that order and advances the write offset by three.
func (ab *AsmBuf) Put3(x, y, z byte) {
	start := ab.off
	for i, b := range [...]byte{x, y, z} {
		ab.buf[start+i] = b
	}
	ab.off = start + 3
}

// Put4 appends x, y, z, w in that order and advances the write offset by four.
func (ab *AsmBuf) Put4(x, y, z, w byte) {
	start := ab.off
	for i, b := range [...]byte{x, y, z, w} {
		ab.buf[start+i] = b
	}
	ab.off = start + 4
}

// PutInt16 appends the two bytes of v, least significant byte first.
func (ab *AsmBuf) PutInt16(v int16) {
	start := ab.off
	for i := 0; i < 2; i++ {
		ab.buf[start+i] = byte(v >> uint(8*i))
	}
	ab.off = start + 2
}

// PutInt32 appends the four bytes of v, least significant byte first.
func (ab *AsmBuf) PutInt32(v int32) {
	start := ab.off
	for i := 0; i < 4; i++ {
		ab.buf[start+i] = byte(v >> uint(8*i))
	}
	ab.off = start + 4
}

// PutInt64 writes v into the buffer using little-endian encoding.
func (ab *AsmBuf) PutInt64(v int64) {
	// Bytes are stored least significant first; off is advanced only
	// after all eight stores have succeeded.
	ab.buf[ab.off+0] = byte(v)
	ab.buf[ab.off+1] = byte(v >> 8)
	ab.buf[ab.off+2] = byte(v >> 16)
	ab.buf[ab.off+3] = byte(v >> 24)
	ab.buf[ab.off+4] = byte(v >> 32)
	ab.buf[ab.off+5] = byte(v >> 40)
	ab.buf[ab.off+6] = byte(v >> 48)
	ab.buf[ab.off+7] = byte(v >> 56)
	ab.off += 8
}

// Put copies b into the buffer and advances the write offset by len(b).
//
// copy stops at the end of the backing array, but the offset is still
// advanced by the full len(b); callers must not pass more bytes than fit.
func (ab *AsmBuf) Put(b []byte) {
	copy(ab.buf[ab.off:], b)
	ab.off += len(b)
}

// PutOpBytesLit writes zero terminated sequence of bytes from op,
// starting at specified offset (e.g. z counter value).
// Trailing 0 is not written.
//
// Intended to be used for literal Z cases.
// Literal Z cases usually have "Zlit" in their name (Zlit, Zlitr_m, Zlitm_r).
func (ab *AsmBuf) PutOpBytesLit(offset int, op *opBytes) {
	for int(op[offset]) != 0 {
		ab.Put1(byte(op[offset]))
		offset++
	}
}

// Insert inserts b at offset i, moving the bytes previously at
// buf[i:off] up by one position and growing the buffer by one byte.
func (ab *AsmBuf) Insert(i int, b byte) {
	ab.off++
	copy(ab.buf[i+1:ab.off], ab.buf[i:ab.off-1])
	ab.buf[i] = b
}

// Last returns the byte at the end of the buffer.
// It indexes buf[off-1], so it must not be called on an empty buffer.
func (ab *AsmBuf) Last() byte { return ab.buf[ab.off-1] }

// Len returns the length of the buffer, i.e. the number of bytes written.
func (ab *AsmBuf) Len() int { return ab.off }

// Bytes returns the contents of the buffer.
// The result is a slice of the internal array, not a copy.
func (ab *AsmBuf) Bytes() []byte { return ab.buf[:ab.off] }

// Reset empties the buffer by rewinding the write offset.
// The stored bytes and the other state fields are left untouched.
func (ab *AsmBuf) Reset() { ab.off = 0 }

// At returns the byte at offset i.
func (ab *AsmBuf) At(i int) byte { return ab.buf[i] }

// asmidx emits SIB byte.
//
// The byte is assembled as scale<<6 | index<<3 | base, where scale is the
// two-bit encoding of the factor 1, 2, 4 or 8. An index of REG_NONE is
// encoded as 4 in the index field and skips scale validation; a base of
// REG_NONE is encoded as 5. R8-R15 and vector registers 8 and above are
// rejected when assembling for 386.
//
// On any unsupported scale/index/base a diagnostic is reported through
// ctxt and a single zero byte is emitted instead.
func (ab *AsmBuf) asmidx(ctxt *obj.Link, scale int, index int, base int) {
	var i int

	// X/Y index register is used in VSIB.
	switch index {
	default:
		goto bad

	case REG_NONE:
		// No index register: index field is 4, scale bits stay zero.
		i = 4 << 3
		goto bas

	case REG_R8,
		REG_R9,
		REG_R10,
		REG_R11,
		REG_R12,
		REG_R13,
		REG_R14,
		REG_R15,
		REG_X8,
		REG_X9,
		REG_X10,
		REG_X11,
		REG_X12,
		REG_X13,
		REG_X14,
		REG_X15,
		REG_X16,
		REG_X17,
		REG_X18,
		REG_X19,
		REG_X20,
		REG_X21,
		REG_X22,
		REG_X23,
		REG_X24,
		REG_X25,
		REG_X26,
		REG_X27,
		REG_X28,
		REG_X29,
		REG_X30,
		REG_X31,
		REG_Y8,
		REG_Y9,
		REG_Y10,
		REG_Y11,
		REG_Y12,
		REG_Y13,
		REG_Y14,
		REG_Y15,
		REG_Y16,
		REG_Y17,
		REG_Y18,
		REG_Y19,
		REG_Y20,
		REG_Y21,
		REG_Y22,
		REG_Y23,
		REG_Y24,
		REG_Y25,
		REG_Y26,
		REG_Y27,
		REG_Y28,
		REG_Y29,
		REG_Y30,
		REG_Y31,
		REG_Z8,
		REG_Z9,
		REG_Z10,
		REG_Z11,
		REG_Z12,
		REG_Z13,
		REG_Z14,
		REG_Z15,
		REG_Z16,
		REG_Z17,
		REG_Z18,
		REG_Z19,
		REG_Z20,
		REG_Z21,
		REG_Z22,
		REG_Z23,
		REG_Z24,
		REG_Z25,
		REG_Z26,
		REG_Z27,
		REG_Z28,
		REG_Z29,
		REG_Z30,
		REG_Z31:
		// Extended registers are not accepted as an index on 386.
		if ctxt.Arch.Family == sys.I386 {
			goto bad
		}
		fallthrough

	case REG_AX,
		REG_CX,
		REG_DX,
		REG_BX,
		REG_BP,
		REG_SI,
		REG_DI,
		REG_X0,
		REG_X1,
		REG_X2,
		REG_X3,
		REG_X4,
		REG_X5,
		REG_X6,
		REG_X7,
		REG_Y0,
		REG_Y1,
		REG_Y2,
		REG_Y3,
		REG_Y4,
		REG_Y5,
		REG_Y6,
		REG_Y7,
		REG_Z0,
		REG_Z1,
		REG_Z2,
		REG_Z3,
		REG_Z4,
		REG_Z5,
		REG_Z6,
		REG_Z7:
		// Only the reg[] value goes into the SIB index field; callers
		// collect the extension bit separately from regrex[].
		i = reg[index] << 3
	}

	// Scale factor goes into the top two bits.
	switch scale {
	default:
		goto bad

	case 1:
		break

	case 2:
		i |= 1 << 6

	case 4:
		i |= 2 << 6

	case 8:
		i |= 3 << 6
	}

bas:
	switch base {
	default:
		goto bad

	case REG_NONE: // must be mod=00
		i |= 5

	case REG_R8,
		REG_R9,
		REG_R10,
		REG_R11,
		REG_R12,
		REG_R13,
		REG_R14,
		REG_R15:
		if ctxt.Arch.Family == sys.I386 {
			goto bad
		}
		fallthrough

	case REG_AX,
		REG_CX,
		REG_DX,
		REG_BX,
		REG_SP,
		REG_BP,
		REG_SI,
		REG_DI:
		i |= reg[base]
	}

	ab.Put1(byte(i))
	return

bad:
	ctxt.Diag("asmidx: bad address %d/%d/%d", scale, index, base)
	ab.Put1(0)
}

// relput4 emits the 32-bit value of operand a.
//
// If vaddr reports that a needs a relocation, the relocation is appended
// to cursym with its offset set to the position (p.Pc plus the bytes
// already in the buffer) at which the four bytes are about to be written.
// A relocation size other than 4 is diagnosed but still recorded.
func (ab *AsmBuf) relput4(ctxt *obj.Link, cursym *obj.LSym, p *obj.Prog, a *obj.Addr) {
	var rel obj.Reloc

	v := vaddr(ctxt, p, a, &rel)
	if rel.Siz != 0 {
		if rel.Siz != 4 {
			ctxt.Diag("bad reloc")
		}
		r := obj.Addrel(cursym)
		*r = rel
		r.Off = int32(p.Pc + int64(ab.Len()))
	}

	ab.PutInt32(int32(v))
}

// vaddr returns the constant to encode for operand a and, when the
// operand refers to a symbol or to TLS, describes the needed relocation.
//
// If r is non-nil it is first reset to the zero Reloc. For static, extern
// and GOT references a 4-byte relocation is filled in (R_GOTPCREL, R_ADDR
// or R_PCREL) and 0 is returned. For a TLS-based memory/address operand 0
// is returned as well, and an R_TLS_LE relocation is filled in unless the
// link is shared on a target other than Android or Darwin. In every
// relocation case r.Off is left at -1 for the caller to set. Otherwise
// a.Offset is returned and r stays zero.
//
// When a relocation is required but r is nil, the process is terminated
// via log.Fatalf.
func vaddr(ctxt *obj.Link, p *obj.Prog, a *obj.Addr, r *obj.Reloc) int64 {
	if r != nil {
		*r = obj.Reloc{}
	}

	switch a.Name {
	case obj.NAME_STATIC,
		obj.NAME_GOTREF,
		obj.NAME_EXTERN:
		s := a.Sym
		if r == nil {
			ctxt.Diag("need reloc for %v", obj.Dconv(p, a))
			log.Fatalf("reloc")
		}

		if a.Name == obj.NAME_GOTREF {
			r.Siz = 4
			r.Type = objabi.R_GOTPCREL
		} else if useAbs(ctxt, s) {
			r.Siz = 4
			r.Type = objabi.R_ADDR
		} else {
			r.Siz = 4
			r.Type = objabi.R_PCREL
		}

		r.Off = -1 // caller must fill in
		r.Sym = s
		r.Add = a.Offset

		return 0
	}

	if (a.Type == obj.TYPE_MEM || a.Type == obj.TYPE_ADDR) && a.Reg == REG_TLS {
		if r == nil {
			ctxt.Diag("need reloc for %v", obj.Dconv(p, a))
			log.Fatalf("reloc")
		}

		if !ctxt.Flag_shared || isAndroid || ctxt.Headtype == objabi.Hdarwin {
			r.Type = objabi.R_TLS_LE
			r.Siz = 4
			r.Off = -1 // caller must fill in
			r.Add = a.Offset
		}
		return 0
	}

	return a.Offset
}

// asmandsz emits the ModR/M byte for operand a, followed by the SIB byte
// and displacement when the addressing form needs them.
//
// r is the value placed in the ModR/M reg field (bits 5-3) and rex carries
// the REX bits that belong to it; only 0x40 and Rxr are kept. REX bits
// required by the base and index registers are OR-ed into ab.rexflag.
// m64 is not referenced anywhere in this function.
//
// Every ModR/M byte below is written as mod<<6 | rm<<0 | r<<3. An rm value
// of 4 is always followed by a SIB byte from asmidx; mod 1 is followed by
// an 8-bit and mod 2 by a 32-bit displacement.
//
// Symbol and TLS references produce a relocation on cursym at the position
// of the 32-bit displacement. Operands that cannot be encoded are reported
// through ctxt.Diag.
func (ab *AsmBuf) asmandsz(ctxt *obj.Link, cursym *obj.LSym, p *obj.Prog, a *obj.Addr, r int, rex int, m64 int) {
	var base int
	var rel obj.Reloc

	rex &= 0x40 | Rxr
	if a.Offset != int64(int32(a.Offset)) {
		// The rules are slightly different for 386 and AMD64,
		// mostly for historical reasons. We may unify them later,
		// but it must be discussed beforehand.
		//
		// For 64bit mode only LEAL is allowed to overflow.
		// It's how https://golang.org/cl/59630 made it.
		// crypto/sha1/sha1block_amd64.s depends on this feature.
		//
		// For 32bit mode rules are more permissive.
		// If offset fits uint32, it's permitted.
		// This is allowed for assembly that wants to use 32-bit hex
		// constants, e.g. LEAL 0x99999999(AX), AX.
		overflowOK := (ctxt.Arch.Family == sys.AMD64 && p.As == ALEAL) ||
			(ctxt.Arch.Family != sys.AMD64 &&
				int64(uint32(a.Offset)) == a.Offset &&
				ab.rexflag&Rxw == 0)
		if !overflowOK {
			ctxt.Diag("offset too large in %s", p)
		}
	}
	// Encoding continues with the truncated offset even after the
	// diagnostic above.
	v := int32(a.Offset)
	rel.Siz = 0

	switch a.Type {
	case obj.TYPE_ADDR:
		// Address operands are never encodable here; the extra
		// diagnostics only make the failure more specific.
		if a.Name == obj.NAME_NONE {
			ctxt.Diag("unexpected TYPE_ADDR with NAME_NONE")
		}
		if a.Index == REG_TLS {
			ctxt.Diag("unexpected TYPE_ADDR with index==REG_TLS")
		}
		goto bad

	case obj.TYPE_REG:
		// Register-direct operand: mod=3, no SIB, no displacement.
		const regFirst = REG_AL
		const regLast = REG_Z31
		if a.Reg < regFirst || regLast < a.Reg {
			goto bad
		}
		if v != 0 {
			goto bad
		}
		ab.Put1(byte(3<<6 | reg[a.Reg]<<0 | r<<3))
		ab.rexflag |= regrex[a.Reg]&(0x40|Rxb) | rex
		return
	}

	if a.Type != obj.TYPE_MEM {
		goto bad
	}

	// Memory operand with a real index register: always uses a SIB byte.
	if a.Index != REG_NONE && a.Index != REG_TLS {
		// NOTE: this base shadows the function-level base declared above.
		base := int(a.Reg)
		switch a.Name {
		case obj.NAME_EXTERN,
			obj.NAME_GOTREF,
			obj.NAME_STATIC:
			if !useAbs(ctxt, a.Sym) && ctxt.Arch.Family == sys.AMD64 {
				goto bad
			}
			if ctxt.Arch.Family == sys.I386 && ctxt.Flag_shared {
				// The base register has already been set. It holds the PC
				// of this instruction returned by a PC-reading thunk.
				// See obj6.go:rewriteToPcrel.
			} else {
				base = REG_NONE
			}
			v = int32(vaddr(ctxt, p, a, &rel))

		case obj.NAME_AUTO,
			obj.NAME_PARAM:
			base = REG_SP
		}

		ab.rexflag |= regrex[int(a.Index)]&Rxx | regrex[base]&Rxb | rex
		if base == REG_NONE {
			// Index without base: mod=0 and a 32-bit displacement.
			ab.Put1(byte(0<<6 | 4<<0 | r<<3))
			ab.asmidx(ctxt, int(a.Scale), int(a.Index), base)
			goto putrelv
		}

		// BP and R13 are excluded from the displacement-free form.
		if v == 0 && rel.Siz == 0 && base != REG_BP && base != REG_R13 {
			ab.Put1(byte(0<<6 | 4<<0 | r<<3))
			ab.asmidx(ctxt, int(a.Scale), int(a.Index), base)
			return
		}

		// An 8-bit displacement is only usable when no relocation
		// has to be applied to it.
		if disp8, ok := toDisp8(v, p, ab); ok && rel.Siz == 0 {
			ab.Put1(byte(1<<6 | 4<<0 | r<<3))
			ab.asmidx(ctxt, int(a.Scale), int(a.Index), base)
			ab.Put1(disp8)
			return
		}

		ab.Put1(byte(2<<6 | 4<<0 | r<<3))
		ab.asmidx(ctxt, int(a.Scale), int(a.Index), base)
		goto putrelv
	}

	// Memory operand without an index register (or with the REG_TLS
	// pseudo-index).
	base = int(a.Reg)
	switch a.Name {
	case obj.NAME_STATIC,
		obj.NAME_GOTREF,
		obj.NAME_EXTERN:
		if a.Sym == nil {
			ctxt.Diag("bad addr: %v", p)
		}
		if ctxt.Arch.Family == sys.I386 && ctxt.Flag_shared {
			// The base register has already been set. It holds the PC
			// of this instruction returned by a PC-reading thunk.
			// See obj6.go:rewriteToPcrel.
		} else {
			base = REG_NONE
		}
		v = int32(vaddr(ctxt, p, a, &rel))

	case obj.NAME_AUTO,
		obj.NAME_PARAM:
		base = REG_SP
	}

	if base == REG_TLS {
		v = int32(vaddr(ctxt, p, a, &rel))
	}

	ab.rexflag |= regrex[base]&Rxb | rex
	// No base, a segment-register base, or TLS: displacement-only forms.
	if base == REG_NONE || (REG_CS <= base && base <= REG_GS) || base == REG_TLS {
		// && binds tighter than ||, so any non-AMD64 target takes this
		// branch regardless of the symbol checks.
		if (a.Sym == nil || !useAbs(ctxt, a.Sym)) && base == REG_NONE && (a.Name == obj.NAME_STATIC || a.Name == obj.NAME_EXTERN || a.Name == obj.NAME_GOTREF) || ctxt.Arch.Family != sys.AMD64 {
			if a.Name == obj.NAME_GOTREF && (a.Offset != 0 || a.Index != 0 || a.Scale != 0) {
				ctxt.Diag("%v has offset against gotref", p)
			}
			// mod=0, rm=5 followed by a 32-bit displacement.
			ab.Put1(byte(0<<6 | 5<<0 | r<<3))
			goto putrelv
		}

		// temporary
		ab.Put2(
			byte(0<<6|4<<0|r<<3), // sib present
			0<<6|4<<3|5<<0,       // DS:d32
		)
		goto putrelv
	}

	// SP and R12 as base always go through a SIB byte with no index.
	if base == REG_SP || base == REG_R12 {
		if v == 0 {
			ab.Put1(byte(0<<6 | reg[base]<<0 | r<<3))
			ab.asmidx(ctxt, int(a.Scale), REG_NONE, base)
			return
		}

		if disp8, ok := toDisp8(v, p, ab); ok {
			ab.Put1(byte(1<<6 | reg[base]<<0 | r<<3))
			ab.asmidx(ctxt, int(a.Scale), REG_NONE, base)
			ab.Put1(disp8)
			return
		}

		ab.Put1(byte(2<<6 | reg[base]<<0 | r<<3))
		ab.asmidx(ctxt, int(a.Scale), REG_NONE, base)
		goto putrelv
	}

	// Any other general-purpose base register: plain ModR/M, no SIB.
	if REG_AX <= base && base <= REG_R15 {
		if a.Index == REG_TLS && !ctxt.Flag_shared && !isAndroid {
			// The offset moves into an R_TLS_LE relocation addend and
			// the encoded displacement starts out as zero.
			rel = obj.Reloc{}
			rel.Type = objabi.R_TLS_LE
			rel.Siz = 4
			rel.Sym = nil
			rel.Add = int64(v)
			v = 0
		}

		// BP and R13 are excluded from the displacement-free form.
		if v == 0 && rel.Siz == 0 && base != REG_BP && base != REG_R13 {
			ab.Put1(byte(0<<6 | reg[base]<<0 | r<<3))
			return
		}

		if disp8, ok := toDisp8(v, p, ab); ok && rel.Siz == 0 {
			ab.Put2(byte(1<<6|reg[base]<<0|r<<3), disp8)
			return
		}

		ab.Put1(byte(2<<6 | reg[base]<<0 | r<<3))
		goto putrelv
	}

	goto bad

putrelv:
	// Emit the 32-bit displacement, recording the pending relocation (if
	// any) at the position where the displacement is written.
	if rel.Siz != 0 {
		if rel.Siz != 4 {
			ctxt.Diag("bad rel")
			goto bad
		}

		r := obj.Addrel(cursym)
		*r = rel
		r.Off = int32(p.Pc + int64(ab.Len()))
	}

	ab.PutInt32(v)
	return

bad:
	ctxt.Diag("asmand: bad address %v", obj.Dconv(p, a))
}

// asmand encodes operand a with the register of ra in the ModR/M reg
// field; the REX bits of ra are passed along from regrex.
func (ab *AsmBuf) asmand(ctxt *obj.Link, cursym *obj.LSym, p *obj.Prog, a *obj.Addr, ra *obj.Addr) {
	ab.asmandsz(ctxt, cursym, p, a, reg[ra.Reg], regrex[ra.Reg], 0)
}

// asmando encodes operand a with the constant o in the ModR/M reg field
// and no REX bits for that field.
func (ab *AsmBuf) asmando(ctxt *obj.Link, cursym *obj.LSym, p *obj.Prog, a *obj.Addr, o int) {
	ab.asmandsz(ctxt, cursym, p, a, o, 0, 0)
}

// bytereg rewrites a register operand in the REG_AX..REG_R15 range to the
// register at the same distance from REG_AL, and zeroes *t.
// Operands of any other kind are left unchanged.
// NOTE(review): *t looks like the cached operand class (p.Ft/p.Tt), which
// doasm recomputes when it is zero — confirm against callers.
func bytereg(a *obj.Addr, t *uint8) {
	if a.Type == obj.TYPE_REG && a.Index == REG_NONE && (REG_AX <= a.Reg && a.Reg <= REG_R15) {
		a.Reg += REG_AL - REG_AX
		*t = 0
	}
}

// unbytereg is the inverse of bytereg: a register operand in the
// REG_AL..REG_R15B range is moved back by the same distance, and *t is
// zeroed.
func unbytereg(a *obj.Addr, t *uint8) {
	if a.Type == obj.TYPE_REG && a.Index == REG_NONE && (REG_AL <= a.Reg && a.Reg <= REG_R15B) {
		a.Reg += REG_AX - REG_AL
		*t = 0
	}
}

// Handler codes used in the fifth column of ymovtab rows.
const (
	movLit uint8 = iota // Like Zlit
	movRegMem
	movMemReg
	movRegMem2op
	movMemReg2op
	movFullPtr // Load full pointer, trash heap (unsupported)
	movDoubleShift
	movTLSReg
)

// ymovtab lists instruction forms that are encoded by dedicated handlers
// rather than through the regular operand tables.
//
// NOTE(review): movtab is declared outside this section; judging by the
// rows, the columns are instruction, first operand class, middle operand
// class, last operand class, handler code (one of the mov* constants) and
// up to four opcode bytes. The all-zero row terminates the table.
var ymovtab = []movtab{
	// push
	{APUSHL, Ycs, Ynone, Ynone, movLit, [4]uint8{0x0e, 0}},
	{APUSHL, Yss, Ynone, Ynone, movLit, [4]uint8{0x16, 0}},
	{APUSHL, Yds, Ynone, Ynone, movLit, [4]uint8{0x1e, 0}},
	{APUSHL, Yes, Ynone, Ynone, movLit, [4]uint8{0x06, 0}},
	{APUSHL, Yfs, Ynone, Ynone, movLit, [4]uint8{0x0f, 0xa0, 0}},
	{APUSHL, Ygs, Ynone, Ynone, movLit, [4]uint8{0x0f, 0xa8, 0}},
	{APUSHQ, Yfs, Ynone, Ynone, movLit, [4]uint8{0x0f, 0xa0, 0}},
	{APUSHQ, Ygs, Ynone, Ynone, movLit, [4]uint8{0x0f, 0xa8, 0}},
	{APUSHW, Ycs, Ynone, Ynone, movLit, [4]uint8{Pe, 0x0e, 0}},
	{APUSHW, Yss, Ynone, Ynone, movLit, [4]uint8{Pe, 0x16, 0}},
	{APUSHW, Yds, Ynone, Ynone, movLit, [4]uint8{Pe, 0x1e, 0}},
	{APUSHW, Yes, Ynone, Ynone, movLit, [4]uint8{Pe, 0x06, 0}},
	{APUSHW, Yfs, Ynone, Ynone, movLit, [4]uint8{Pe, 0x0f, 0xa0, 0}},
	{APUSHW, Ygs, Ynone, Ynone, movLit, [4]uint8{Pe, 0x0f, 0xa8, 0}},

	// pop
	{APOPL, Ynone, Ynone, Yds, movLit, [4]uint8{0x1f, 0}},
	{APOPL, Ynone, Ynone, Yes, movLit, [4]uint8{0x07, 0}},
	{APOPL, Ynone, Ynone, Yss, movLit, [4]uint8{0x17, 0}},
	{APOPL, Ynone, Ynone, Yfs, movLit, [4]uint8{0x0f, 0xa1, 0}},
	{APOPL, Ynone, Ynone, Ygs, movLit, [4]uint8{0x0f, 0xa9, 0}},
	{APOPQ, Ynone, Ynone, Yfs, movLit, [4]uint8{0x0f, 0xa1, 0}},
	{APOPQ, Ynone, Ynone, Ygs, movLit, [4]uint8{0x0f, 0xa9, 0}},
	{APOPW, Ynone, Ynone, Yds, movLit, [4]uint8{Pe, 0x1f, 0}},
	{APOPW, Ynone, Ynone, Yes, movLit, [4]uint8{Pe, 0x07, 0}},
	{APOPW, Ynone, Ynone, Yss, movLit, [4]uint8{Pe, 0x17, 0}},
	{APOPW, Ynone, Ynone, Yfs, movLit, [4]uint8{Pe, 0x0f, 0xa1, 0}},
	{APOPW, Ynone, Ynone, Ygs, movLit, [4]uint8{Pe, 0x0f, 0xa9, 0}},

	// mov seg
	{AMOVW, Yes, Ynone, Yml, movRegMem, [4]uint8{0x8c, 0, 0, 0}},
	{AMOVW, Ycs, Ynone, Yml, movRegMem, [4]uint8{0x8c, 1, 0, 0}},
	{AMOVW, Yss, Ynone, Yml, movRegMem, [4]uint8{0x8c, 2, 0, 0}},
	{AMOVW, Yds, Ynone, Yml, movRegMem, [4]uint8{0x8c, 3, 0, 0}},
	{AMOVW, Yfs, Ynone, Yml, movRegMem, [4]uint8{0x8c, 4, 0, 0}},
	{AMOVW, Ygs, Ynone, Yml, movRegMem, [4]uint8{0x8c, 5, 0, 0}},
	{AMOVW, Yml, Ynone, Yes, movMemReg, [4]uint8{0x8e, 0, 0, 0}},
	{AMOVW, Yml, Ynone, Ycs, movMemReg, [4]uint8{0x8e, 1, 0, 0}},
	{AMOVW, Yml, Ynone, Yss, movMemReg, [4]uint8{0x8e, 2, 0, 0}},
	{AMOVW, Yml, Ynone, Yds, movMemReg, [4]uint8{0x8e, 3, 0, 0}},
	{AMOVW, Yml, Ynone, Yfs, movMemReg, [4]uint8{0x8e, 4, 0, 0}},
	{AMOVW, Yml, Ynone, Ygs, movMemReg, [4]uint8{0x8e, 5, 0, 0}},

	// mov cr
	{AMOVL, Ycr0, Ynone, Yrl, movRegMem2op, [4]uint8{0x0f, 0x20, 0, 0}},
	{AMOVL, Ycr2, Ynone, Yrl, movRegMem2op, [4]uint8{0x0f, 0x20, 2, 0}},
	{AMOVL, Ycr3, Ynone, Yrl, movRegMem2op, [4]uint8{0x0f, 0x20, 3, 0}},
	{AMOVL, Ycr4, Ynone, Yrl, movRegMem2op, [4]uint8{0x0f, 0x20, 4, 0}},
	{AMOVL, Ycr8, Ynone, Yrl, movRegMem2op, [4]uint8{0x0f, 0x20, 8, 0}},
	{AMOVQ, Ycr0, Ynone, Yrl, movRegMem2op, [4]uint8{0x0f, 0x20, 0, 0}},
	{AMOVQ, Ycr2, Ynone, Yrl, movRegMem2op, [4]uint8{0x0f, 0x20, 2, 0}},
	{AMOVQ, Ycr3, Ynone, Yrl, movRegMem2op, [4]uint8{0x0f, 0x20, 3, 0}},
	{AMOVQ, Ycr4, Ynone, Yrl, movRegMem2op, [4]uint8{0x0f, 0x20, 4, 0}},
	{AMOVQ, Ycr8, Ynone, Yrl, movRegMem2op, [4]uint8{0x0f, 0x20, 8, 0}},
	{AMOVL, Yrl, Ynone, Ycr0, movMemReg2op, [4]uint8{0x0f, 0x22, 0, 0}},
	{AMOVL, Yrl, Ynone, Ycr2, movMemReg2op, [4]uint8{0x0f, 0x22, 2, 0}},
	{AMOVL, Yrl, Ynone, Ycr3, movMemReg2op, [4]uint8{0x0f, 0x22, 3, 0}},
	{AMOVL, Yrl, Ynone, Ycr4, movMemReg2op, [4]uint8{0x0f, 0x22, 4, 0}},
	{AMOVL, Yrl, Ynone, Ycr8, movMemReg2op, [4]uint8{0x0f, 0x22, 8, 0}},
	{AMOVQ, Yrl, Ynone, Ycr0, movMemReg2op, [4]uint8{0x0f, 0x22, 0, 0}},
	{AMOVQ, Yrl, Ynone, Ycr2, movMemReg2op, [4]uint8{0x0f, 0x22, 2, 0}},
	{AMOVQ, Yrl, Ynone, Ycr3, movMemReg2op, [4]uint8{0x0f, 0x22, 3, 0}},
	{AMOVQ, Yrl, Ynone, Ycr4, movMemReg2op, [4]uint8{0x0f, 0x22, 4, 0}},
	{AMOVQ, Yrl, Ynone, Ycr8, movMemReg2op, [4]uint8{0x0f, 0x22, 8, 0}},

	// mov dr
	{AMOVL, Ydr0, Ynone, Yrl, movRegMem2op, [4]uint8{0x0f, 0x21, 0, 0}},
	{AMOVL, Ydr6, Ynone, Yrl, movRegMem2op, [4]uint8{0x0f, 0x21, 6, 0}},
	{AMOVL, Ydr7, Ynone, Yrl, movRegMem2op, [4]uint8{0x0f, 0x21, 7, 0}},
	{AMOVQ, Ydr0, Ynone, Yrl, movRegMem2op, [4]uint8{0x0f, 0x21, 0, 0}},
	{AMOVQ, Ydr2, Ynone, Yrl, movRegMem2op, [4]uint8{0x0f, 0x21, 2, 0}},
	{AMOVQ, Ydr3, Ynone, Yrl, movRegMem2op, [4]uint8{0x0f, 0x21, 3, 0}},
	{AMOVQ, Ydr6, Ynone, Yrl, movRegMem2op, [4]uint8{0x0f, 0x21, 6, 0}},
	{AMOVQ, Ydr7, Ynone, Yrl, movRegMem2op, [4]uint8{0x0f, 0x21, 7, 0}},
	{AMOVL, Yrl, Ynone, Ydr0, movMemReg2op, [4]uint8{0x0f, 0x23, 0, 0}},
	{AMOVL, Yrl, Ynone, Ydr6, movMemReg2op, [4]uint8{0x0f, 0x23, 6, 0}},
	{AMOVL, Yrl, Ynone, Ydr7, movMemReg2op, [4]uint8{0x0f, 0x23, 7, 0}},
	{AMOVQ, Yrl, Ynone, Ydr0, movMemReg2op, [4]uint8{0x0f, 0x23, 0, 0}},
	{AMOVQ, Yrl, Ynone, Ydr2, movMemReg2op, [4]uint8{0x0f, 0x23, 2, 0}},
	{AMOVQ, Yrl, Ynone, Ydr3, movMemReg2op, [4]uint8{0x0f, 0x23, 3, 0}},
	{AMOVQ, Yrl, Ynone, Ydr6, movMemReg2op, [4]uint8{0x0f, 0x23, 6, 0}},
	{AMOVQ, Yrl, Ynone, Ydr7, movMemReg2op, [4]uint8{0x0f, 0x23, 7, 0}},

	// mov tr
	{AMOVL, Ytr6, Ynone, Yml, movRegMem2op, [4]uint8{0x0f, 0x24, 6, 0}},
	{AMOVL, Ytr7, Ynone, Yml, movRegMem2op, [4]uint8{0x0f, 0x24, 7, 0}},
	{AMOVL, Yml, Ynone, Ytr6, movMemReg2op, [4]uint8{0x0f, 0x26, 6, 0xff}},
	{AMOVL, Yml, Ynone, Ytr7, movMemReg2op, [4]uint8{0x0f, 0x26, 7, 0xff}},

	// lgdt, sgdt, lidt, sidt
	{AMOVL, Ym, Ynone, Ygdtr, movMemReg2op, [4]uint8{0x0f, 0x01, 2, 0}},
	{AMOVL, Ygdtr, Ynone, Ym, movRegMem2op, [4]uint8{0x0f, 0x01, 0, 0}},
	{AMOVL, Ym, Ynone, Yidtr, movMemReg2op, [4]uint8{0x0f, 0x01, 3, 0}},
	{AMOVL, Yidtr, Ynone, Ym, movRegMem2op, [4]uint8{0x0f, 0x01, 1, 0}},
	{AMOVQ, Ym, Ynone, Ygdtr, movMemReg2op, [4]uint8{0x0f, 0x01, 2, 0}},
	{AMOVQ, Ygdtr, Ynone, Ym, movRegMem2op, [4]uint8{0x0f, 0x01, 0, 0}},
	{AMOVQ, Ym, Ynone, Yidtr, movMemReg2op, [4]uint8{0x0f, 0x01, 3, 0}},
	{AMOVQ, Yidtr, Ynone, Ym, movRegMem2op, [4]uint8{0x0f, 0x01, 1, 0}},

	// lldt, sldt
	{AMOVW, Yml, Ynone, Yldtr, movMemReg2op, [4]uint8{0x0f, 0x00, 2, 0}},
	{AMOVW, Yldtr, Ynone, Yml, movRegMem2op, [4]uint8{0x0f, 0x00, 0, 0}},

	// lmsw, smsw
	{AMOVW, Yml, Ynone, Ymsw, movMemReg2op, [4]uint8{0x0f, 0x01, 6, 0}},
	{AMOVW, Ymsw, Ynone, Yml, movRegMem2op, [4]uint8{0x0f, 0x01, 4, 0}},

	// ltr, str
	{AMOVW, Yml, Ynone, Ytask, movMemReg2op, [4]uint8{0x0f, 0x00, 3, 0}},
	{AMOVW, Ytask, Ynone, Yml, movRegMem2op, [4]uint8{0x0f, 0x00, 1, 0}},

	/* load full pointer - unsupported
	{AMOVL, Yml, Ycol, movFullPtr, [4]uint8{0, 0, 0, 0}},
	{AMOVW, Yml, Ycol, movFullPtr, [4]uint8{Pe, 0, 0, 0}},
	*/

	// double shift
	{ASHLL, Yi8, Yrl, Yml, movDoubleShift, [4]uint8{0xa4, 0xa5, 0, 0}},
	{ASHLL, Ycl, Yrl, Yml, movDoubleShift, [4]uint8{0xa4, 0xa5, 0, 0}},
	{ASHLL, Ycx, Yrl, Yml, movDoubleShift, [4]uint8{0xa4, 0xa5, 0, 0}},
	{ASHRL, Yi8, Yrl, Yml, movDoubleShift, [4]uint8{0xac, 0xad, 0, 0}},
	{ASHRL, Ycl, Yrl, Yml, movDoubleShift, [4]uint8{0xac, 0xad, 0, 0}},
	{ASHRL, Ycx, Yrl, Yml, movDoubleShift, [4]uint8{0xac, 0xad, 0, 0}},
	{ASHLQ, Yi8, Yrl, Yml, movDoubleShift, [4]uint8{Pw, 0xa4, 0xa5, 0}},
	{ASHLQ, Ycl, Yrl, Yml, movDoubleShift, [4]uint8{Pw, 0xa4, 0xa5, 0}},
	{ASHLQ, Ycx, Yrl, Yml, movDoubleShift, [4]uint8{Pw, 0xa4, 0xa5, 0}},
	{ASHRQ, Yi8, Yrl, Yml, movDoubleShift, [4]uint8{Pw, 0xac, 0xad, 0}},
	{ASHRQ, Ycl, Yrl, Yml, movDoubleShift, [4]uint8{Pw, 0xac, 0xad, 0}},
	{ASHRQ, Ycx, Yrl, Yml, movDoubleShift, [4]uint8{Pw, 0xac, 0xad, 0}},
	{ASHLW, Yi8, Yrl, Yml, movDoubleShift, [4]uint8{Pe, 0xa4, 0xa5, 0}},
	{ASHLW, Ycl, Yrl, Yml, movDoubleShift, [4]uint8{Pe, 0xa4, 0xa5, 0}},
	{ASHLW, Ycx, Yrl, Yml, movDoubleShift, [4]uint8{Pe, 0xa4, 0xa5, 0}},
	{ASHRW, Yi8, Yrl, Yml, movDoubleShift, [4]uint8{Pe, 0xac, 0xad, 0}},
	{ASHRW, Ycl, Yrl, Yml, movDoubleShift, [4]uint8{Pe, 0xac, 0xad, 0}},
	{ASHRW, Ycx, Yrl, Yml, movDoubleShift, [4]uint8{Pe, 0xac, 0xad, 0}},

	// load TLS base
	{AMOVL, Ytls, Ynone, Yrl, movTLSReg, [4]uint8{0, 0, 0, 0}},
	{AMOVQ, Ytls, Ynone, Yrl, movTLSReg, [4]uint8{0, 0, 0, 0}},
	{0, 0, 0, 0, 0, [4]uint8{}},
}

// isax reports whether operand a uses the AX register, either as its
// register (AX, AL or AH) or as its index (AX).
func isax(a *obj.Addr) bool {
	switch a.Reg {
	case REG_AX, REG_AL, REG_AH:
		return true
	}

	if a.Index == REG_AX {
		return true
	}
	return false
}
3949func subreg(p *obj.Prog, from int, to int) { 3950 if false { /* debug['Q'] */ 3951 fmt.Printf("\n%v\ts/%v/%v/\n", p, rconv(from), rconv(to)) 3952 } 3953 3954 if int(p.From.Reg) == from { 3955 p.From.Reg = int16(to) 3956 p.Ft = 0 3957 } 3958 3959 if int(p.To.Reg) == from { 3960 p.To.Reg = int16(to) 3961 p.Tt = 0 3962 } 3963 3964 if int(p.From.Index) == from { 3965 p.From.Index = int16(to) 3966 p.Ft = 0 3967 } 3968 3969 if int(p.To.Index) == from { 3970 p.To.Index = int16(to) 3971 p.Tt = 0 3972 } 3973 3974 if false { /* debug['Q'] */ 3975 fmt.Printf("%v\n", p) 3976 } 3977} 3978 3979func (ab *AsmBuf) mediaop(ctxt *obj.Link, o *Optab, op int, osize int, z int) int { 3980 switch op { 3981 case Pm, Pe, Pf2, Pf3: 3982 if osize != 1 { 3983 if op != Pm { 3984 ab.Put1(byte(op)) 3985 } 3986 ab.Put1(Pm) 3987 z++ 3988 op = int(o.op[z]) 3989 break 3990 } 3991 fallthrough 3992 3993 default: 3994 if ab.Len() == 0 || ab.Last() != Pm { 3995 ab.Put1(Pm) 3996 } 3997 } 3998 3999 ab.Put1(byte(op)) 4000 return z 4001} 4002 4003var bpduff1 = []byte{ 4004 0x48, 0x89, 0x6c, 0x24, 0xf0, // MOVQ BP, -16(SP) 4005 0x48, 0x8d, 0x6c, 0x24, 0xf0, // LEAQ -16(SP), BP 4006} 4007 4008var bpduff2 = []byte{ 4009 0x48, 0x8b, 0x6d, 0x00, // MOVQ 0(BP), BP 4010} 4011 4012// asmevex emits EVEX pregis and opcode byte. 4013// In addition to asmvex r/m, vvvv and reg fields also requires optional 4014// K-masking register. 4015// 4016// Expects asmbuf.evex to be properly initialized. 4017func (ab *AsmBuf) asmevex(ctxt *obj.Link, p *obj.Prog, rm, v, r, k *obj.Addr) { 4018 ab.evexflag = true 4019 evex := ab.evex 4020 4021 rexR := byte(1) 4022 evexR := byte(1) 4023 rexX := byte(1) 4024 rexB := byte(1) 4025 if r != nil { 4026 if regrex[r.Reg]&Rxr != 0 { 4027 rexR = 0 // "ModR/M.reg" selector 4th bit. 4028 } 4029 if regrex[r.Reg]&RxrEvex != 0 { 4030 evexR = 0 // "ModR/M.reg" selector 5th bit. 
4031 } 4032 } 4033 if rm != nil { 4034 if rm.Index == REG_NONE && regrex[rm.Reg]&RxrEvex != 0 { 4035 rexX = 0 4036 } else if regrex[rm.Index]&Rxx != 0 { 4037 rexX = 0 4038 } 4039 if regrex[rm.Reg]&Rxb != 0 { 4040 rexB = 0 4041 } 4042 } 4043 // P0 = [R][X][B][R'][00][mm] 4044 p0 := (rexR << 7) | 4045 (rexX << 6) | 4046 (rexB << 5) | 4047 (evexR << 4) | 4048 (0 << 2) | 4049 (evex.M() << 0) 4050 4051 vexV := byte(0) 4052 if v != nil { 4053 // 4bit-wide reg index. 4054 vexV = byte(reg[v.Reg]|(regrex[v.Reg]&Rxr)<<1) & 0xF 4055 } 4056 vexV ^= 0x0F 4057 // P1 = [W][vvvv][1][pp] 4058 p1 := (evex.W() << 7) | 4059 (vexV << 3) | 4060 (1 << 2) | 4061 (evex.P() << 0) 4062 4063 suffix := evexSuffixMap[p.Scond] 4064 evexZ := byte(0) 4065 evexLL := evex.L() 4066 evexB := byte(0) 4067 evexV := byte(1) 4068 evexA := byte(0) 4069 if suffix.zeroing { 4070 if !evex.ZeroingEnabled() { 4071 ctxt.Diag("unsupported zeroing: %v", p) 4072 } 4073 evexZ = 1 4074 } 4075 switch { 4076 case suffix.rounding != rcUnset: 4077 if rm != nil && rm.Type == obj.TYPE_MEM { 4078 ctxt.Diag("illegal rounding with memory argument: %v", p) 4079 } else if !evex.RoundingEnabled() { 4080 ctxt.Diag("unsupported rounding: %v", p) 4081 } 4082 evexB = 1 4083 evexLL = suffix.rounding 4084 case suffix.broadcast: 4085 if rm == nil || rm.Type != obj.TYPE_MEM { 4086 ctxt.Diag("illegal broadcast without memory argument: %v", p) 4087 } else if !evex.BroadcastEnabled() { 4088 ctxt.Diag("unsupported broadcast: %v", p) 4089 } 4090 evexB = 1 4091 case suffix.sae: 4092 if rm != nil && rm.Type == obj.TYPE_MEM { 4093 ctxt.Diag("illegal SAE with memory argument: %v", p) 4094 } else if !evex.SaeEnabled() { 4095 ctxt.Diag("unsupported SAE: %v", p) 4096 } 4097 evexB = 1 4098 } 4099 if rm != nil && regrex[rm.Index]&RxrEvex != 0 { 4100 evexV = 0 4101 } else if v != nil && regrex[v.Reg]&RxrEvex != 0 { 4102 evexV = 0 // VSR selector 5th bit. 
4103 } 4104 if k != nil { 4105 evexA = byte(reg[k.Reg]) 4106 } 4107 // P2 = [z][L'L][b][V'][aaa] 4108 p2 := (evexZ << 7) | 4109 (evexLL << 5) | 4110 (evexB << 4) | 4111 (evexV << 3) | 4112 (evexA << 0) 4113 4114 const evexEscapeByte = 0x62 4115 ab.Put4(evexEscapeByte, p0, p1, p2) 4116 ab.Put1(evex.opcode) 4117} 4118 4119// Emit VEX prefix and opcode byte. 4120// The three addresses are the r/m, vvvv, and reg fields. 4121// The reg and rm arguments appear in the same order as the 4122// arguments to asmand, which typically follows the call to asmvex. 4123// The final two arguments are the VEX prefix (see encoding above) 4124// and the opcode byte. 4125// For details about vex prefix see: 4126// https://en.wikipedia.org/wiki/VEX_prefix#Technical_description 4127func (ab *AsmBuf) asmvex(ctxt *obj.Link, rm, v, r *obj.Addr, vex, opcode uint8) { 4128 ab.vexflag = true 4129 rexR := 0 4130 if r != nil { 4131 rexR = regrex[r.Reg] & Rxr 4132 } 4133 rexB := 0 4134 rexX := 0 4135 if rm != nil { 4136 rexB = regrex[rm.Reg] & Rxb 4137 rexX = regrex[rm.Index] & Rxx 4138 } 4139 vexM := (vex >> 3) & 0x7 4140 vexWLP := vex & 0x87 4141 vexV := byte(0) 4142 if v != nil { 4143 vexV = byte(reg[v.Reg]|(regrex[v.Reg]&Rxr)<<1) & 0xF 4144 } 4145 vexV ^= 0xF 4146 if vexM == 1 && (rexX|rexB) == 0 && vex&vexW1 == 0 { 4147 // Can use 2-byte encoding. 4148 ab.Put2(0xc5, byte(rexR<<5)^0x80|vexV<<3|vexWLP) 4149 } else { 4150 // Must use 3-byte encoding. 4151 ab.Put3(0xc4, 4152 (byte(rexR|rexX|rexB)<<5)^0xE0|vexM, 4153 vexV<<3|vexWLP, 4154 ) 4155 } 4156 ab.Put1(opcode) 4157} 4158 4159// regIndex returns register index that fits in 5 bits. 
//
// The index is assembled from three sources:
//
//	R         : 3 bit | legacy instructions     | N/A
//	[R/V]EX.R : 1 bit | REX / VEX extension bit | Rxr
//	EVEX.R    : 1 bit | EVEX extension bit      | RxrEvex
//
// Examples:
//
//	REG_Z30 => 30
//	REG_X15 => 15
//	REG_R9  => 9
//	REG_AX  => 0
func regIndex(r int16) int {
	lower3bits := reg[r]
	// In Go, & and << have the same precedence and associate to the
	// left, so the next two lines compute (regrex[r] & mask) << n.
	high4bit := regrex[r] & Rxr << 1
	high5bit := regrex[r] & RxrEvex << 0
	return lower3bits | high4bit | high5bit
}

// avx2gatherValid reports whether p satisfies AVX2 gather constraints.
// Reports errors via ctxt.
//
// The index register is taken from p.GetFrom3().Index, the mask from
// p.From.Reg and the destination from p.To.Reg; all three are compared by
// their regIndex value.
func avx2gatherValid(ctxt *obj.Link, p *obj.Prog) bool {
	// If any pair of the index, mask, or destination registers
	// are the same, illegal instruction trap (#UD) is triggered.
	index := regIndex(p.GetFrom3().Index)
	mask := regIndex(p.From.Reg)
	dest := regIndex(p.To.Reg)
	if dest == mask || dest == index || mask == index {
		ctxt.Diag("mask, index, and destination registers should be distinct: %v", p)
		return false
	}

	return true
}

// avx512gatherValid reports whether p satisfies AVX512 gather constraints.
// Reports errors via ctxt.
//
// The index register is taken from p.From.Index and the destination from
// p.To.Reg; both are compared by their regIndex value.
func avx512gatherValid(ctxt *obj.Link, p *obj.Prog) bool {
	// Illegal instruction trap (#UD) is triggered if the destination vector
	// register is the same as index vector in VSIB.
	index := regIndex(p.From.Index)
	dest := regIndex(p.To.Reg)
	if dest == index {
		ctxt.Diag("index and destination registers should be distinct: %v", p)
		return false
	}

	return true
}

func (ab *AsmBuf) doasm(ctxt *obj.Link, cursym *obj.LSym, p *obj.Prog) {
	o := opindex[p.As&obj.AMask]

	if o == nil {
		ctxt.Diag("asmins: missing op %v", p)
		return
	}

	if pre := prefixof(ctxt, &p.From); pre != 0 {
		ab.Put1(byte(pre))
	}
	if pre := prefixof(ctxt, &p.To); pre != 0 {
		ab.Put1(byte(pre))
	}

	// Checks to warn about instruction/arguments combinations that
	// will unconditionally trigger illegal instruction trap (#UD).
	switch p.As {
	case AVGATHERDPD,
		AVGATHERQPD,
		AVGATHERDPS,
		AVGATHERQPS,
		AVPGATHERDD,
		AVPGATHERQD,
		AVPGATHERDQ,
		AVPGATHERQQ:
		// AVX512 gather requires explicit K mask.
		if p.GetFrom3().Reg >= REG_K0 && p.GetFrom3().Reg <= REG_K7 {
			if !avx512gatherValid(ctxt, p) {
				return
			}
		} else {
			if !avx2gatherValid(ctxt, p) {
				return
			}
		}
	}

	if p.Ft == 0 {
		p.Ft = uint8(oclass(ctxt, p, &p.From))
	}
	if p.Tt == 0 {
		p.Tt = uint8(oclass(ctxt, p, &p.To))
	}

	ft := int(p.Ft) * Ymax
	var f3t int
	tt := int(p.Tt) * Ymax

	xo := obj.Bool2int(o.op[0] == 0x0f)
	z := 0
	var a *obj.Addr
	var l int
	var op int
	var q *obj.Prog
	var r *obj.Reloc
	var rel obj.Reloc
	var v int64

	args := make([]int, 0, argListMax)
	if ft != Ynone*Ymax {
		args = append(args, ft)
	}
	for i := range p.RestArgs {
		args = append(args, oclass(ctxt, p, &p.RestArgs[i].Addr)*Ymax)
	}
	if tt != Ynone*Ymax {
		args = append(args, tt)
	}

	for _, yt := range o.ytab {
		// ytab matching is purely args-based,
		// but AVX512 suffixes like "Z" or "RU_SAE" will
		// add EVEX-only filter that will reject non-EVEX matches.
		//
		// Consider "VADDPD.BCST 2032(DX), X0, X0".
		// Without this rule, operands will lead to VEX-encoded form
		// and produce "c5b15813" encoding.
		if !yt.match(args) {
			// "xo" is always zero for VEX/EVEX encoded insts.
			z += int(yt.zoffset) + xo
		} else {
			if p.Scond != 0 && !evexZcase(yt.zcase) {
				// Do not signal error and continue to search
				// for matching EVEX-encoded form.
				z += int(yt.zoffset)
				continue
			}

			switch o.prefix {
			case Px1: // first option valid only in 32-bit mode
				if ctxt.Arch.Family == sys.AMD64 && z == 0 {
					z += int(yt.zoffset) + xo
					continue
				}
			case Pq: // 16 bit escape and opcode escape
				ab.Put2(Pe, Pm)

			case Pq3: // 16 bit escape and opcode escape + REX.W
				ab.rexflag |= Pw
				ab.Put2(Pe, Pm)

			case Pq4: // 66 0F 38
				ab.Put3(0x66, 0x0F, 0x38)

			case Pq4w: // 66 0F 38 + REX.W
				ab.rexflag |= Pw
				ab.Put3(0x66, 0x0F, 0x38)

			case Pq5: // F3 0F 38
				ab.Put3(0xF3, 0x0F, 0x38)

			case Pq5w: // F3 0F 38 + REX.W
				ab.rexflag |= Pw
				ab.Put3(0xF3, 0x0F, 0x38)

			case Pf2, // xmm opcode escape
				Pf3:
				ab.Put2(o.prefix, Pm)

			case Pef3:
				ab.Put3(Pe, Pf3, Pm)

			case Pfw: // xmm opcode escape + REX.W
				ab.rexflag |= Pw
				ab.Put2(Pf3, Pm)

			case Pm: // opcode escape
				ab.Put1(Pm)

			case Pe: // 16 bit escape
				ab.Put1(Pe)

			case Pw: // 64-bit escape
				if ctxt.Arch.Family != sys.AMD64 {
					ctxt.Diag("asmins: illegal 64: %v", p)
				}
				ab.rexflag |= Pw

			case Pw8: // 64-bit escape if z >= 8
				if z >= 8 {
					if ctxt.Arch.Family != sys.AMD64 {
						ctxt.Diag("asmins: illegal 64: %v", p)
					}
					ab.rexflag |= Pw
				}

			case Pb: // botch
				if ctxt.Arch.Family != sys.AMD64 && (isbadbyte(&p.From) || isbadbyte(&p.To)) {
					goto bad
				}
				// NOTE(rsc): This is probably safe to do always,
				// but when enabled it chooses different encodings
				// than the old cmd/internal/obj/i386 code did,
				// which breaks our "same bits out" checks.
				// In particular, CMPB AX, $0 encodes as 80 f8 00
				// in the original obj/i386, and it would encode
				// (using a valid, shorter form) as 3c 00 if we enabled
				// the call to bytereg here.
				if ctxt.Arch.Family == sys.AMD64 {
					bytereg(&p.From, &p.Ft)
					bytereg(&p.To, &p.Tt)
				}

			case P32: // 32 bit but illegal if 64-bit mode
				if ctxt.Arch.Family == sys.AMD64 {
					ctxt.Diag("asmins: illegal in 64-bit mode: %v", p)
				}

			case Py: // 64-bit only, no prefix
				if ctxt.Arch.Family != sys.AMD64 {
					ctxt.Diag("asmins: illegal in %d-bit mode: %v", ctxt.Arch.RegSize*8, p)
				}

			case Py1: // 64-bit only if z < 1, no prefix
				if z < 1 && ctxt.Arch.Family != sys.AMD64 {
					ctxt.Diag("asmins: illegal in %d-bit mode: %v", ctxt.Arch.RegSize*8, p)
				}

			case Py3: // 64-bit only if z < 3, no prefix
				if z < 3 && ctxt.Arch.Family != sys.AMD64 {
					ctxt.Diag("asmins: illegal in %d-bit mode: %v", ctxt.Arch.RegSize*8, p)
				}
			}

			if z >= len(o.op) {
				log.Fatalf("asmins bad table %v", p)
			}
			op = int(o.op[z])
			if op == 0x0f {
				ab.Put1(byte(op))
				z++
				op = int(o.op[z])
			}

			switch yt.zcase {
			default:
				ctxt.Diag("asmins: unknown z %d %v", yt.zcase, p)
				return

			case Zpseudo:
				break

			case Zlit:
				ab.PutOpBytesLit(z, &o.op)

			case Zlitr_m:
				ab.PutOpBytesLit(z, &o.op)
				ab.asmand(ctxt, cursym, p, &p.To, &p.From)

			case Zlitm_r:
				ab.PutOpBytesLit(z, &o.op)
				ab.asmand(ctxt, cursym, p, &p.From, &p.To)

			case Zlit_m_r:
				ab.PutOpBytesLit(z, &o.op)
				ab.asmand(ctxt, cursym, p, p.GetFrom3(), &p.To)

			case Zmb_r:
				bytereg(&p.From, &p.Ft)
				fallthrough
4431 case Zm_r: 4432 ab.Put1(byte(op)) 4433 ab.asmand(ctxt, cursym, p, &p.From, &p.To) 4434 4435 case Z_m_r: 4436 ab.Put1(byte(op)) 4437 ab.asmand(ctxt, cursym, p, p.GetFrom3(), &p.To) 4438 4439 case Zm2_r: 4440 ab.Put2(byte(op), o.op[z+1]) 4441 ab.asmand(ctxt, cursym, p, &p.From, &p.To) 4442 4443 case Zm_r_xm: 4444 ab.mediaop(ctxt, o, op, int(yt.zoffset), z) 4445 ab.asmand(ctxt, cursym, p, &p.From, &p.To) 4446 4447 case Zm_r_xm_nr: 4448 ab.rexflag = 0 4449 ab.mediaop(ctxt, o, op, int(yt.zoffset), z) 4450 ab.asmand(ctxt, cursym, p, &p.From, &p.To) 4451 4452 case Zm_r_i_xm: 4453 ab.mediaop(ctxt, o, op, int(yt.zoffset), z) 4454 ab.asmand(ctxt, cursym, p, &p.From, p.GetFrom3()) 4455 ab.Put1(byte(p.To.Offset)) 4456 4457 case Zibm_r, Zibr_m: 4458 ab.PutOpBytesLit(z, &o.op) 4459 if yt.zcase == Zibr_m { 4460 ab.asmand(ctxt, cursym, p, &p.To, p.GetFrom3()) 4461 } else { 4462 ab.asmand(ctxt, cursym, p, p.GetFrom3(), &p.To) 4463 } 4464 switch { 4465 default: 4466 ab.Put1(byte(p.From.Offset)) 4467 case yt.args[0] == Yi32 && o.prefix == Pe: 4468 ab.PutInt16(int16(p.From.Offset)) 4469 case yt.args[0] == Yi32: 4470 ab.PutInt32(int32(p.From.Offset)) 4471 } 4472 4473 case Zaut_r: 4474 ab.Put1(0x8d) // leal 4475 if p.From.Type != obj.TYPE_ADDR { 4476 ctxt.Diag("asmins: Zaut sb type ADDR") 4477 } 4478 p.From.Type = obj.TYPE_MEM 4479 ab.asmand(ctxt, cursym, p, &p.From, &p.To) 4480 p.From.Type = obj.TYPE_ADDR 4481 4482 case Zm_o: 4483 ab.Put1(byte(op)) 4484 ab.asmando(ctxt, cursym, p, &p.From, int(o.op[z+1])) 4485 4486 case Zr_m: 4487 ab.Put1(byte(op)) 4488 ab.asmand(ctxt, cursym, p, &p.To, &p.From) 4489 4490 case Zvex: 4491 ab.asmvex(ctxt, &p.From, p.GetFrom3(), &p.To, o.op[z], o.op[z+1]) 4492 4493 case Zvex_rm_v_r: 4494 ab.asmvex(ctxt, &p.From, p.GetFrom3(), &p.To, o.op[z], o.op[z+1]) 4495 ab.asmand(ctxt, cursym, p, &p.From, &p.To) 4496 4497 case Zvex_rm_v_ro: 4498 ab.asmvex(ctxt, &p.From, p.GetFrom3(), &p.To, o.op[z], o.op[z+1]) 4499 ab.asmando(ctxt, cursym, p, &p.From, 
int(o.op[z+2])) 4500 4501 case Zvex_i_rm_vo: 4502 ab.asmvex(ctxt, p.GetFrom3(), &p.To, nil, o.op[z], o.op[z+1]) 4503 ab.asmando(ctxt, cursym, p, p.GetFrom3(), int(o.op[z+2])) 4504 ab.Put1(byte(p.From.Offset)) 4505 4506 case Zvex_i_r_v: 4507 ab.asmvex(ctxt, p.GetFrom3(), &p.To, nil, o.op[z], o.op[z+1]) 4508 regnum := byte(0x7) 4509 if p.GetFrom3().Reg >= REG_X0 && p.GetFrom3().Reg <= REG_X15 { 4510 regnum &= byte(p.GetFrom3().Reg - REG_X0) 4511 } else { 4512 regnum &= byte(p.GetFrom3().Reg - REG_Y0) 4513 } 4514 ab.Put1(o.op[z+2] | regnum) 4515 ab.Put1(byte(p.From.Offset)) 4516 4517 case Zvex_i_rm_v_r: 4518 imm, from, from3, to := unpackOps4(p) 4519 ab.asmvex(ctxt, from, from3, to, o.op[z], o.op[z+1]) 4520 ab.asmand(ctxt, cursym, p, from, to) 4521 ab.Put1(byte(imm.Offset)) 4522 4523 case Zvex_i_rm_r: 4524 ab.asmvex(ctxt, p.GetFrom3(), nil, &p.To, o.op[z], o.op[z+1]) 4525 ab.asmand(ctxt, cursym, p, p.GetFrom3(), &p.To) 4526 ab.Put1(byte(p.From.Offset)) 4527 4528 case Zvex_v_rm_r: 4529 ab.asmvex(ctxt, p.GetFrom3(), &p.From, &p.To, o.op[z], o.op[z+1]) 4530 ab.asmand(ctxt, cursym, p, p.GetFrom3(), &p.To) 4531 4532 case Zvex_r_v_rm: 4533 ab.asmvex(ctxt, &p.To, p.GetFrom3(), &p.From, o.op[z], o.op[z+1]) 4534 ab.asmand(ctxt, cursym, p, &p.To, &p.From) 4535 4536 case Zvex_rm_r_vo: 4537 ab.asmvex(ctxt, &p.From, &p.To, p.GetFrom3(), o.op[z], o.op[z+1]) 4538 ab.asmando(ctxt, cursym, p, &p.From, int(o.op[z+2])) 4539 4540 case Zvex_i_r_rm: 4541 ab.asmvex(ctxt, &p.To, nil, p.GetFrom3(), o.op[z], o.op[z+1]) 4542 ab.asmand(ctxt, cursym, p, &p.To, p.GetFrom3()) 4543 ab.Put1(byte(p.From.Offset)) 4544 4545 case Zvex_hr_rm_v_r: 4546 hr, from, from3, to := unpackOps4(p) 4547 ab.asmvex(ctxt, from, from3, to, o.op[z], o.op[z+1]) 4548 ab.asmand(ctxt, cursym, p, from, to) 4549 ab.Put1(byte(regIndex(hr.Reg) << 4)) 4550 4551 case Zevex_k_rmo: 4552 ab.evex = newEVEXBits(z, &o.op) 4553 ab.asmevex(ctxt, p, &p.To, nil, nil, &p.From) 4554 ab.asmando(ctxt, cursym, p, &p.To, int(o.op[z+3])) 4555 4556 
case Zevex_i_rm_vo: 4557 ab.evex = newEVEXBits(z, &o.op) 4558 ab.asmevex(ctxt, p, p.GetFrom3(), &p.To, nil, nil) 4559 ab.asmando(ctxt, cursym, p, p.GetFrom3(), int(o.op[z+3])) 4560 ab.Put1(byte(p.From.Offset)) 4561 4562 case Zevex_i_rm_k_vo: 4563 imm, from, kmask, to := unpackOps4(p) 4564 ab.evex = newEVEXBits(z, &o.op) 4565 ab.asmevex(ctxt, p, from, to, nil, kmask) 4566 ab.asmando(ctxt, cursym, p, from, int(o.op[z+3])) 4567 ab.Put1(byte(imm.Offset)) 4568 4569 case Zevex_i_r_rm: 4570 ab.evex = newEVEXBits(z, &o.op) 4571 ab.asmevex(ctxt, p, &p.To, nil, p.GetFrom3(), nil) 4572 ab.asmand(ctxt, cursym, p, &p.To, p.GetFrom3()) 4573 ab.Put1(byte(p.From.Offset)) 4574 4575 case Zevex_i_r_k_rm: 4576 imm, from, kmask, to := unpackOps4(p) 4577 ab.evex = newEVEXBits(z, &o.op) 4578 ab.asmevex(ctxt, p, to, nil, from, kmask) 4579 ab.asmand(ctxt, cursym, p, to, from) 4580 ab.Put1(byte(imm.Offset)) 4581 4582 case Zevex_i_rm_r: 4583 ab.evex = newEVEXBits(z, &o.op) 4584 ab.asmevex(ctxt, p, p.GetFrom3(), nil, &p.To, nil) 4585 ab.asmand(ctxt, cursym, p, p.GetFrom3(), &p.To) 4586 ab.Put1(byte(p.From.Offset)) 4587 4588 case Zevex_i_rm_k_r: 4589 imm, from, kmask, to := unpackOps4(p) 4590 ab.evex = newEVEXBits(z, &o.op) 4591 ab.asmevex(ctxt, p, from, nil, to, kmask) 4592 ab.asmand(ctxt, cursym, p, from, to) 4593 ab.Put1(byte(imm.Offset)) 4594 4595 case Zevex_i_rm_v_r: 4596 imm, from, from3, to := unpackOps4(p) 4597 ab.evex = newEVEXBits(z, &o.op) 4598 ab.asmevex(ctxt, p, from, from3, to, nil) 4599 ab.asmand(ctxt, cursym, p, from, to) 4600 ab.Put1(byte(imm.Offset)) 4601 4602 case Zevex_i_rm_v_k_r: 4603 imm, from, from3, kmask, to := unpackOps5(p) 4604 ab.evex = newEVEXBits(z, &o.op) 4605 ab.asmevex(ctxt, p, from, from3, to, kmask) 4606 ab.asmand(ctxt, cursym, p, from, to) 4607 ab.Put1(byte(imm.Offset)) 4608 4609 case Zevex_r_v_rm: 4610 ab.evex = newEVEXBits(z, &o.op) 4611 ab.asmevex(ctxt, p, &p.To, p.GetFrom3(), &p.From, nil) 4612 ab.asmand(ctxt, cursym, p, &p.To, &p.From) 4613 4614 case 
Zevex_rm_v_r: 4615 ab.evex = newEVEXBits(z, &o.op) 4616 ab.asmevex(ctxt, p, &p.From, p.GetFrom3(), &p.To, nil) 4617 ab.asmand(ctxt, cursym, p, &p.From, &p.To) 4618 4619 case Zevex_rm_k_r: 4620 ab.evex = newEVEXBits(z, &o.op) 4621 ab.asmevex(ctxt, p, &p.From, nil, &p.To, p.GetFrom3()) 4622 ab.asmand(ctxt, cursym, p, &p.From, &p.To) 4623 4624 case Zevex_r_k_rm: 4625 ab.evex = newEVEXBits(z, &o.op) 4626 ab.asmevex(ctxt, p, &p.To, nil, &p.From, p.GetFrom3()) 4627 ab.asmand(ctxt, cursym, p, &p.To, &p.From) 4628 4629 case Zevex_rm_v_k_r: 4630 from, from3, kmask, to := unpackOps4(p) 4631 ab.evex = newEVEXBits(z, &o.op) 4632 ab.asmevex(ctxt, p, from, from3, to, kmask) 4633 ab.asmand(ctxt, cursym, p, from, to) 4634 4635 case Zevex_r_v_k_rm: 4636 from, from3, kmask, to := unpackOps4(p) 4637 ab.evex = newEVEXBits(z, &o.op) 4638 ab.asmevex(ctxt, p, to, from3, from, kmask) 4639 ab.asmand(ctxt, cursym, p, to, from) 4640 4641 case Zr_m_xm: 4642 ab.mediaop(ctxt, o, op, int(yt.zoffset), z) 4643 ab.asmand(ctxt, cursym, p, &p.To, &p.From) 4644 4645 case Zr_m_xm_nr: 4646 ab.rexflag = 0 4647 ab.mediaop(ctxt, o, op, int(yt.zoffset), z) 4648 ab.asmand(ctxt, cursym, p, &p.To, &p.From) 4649 4650 case Zo_m: 4651 ab.Put1(byte(op)) 4652 ab.asmando(ctxt, cursym, p, &p.To, int(o.op[z+1])) 4653 4654 case Zcallindreg: 4655 r = obj.Addrel(cursym) 4656 r.Off = int32(p.Pc) 4657 r.Type = objabi.R_CALLIND 4658 r.Siz = 0 4659 fallthrough 4660 4661 case Zo_m64: 4662 ab.Put1(byte(op)) 4663 ab.asmandsz(ctxt, cursym, p, &p.To, int(o.op[z+1]), 0, 1) 4664 4665 case Zm_ibo: 4666 ab.Put1(byte(op)) 4667 ab.asmando(ctxt, cursym, p, &p.From, int(o.op[z+1])) 4668 ab.Put1(byte(vaddr(ctxt, p, &p.To, nil))) 4669 4670 case Zibo_m: 4671 ab.Put1(byte(op)) 4672 ab.asmando(ctxt, cursym, p, &p.To, int(o.op[z+1])) 4673 ab.Put1(byte(vaddr(ctxt, p, &p.From, nil))) 4674 4675 case Zibo_m_xm: 4676 z = ab.mediaop(ctxt, o, op, int(yt.zoffset), z) 4677 ab.asmando(ctxt, cursym, p, &p.To, int(o.op[z+1])) 4678 ab.Put1(byte(vaddr(ctxt, 
p, &p.From, nil))) 4679 4680 case Z_ib, Zib_: 4681 if yt.zcase == Zib_ { 4682 a = &p.From 4683 } else { 4684 a = &p.To 4685 } 4686 ab.Put1(byte(op)) 4687 if p.As == AXABORT { 4688 ab.Put1(o.op[z+1]) 4689 } 4690 ab.Put1(byte(vaddr(ctxt, p, a, nil))) 4691 4692 case Zib_rp: 4693 ab.rexflag |= regrex[p.To.Reg] & (Rxb | 0x40) 4694 ab.Put2(byte(op+reg[p.To.Reg]), byte(vaddr(ctxt, p, &p.From, nil))) 4695 4696 case Zil_rp: 4697 ab.rexflag |= regrex[p.To.Reg] & Rxb 4698 ab.Put1(byte(op + reg[p.To.Reg])) 4699 if o.prefix == Pe { 4700 v = vaddr(ctxt, p, &p.From, nil) 4701 ab.PutInt16(int16(v)) 4702 } else { 4703 ab.relput4(ctxt, cursym, p, &p.From) 4704 } 4705 4706 case Zo_iw: 4707 ab.Put1(byte(op)) 4708 if p.From.Type != obj.TYPE_NONE { 4709 v = vaddr(ctxt, p, &p.From, nil) 4710 ab.PutInt16(int16(v)) 4711 } 4712 4713 case Ziq_rp: 4714 v = vaddr(ctxt, p, &p.From, &rel) 4715 l = int(v >> 32) 4716 if l == 0 && rel.Siz != 8 { 4717 ab.rexflag &^= (0x40 | Rxw) 4718 4719 ab.rexflag |= regrex[p.To.Reg] & Rxb 4720 ab.Put1(byte(0xb8 + reg[p.To.Reg])) 4721 if rel.Type != 0 { 4722 r = obj.Addrel(cursym) 4723 *r = rel 4724 r.Off = int32(p.Pc + int64(ab.Len())) 4725 } 4726 4727 ab.PutInt32(int32(v)) 4728 } else if l == -1 && uint64(v)&(uint64(1)<<31) != 0 { // sign extend 4729 ab.Put1(0xc7) 4730 ab.asmando(ctxt, cursym, p, &p.To, 0) 4731 4732 ab.PutInt32(int32(v)) // need all 8 4733 } else { 4734 ab.rexflag |= regrex[p.To.Reg] & Rxb 4735 ab.Put1(byte(op + reg[p.To.Reg])) 4736 if rel.Type != 0 { 4737 r = obj.Addrel(cursym) 4738 *r = rel 4739 r.Off = int32(p.Pc + int64(ab.Len())) 4740 } 4741 4742 ab.PutInt64(v) 4743 } 4744 4745 case Zib_rr: 4746 ab.Put1(byte(op)) 4747 ab.asmand(ctxt, cursym, p, &p.To, &p.To) 4748 ab.Put1(byte(vaddr(ctxt, p, &p.From, nil))) 4749 4750 case Z_il, Zil_: 4751 if yt.zcase == Zil_ { 4752 a = &p.From 4753 } else { 4754 a = &p.To 4755 } 4756 ab.Put1(byte(op)) 4757 if o.prefix == Pe { 4758 v = vaddr(ctxt, p, a, nil) 4759 ab.PutInt16(int16(v)) 4760 } else { 4761 
ab.relput4(ctxt, cursym, p, a) 4762 } 4763 4764 case Zm_ilo, Zilo_m: 4765 ab.Put1(byte(op)) 4766 if yt.zcase == Zilo_m { 4767 a = &p.From 4768 ab.asmando(ctxt, cursym, p, &p.To, int(o.op[z+1])) 4769 } else { 4770 a = &p.To 4771 ab.asmando(ctxt, cursym, p, &p.From, int(o.op[z+1])) 4772 } 4773 4774 if o.prefix == Pe { 4775 v = vaddr(ctxt, p, a, nil) 4776 ab.PutInt16(int16(v)) 4777 } else { 4778 ab.relput4(ctxt, cursym, p, a) 4779 } 4780 4781 case Zil_rr: 4782 ab.Put1(byte(op)) 4783 ab.asmand(ctxt, cursym, p, &p.To, &p.To) 4784 if o.prefix == Pe { 4785 v = vaddr(ctxt, p, &p.From, nil) 4786 ab.PutInt16(int16(v)) 4787 } else { 4788 ab.relput4(ctxt, cursym, p, &p.From) 4789 } 4790 4791 case Z_rp: 4792 ab.rexflag |= regrex[p.To.Reg] & (Rxb | 0x40) 4793 ab.Put1(byte(op + reg[p.To.Reg])) 4794 4795 case Zrp_: 4796 ab.rexflag |= regrex[p.From.Reg] & (Rxb | 0x40) 4797 ab.Put1(byte(op + reg[p.From.Reg])) 4798 4799 case Zcallcon, Zjmpcon: 4800 if yt.zcase == Zcallcon { 4801 ab.Put1(byte(op)) 4802 } else { 4803 ab.Put1(o.op[z+1]) 4804 } 4805 r = obj.Addrel(cursym) 4806 r.Off = int32(p.Pc + int64(ab.Len())) 4807 r.Type = objabi.R_PCREL 4808 r.Siz = 4 4809 r.Add = p.To.Offset 4810 ab.PutInt32(0) 4811 4812 case Zcallind: 4813 ab.Put2(byte(op), o.op[z+1]) 4814 r = obj.Addrel(cursym) 4815 r.Off = int32(p.Pc + int64(ab.Len())) 4816 if ctxt.Arch.Family == sys.AMD64 { 4817 r.Type = objabi.R_PCREL 4818 } else { 4819 r.Type = objabi.R_ADDR 4820 } 4821 r.Siz = 4 4822 r.Add = p.To.Offset 4823 r.Sym = p.To.Sym 4824 ab.PutInt32(0) 4825 4826 case Zcall, Zcallduff: 4827 if p.To.Sym == nil { 4828 ctxt.Diag("call without target") 4829 ctxt.DiagFlush() 4830 log.Fatalf("bad code") 4831 } 4832 4833 if yt.zcase == Zcallduff && ctxt.Flag_dynlink { 4834 ctxt.Diag("directly calling duff when dynamically linking Go") 4835 } 4836 4837 if yt.zcase == Zcallduff && ctxt.Arch.Family == sys.AMD64 { 4838 // Maintain BP around call, since duffcopy/duffzero can't do it 4839 // (the call jumps into the middle of 
the function). 4840 // This makes it possible to see call sites for duffcopy/duffzero in 4841 // BP-based profiling tools like Linux perf (which is the 4842 // whole point of maintaining frame pointers in Go). 4843 // MOVQ BP, -16(SP) 4844 // LEAQ -16(SP), BP 4845 ab.Put(bpduff1) 4846 } 4847 ab.Put1(byte(op)) 4848 r = obj.Addrel(cursym) 4849 r.Off = int32(p.Pc + int64(ab.Len())) 4850 r.Sym = p.To.Sym 4851 r.Add = p.To.Offset 4852 r.Type = objabi.R_CALL 4853 r.Siz = 4 4854 ab.PutInt32(0) 4855 4856 if yt.zcase == Zcallduff && ctxt.Arch.Family == sys.AMD64 { 4857 // Pop BP pushed above. 4858 // MOVQ 0(BP), BP 4859 ab.Put(bpduff2) 4860 } 4861 4862 // TODO: jump across functions needs reloc 4863 case Zbr, Zjmp, Zloop: 4864 if p.As == AXBEGIN { 4865 ab.Put1(byte(op)) 4866 } 4867 if p.To.Sym != nil { 4868 if yt.zcase != Zjmp { 4869 ctxt.Diag("branch to ATEXT") 4870 ctxt.DiagFlush() 4871 log.Fatalf("bad code") 4872 } 4873 4874 ab.Put1(o.op[z+1]) 4875 r = obj.Addrel(cursym) 4876 r.Off = int32(p.Pc + int64(ab.Len())) 4877 r.Sym = p.To.Sym 4878 // Note: R_CALL instead of R_PCREL. R_CALL is more permissive in that 4879 // it can point to a trampoline instead of the destination itself. 4880 r.Type = objabi.R_CALL 4881 r.Siz = 4 4882 ab.PutInt32(0) 4883 break 4884 } 4885 4886 // Assumes q is in this function. 4887 // TODO: Check in input, preserve in brchain. 4888 4889 // Fill in backward jump now. 
4890 q = p.To.Target() 4891 4892 if q == nil { 4893 ctxt.Diag("jmp/branch/loop without target") 4894 ctxt.DiagFlush() 4895 log.Fatalf("bad code") 4896 } 4897 4898 if p.Back&branchBackwards != 0 { 4899 v = q.Pc - (p.Pc + 2) 4900 if v >= -128 && p.As != AXBEGIN { 4901 if p.As == AJCXZL { 4902 ab.Put1(0x67) 4903 } 4904 ab.Put2(byte(op), byte(v)) 4905 } else if yt.zcase == Zloop { 4906 ctxt.Diag("loop too far: %v", p) 4907 } else { 4908 v -= 5 - 2 4909 if p.As == AXBEGIN { 4910 v-- 4911 } 4912 if yt.zcase == Zbr { 4913 ab.Put1(0x0f) 4914 v-- 4915 } 4916 4917 ab.Put1(o.op[z+1]) 4918 ab.PutInt32(int32(v)) 4919 } 4920 4921 break 4922 } 4923 4924 // Annotate target; will fill in later. 4925 p.Forwd = q.Rel 4926 4927 q.Rel = p 4928 if p.Back&branchShort != 0 && p.As != AXBEGIN { 4929 if p.As == AJCXZL { 4930 ab.Put1(0x67) 4931 } 4932 ab.Put2(byte(op), 0) 4933 } else if yt.zcase == Zloop { 4934 ctxt.Diag("loop too far: %v", p) 4935 } else { 4936 if yt.zcase == Zbr { 4937 ab.Put1(0x0f) 4938 } 4939 ab.Put1(o.op[z+1]) 4940 ab.PutInt32(0) 4941 } 4942 4943 case Zbyte: 4944 v = vaddr(ctxt, p, &p.From, &rel) 4945 if rel.Siz != 0 { 4946 rel.Siz = uint8(op) 4947 r = obj.Addrel(cursym) 4948 *r = rel 4949 r.Off = int32(p.Pc + int64(ab.Len())) 4950 } 4951 4952 ab.Put1(byte(v)) 4953 if op > 1 { 4954 ab.Put1(byte(v >> 8)) 4955 if op > 2 { 4956 ab.PutInt16(int16(v >> 16)) 4957 if op > 4 { 4958 ab.PutInt32(int32(v >> 32)) 4959 } 4960 } 4961 } 4962 } 4963 4964 return 4965 } 4966 } 4967 f3t = Ynone * Ymax 4968 if p.GetFrom3() != nil { 4969 f3t = oclass(ctxt, p, p.GetFrom3()) * Ymax 4970 } 4971 for mo := ymovtab; mo[0].as != 0; mo = mo[1:] { 4972 var pp obj.Prog 4973 var t []byte 4974 if p.As == mo[0].as { 4975 if ycover[ft+int(mo[0].ft)] != 0 && ycover[f3t+int(mo[0].f3t)] != 0 && ycover[tt+int(mo[0].tt)] != 0 { 4976 t = mo[0].op[:] 4977 switch mo[0].code { 4978 default: 4979 ctxt.Diag("asmins: unknown mov %d %v", mo[0].code, p) 4980 4981 case movLit: 4982 for z = 0; t[z] != 0; z++ { 4983 
ab.Put1(t[z]) 4984 } 4985 4986 case movRegMem: 4987 ab.Put1(t[0]) 4988 ab.asmando(ctxt, cursym, p, &p.To, int(t[1])) 4989 4990 case movMemReg: 4991 ab.Put1(t[0]) 4992 ab.asmando(ctxt, cursym, p, &p.From, int(t[1])) 4993 4994 case movRegMem2op: // r,m - 2op 4995 ab.Put2(t[0], t[1]) 4996 ab.asmando(ctxt, cursym, p, &p.To, int(t[2])) 4997 ab.rexflag |= regrex[p.From.Reg] & (Rxr | 0x40) 4998 4999 case movMemReg2op: 5000 ab.Put2(t[0], t[1]) 5001 ab.asmando(ctxt, cursym, p, &p.From, int(t[2])) 5002 ab.rexflag |= regrex[p.To.Reg] & (Rxr | 0x40) 5003 5004 case movFullPtr: 5005 if t[0] != 0 { 5006 ab.Put1(t[0]) 5007 } 5008 switch p.To.Index { 5009 default: 5010 goto bad 5011 5012 case REG_DS: 5013 ab.Put1(0xc5) 5014 5015 case REG_SS: 5016 ab.Put2(0x0f, 0xb2) 5017 5018 case REG_ES: 5019 ab.Put1(0xc4) 5020 5021 case REG_FS: 5022 ab.Put2(0x0f, 0xb4) 5023 5024 case REG_GS: 5025 ab.Put2(0x0f, 0xb5) 5026 } 5027 5028 ab.asmand(ctxt, cursym, p, &p.From, &p.To) 5029 5030 case movDoubleShift: 5031 if t[0] == Pw { 5032 if ctxt.Arch.Family != sys.AMD64 { 5033 ctxt.Diag("asmins: illegal 64: %v", p) 5034 } 5035 ab.rexflag |= Pw 5036 t = t[1:] 5037 } else if t[0] == Pe { 5038 ab.Put1(Pe) 5039 t = t[1:] 5040 } 5041 5042 switch p.From.Type { 5043 default: 5044 goto bad 5045 5046 case obj.TYPE_CONST: 5047 ab.Put2(0x0f, t[0]) 5048 ab.asmandsz(ctxt, cursym, p, &p.To, reg[p.GetFrom3().Reg], regrex[p.GetFrom3().Reg], 0) 5049 ab.Put1(byte(p.From.Offset)) 5050 5051 case obj.TYPE_REG: 5052 switch p.From.Reg { 5053 default: 5054 goto bad 5055 5056 case REG_CL, REG_CX: 5057 ab.Put2(0x0f, t[1]) 5058 ab.asmandsz(ctxt, cursym, p, &p.To, reg[p.GetFrom3().Reg], regrex[p.GetFrom3().Reg], 0) 5059 } 5060 } 5061 5062 // NOTE: The systems listed here are the ones that use the "TLS initial exec" model, 5063 // where you load the TLS base register into a register and then index off that 5064 // register to access the actual TLS variables. 
Systems that allow direct TLS access 5065 // are handled in prefixof above and should not be listed here. 5066 case movTLSReg: 5067 if ctxt.Arch.Family == sys.AMD64 && p.As != AMOVQ || ctxt.Arch.Family == sys.I386 && p.As != AMOVL { 5068 ctxt.Diag("invalid load of TLS: %v", p) 5069 } 5070 5071 if ctxt.Arch.Family == sys.I386 { 5072 // NOTE: The systems listed here are the ones that use the "TLS initial exec" model, 5073 // where you load the TLS base register into a register and then index off that 5074 // register to access the actual TLS variables. Systems that allow direct TLS access 5075 // are handled in prefixof above and should not be listed here. 5076 switch ctxt.Headtype { 5077 default: 5078 log.Fatalf("unknown TLS base location for %v", ctxt.Headtype) 5079 5080 case objabi.Hlinux, objabi.Hfreebsd: 5081 if ctxt.Flag_shared { 5082 // Note that this is not generating the same insns as the other cases. 5083 // MOV TLS, dst 5084 // becomes 5085 // call __x86.get_pc_thunk.dst 5086 // movl (gotpc + g@gotntpoff)(dst), dst 5087 // which is encoded as 5088 // call __x86.get_pc_thunk.dst 5089 // movq 0(dst), dst 5090 // and R_CALL & R_TLS_IE relocs. This all assumes the only tls variable we access 5091 // is g, which we can't check here, but will when we assemble the second 5092 // instruction. 5093 dst := p.To.Reg 5094 ab.Put1(0xe8) 5095 r = obj.Addrel(cursym) 5096 r.Off = int32(p.Pc + int64(ab.Len())) 5097 r.Type = objabi.R_CALL 5098 r.Siz = 4 5099 r.Sym = ctxt.Lookup("__x86.get_pc_thunk." + strings.ToLower(rconv(int(dst)))) 5100 ab.PutInt32(0) 5101 5102 ab.Put2(0x8B, byte(2<<6|reg[dst]|(reg[dst]<<3))) 5103 r = obj.Addrel(cursym) 5104 r.Off = int32(p.Pc + int64(ab.Len())) 5105 r.Type = objabi.R_TLS_IE 5106 r.Siz = 4 5107 r.Add = 2 5108 ab.PutInt32(0) 5109 } else { 5110 // ELF TLS base is 0(GS). 
5111 pp.From = p.From 5112 5113 pp.From.Type = obj.TYPE_MEM 5114 pp.From.Reg = REG_GS 5115 pp.From.Offset = 0 5116 pp.From.Index = REG_NONE 5117 pp.From.Scale = 0 5118 ab.Put2(0x65, // GS 5119 0x8B) 5120 ab.asmand(ctxt, cursym, p, &pp.From, &p.To) 5121 } 5122 case objabi.Hplan9: 5123 pp.From = obj.Addr{} 5124 pp.From.Type = obj.TYPE_MEM 5125 pp.From.Name = obj.NAME_EXTERN 5126 pp.From.Sym = plan9privates 5127 pp.From.Offset = 0 5128 pp.From.Index = REG_NONE 5129 ab.Put1(0x8B) 5130 ab.asmand(ctxt, cursym, p, &pp.From, &p.To) 5131 5132 case objabi.Hwindows: 5133 // Windows TLS base is always 0x14(FS). 5134 pp.From = p.From 5135 5136 pp.From.Type = obj.TYPE_MEM 5137 pp.From.Reg = REG_FS 5138 pp.From.Offset = 0x14 5139 pp.From.Index = REG_NONE 5140 pp.From.Scale = 0 5141 ab.Put2(0x64, // FS 5142 0x8B) 5143 ab.asmand(ctxt, cursym, p, &pp.From, &p.To) 5144 } 5145 break 5146 } 5147 5148 switch ctxt.Headtype { 5149 default: 5150 log.Fatalf("unknown TLS base location for %v", ctxt.Headtype) 5151 5152 case objabi.Hlinux, objabi.Hfreebsd: 5153 if !ctxt.Flag_shared { 5154 log.Fatalf("unknown TLS base location for linux/freebsd without -shared") 5155 } 5156 // Note that this is not generating the same insn as the other cases. 5157 // MOV TLS, R_to 5158 // becomes 5159 // movq g@gottpoff(%rip), R_to 5160 // which is encoded as 5161 // movq 0(%rip), R_to 5162 // and a R_TLS_IE reloc. This all assumes the only tls variable we access 5163 // is g, which we can't check here, but will when we assemble the second 5164 // instruction. 
5165 ab.rexflag = Pw | (regrex[p.To.Reg] & Rxr) 5166 5167 ab.Put2(0x8B, byte(0x05|(reg[p.To.Reg]<<3))) 5168 r = obj.Addrel(cursym) 5169 r.Off = int32(p.Pc + int64(ab.Len())) 5170 r.Type = objabi.R_TLS_IE 5171 r.Siz = 4 5172 r.Add = -4 5173 ab.PutInt32(0) 5174 5175 case objabi.Hplan9: 5176 pp.From = obj.Addr{} 5177 pp.From.Type = obj.TYPE_MEM 5178 pp.From.Name = obj.NAME_EXTERN 5179 pp.From.Sym = plan9privates 5180 pp.From.Offset = 0 5181 pp.From.Index = REG_NONE 5182 ab.rexflag |= Pw 5183 ab.Put1(0x8B) 5184 ab.asmand(ctxt, cursym, p, &pp.From, &p.To) 5185 5186 case objabi.Hsolaris: // TODO(rsc): Delete Hsolaris from list. Should not use this code. See progedit in obj6.c. 5187 // TLS base is 0(FS). 5188 pp.From = p.From 5189 5190 pp.From.Type = obj.TYPE_MEM 5191 pp.From.Name = obj.NAME_NONE 5192 pp.From.Reg = REG_NONE 5193 pp.From.Offset = 0 5194 pp.From.Index = REG_NONE 5195 pp.From.Scale = 0 5196 ab.rexflag |= Pw 5197 ab.Put2(0x64, // FS 5198 0x8B) 5199 ab.asmand(ctxt, cursym, p, &pp.From, &p.To) 5200 5201 case objabi.Hwindows: 5202 // Windows TLS base is always 0x28(GS). 5203 pp.From = p.From 5204 5205 pp.From.Type = obj.TYPE_MEM 5206 pp.From.Name = obj.NAME_NONE 5207 pp.From.Reg = REG_GS 5208 pp.From.Offset = 0x28 5209 pp.From.Index = REG_NONE 5210 pp.From.Scale = 0 5211 ab.rexflag |= Pw 5212 ab.Put2(0x65, // GS 5213 0x8B) 5214 ab.asmand(ctxt, cursym, p, &pp.From, &p.To) 5215 } 5216 } 5217 return 5218 } 5219 } 5220 } 5221 goto bad 5222 5223bad: 5224 if ctxt.Arch.Family != sys.AMD64 { 5225 // here, the assembly has failed. 5226 // if it's a byte instruction that has 5227 // unaddressable registers, try to 5228 // exchange registers and reissue the 5229 // instruction with the operands renamed. 5230 pp := *p 5231 5232 unbytereg(&pp.From, &pp.Ft) 5233 unbytereg(&pp.To, &pp.Tt) 5234 5235 z := int(p.From.Reg) 5236 if p.From.Type == obj.TYPE_REG && z >= REG_BP && z <= REG_DI { 5237 // TODO(rsc): Use this code for x86-64 too. 
It has bug fixes not present in the amd64 code base. 5238 // For now, different to keep bit-for-bit compatibility. 5239 if ctxt.Arch.Family == sys.I386 { 5240 breg := byteswapreg(ctxt, &p.To) 5241 if breg != REG_AX { 5242 ab.Put1(0x87) // xchg lhs,bx 5243 ab.asmando(ctxt, cursym, p, &p.From, reg[breg]) 5244 subreg(&pp, z, breg) 5245 ab.doasm(ctxt, cursym, &pp) 5246 ab.Put1(0x87) // xchg lhs,bx 5247 ab.asmando(ctxt, cursym, p, &p.From, reg[breg]) 5248 } else { 5249 ab.Put1(byte(0x90 + reg[z])) // xchg lsh,ax 5250 subreg(&pp, z, REG_AX) 5251 ab.doasm(ctxt, cursym, &pp) 5252 ab.Put1(byte(0x90 + reg[z])) // xchg lsh,ax 5253 } 5254 return 5255 } 5256 5257 if isax(&p.To) || p.To.Type == obj.TYPE_NONE { 5258 // We certainly don't want to exchange 5259 // with AX if the op is MUL or DIV. 5260 ab.Put1(0x87) // xchg lhs,bx 5261 ab.asmando(ctxt, cursym, p, &p.From, reg[REG_BX]) 5262 subreg(&pp, z, REG_BX) 5263 ab.doasm(ctxt, cursym, &pp) 5264 ab.Put1(0x87) // xchg lhs,bx 5265 ab.asmando(ctxt, cursym, p, &p.From, reg[REG_BX]) 5266 } else { 5267 ab.Put1(byte(0x90 + reg[z])) // xchg lsh,ax 5268 subreg(&pp, z, REG_AX) 5269 ab.doasm(ctxt, cursym, &pp) 5270 ab.Put1(byte(0x90 + reg[z])) // xchg lsh,ax 5271 } 5272 return 5273 } 5274 5275 z = int(p.To.Reg) 5276 if p.To.Type == obj.TYPE_REG && z >= REG_BP && z <= REG_DI { 5277 // TODO(rsc): Use this code for x86-64 too. It has bug fixes not present in the amd64 code base. 5278 // For now, different to keep bit-for-bit compatibility. 
5279 if ctxt.Arch.Family == sys.I386 { 5280 breg := byteswapreg(ctxt, &p.From) 5281 if breg != REG_AX { 5282 ab.Put1(0x87) //xchg rhs,bx 5283 ab.asmando(ctxt, cursym, p, &p.To, reg[breg]) 5284 subreg(&pp, z, breg) 5285 ab.doasm(ctxt, cursym, &pp) 5286 ab.Put1(0x87) // xchg rhs,bx 5287 ab.asmando(ctxt, cursym, p, &p.To, reg[breg]) 5288 } else { 5289 ab.Put1(byte(0x90 + reg[z])) // xchg rsh,ax 5290 subreg(&pp, z, REG_AX) 5291 ab.doasm(ctxt, cursym, &pp) 5292 ab.Put1(byte(0x90 + reg[z])) // xchg rsh,ax 5293 } 5294 return 5295 } 5296 5297 if isax(&p.From) { 5298 ab.Put1(0x87) // xchg rhs,bx 5299 ab.asmando(ctxt, cursym, p, &p.To, reg[REG_BX]) 5300 subreg(&pp, z, REG_BX) 5301 ab.doasm(ctxt, cursym, &pp) 5302 ab.Put1(0x87) // xchg rhs,bx 5303 ab.asmando(ctxt, cursym, p, &p.To, reg[REG_BX]) 5304 } else { 5305 ab.Put1(byte(0x90 + reg[z])) // xchg rsh,ax 5306 subreg(&pp, z, REG_AX) 5307 ab.doasm(ctxt, cursym, &pp) 5308 ab.Put1(byte(0x90 + reg[z])) // xchg rsh,ax 5309 } 5310 return 5311 } 5312 } 5313 5314 ctxt.Diag("%s: invalid instruction: %v", cursym.Name, p) 5315} 5316 5317// byteswapreg returns a byte-addressable register (AX, BX, CX, DX) 5318// which is not referenced in a. 5319// If a is empty, it returns BX to account for MULB-like instructions 5320// that might use DX and AX. 
5321func byteswapreg(ctxt *obj.Link, a *obj.Addr) int { 5322 cana, canb, canc, cand := true, true, true, true 5323 if a.Type == obj.TYPE_NONE { 5324 cana, cand = false, false 5325 } 5326 5327 if a.Type == obj.TYPE_REG || ((a.Type == obj.TYPE_MEM || a.Type == obj.TYPE_ADDR) && a.Name == obj.NAME_NONE) { 5328 switch a.Reg { 5329 case REG_NONE: 5330 cana, cand = false, false 5331 case REG_AX, REG_AL, REG_AH: 5332 cana = false 5333 case REG_BX, REG_BL, REG_BH: 5334 canb = false 5335 case REG_CX, REG_CL, REG_CH: 5336 canc = false 5337 case REG_DX, REG_DL, REG_DH: 5338 cand = false 5339 } 5340 } 5341 5342 if a.Type == obj.TYPE_MEM || a.Type == obj.TYPE_ADDR { 5343 switch a.Index { 5344 case REG_AX: 5345 cana = false 5346 case REG_BX: 5347 canb = false 5348 case REG_CX: 5349 canc = false 5350 case REG_DX: 5351 cand = false 5352 } 5353 } 5354 5355 switch { 5356 case cana: 5357 return REG_AX 5358 case canb: 5359 return REG_BX 5360 case canc: 5361 return REG_CX 5362 case cand: 5363 return REG_DX 5364 default: 5365 ctxt.Diag("impossible byte register") 5366 ctxt.DiagFlush() 5367 log.Fatalf("bad code") 5368 return 0 5369 } 5370} 5371 5372func isbadbyte(a *obj.Addr) bool { 5373 return a.Type == obj.TYPE_REG && (REG_BP <= a.Reg && a.Reg <= REG_DI || REG_BPB <= a.Reg && a.Reg <= REG_DIB) 5374} 5375 5376func (ab *AsmBuf) asmins(ctxt *obj.Link, cursym *obj.LSym, p *obj.Prog) { 5377 ab.Reset() 5378 5379 ab.rexflag = 0 5380 ab.vexflag = false 5381 ab.evexflag = false 5382 mark := ab.Len() 5383 ab.doasm(ctxt, cursym, p) 5384 if ab.rexflag != 0 && !ab.vexflag && !ab.evexflag { 5385 // as befits the whole approach of the architecture, 5386 // the rex prefix must appear before the first opcode byte 5387 // (and thus after any 66/67/f2/f3/26/2e/3e prefix bytes, but 5388 // before the 0f opcode escape!), or it might be ignored. 5389 // note that the handbook often misleadingly shows 66/f2/f3 in `opcode'. 
5390 if ctxt.Arch.Family != sys.AMD64 { 5391 ctxt.Diag("asmins: illegal in mode %d: %v (%d %d)", ctxt.Arch.RegSize*8, p, p.Ft, p.Tt) 5392 } 5393 n := ab.Len() 5394 var np int 5395 for np = mark; np < n; np++ { 5396 c := ab.At(np) 5397 if c != 0xf2 && c != 0xf3 && (c < 0x64 || c > 0x67) && c != 0x2e && c != 0x3e && c != 0x26 { 5398 break 5399 } 5400 } 5401 ab.Insert(np, byte(0x40|ab.rexflag)) 5402 } 5403 5404 n := ab.Len() 5405 for i := len(cursym.R) - 1; i >= 0; i-- { 5406 r := &cursym.R[i] 5407 if int64(r.Off) < p.Pc { 5408 break 5409 } 5410 if ab.rexflag != 0 && !ab.vexflag && !ab.evexflag { 5411 r.Off++ 5412 } 5413 if r.Type == objabi.R_PCREL { 5414 if ctxt.Arch.Family == sys.AMD64 || p.As == obj.AJMP || p.As == obj.ACALL { 5415 // PC-relative addressing is relative to the end of the instruction, 5416 // but the relocations applied by the linker are relative to the end 5417 // of the relocation. Because immediate instruction 5418 // arguments can follow the PC-relative memory reference in the 5419 // instruction encoding, the two may not coincide. In this case, 5420 // adjust addend so that linker can keep relocating relative to the 5421 // end of the relocation. 5422 r.Add -= p.Pc + int64(n) - (int64(r.Off) + int64(r.Siz)) 5423 } else if ctxt.Arch.Family == sys.I386 { 5424 // On 386 PC-relative addressing (for non-call/jmp instructions) 5425 // assumes that the previous instruction loaded the PC of the end 5426 // of that instruction into CX, so the adjustment is relative to 5427 // that. 5428 r.Add += int64(r.Off) - p.Pc + int64(r.Siz) 5429 } 5430 } 5431 if r.Type == objabi.R_GOTPCREL && ctxt.Arch.Family == sys.I386 { 5432 // On 386, R_GOTPCREL makes the same assumptions as R_PCREL. 5433 r.Add += int64(r.Off) - p.Pc + int64(r.Siz) 5434 } 5435 5436 } 5437} 5438 5439// unpackOps4 extracts 4 operands from p. 
5440func unpackOps4(p *obj.Prog) (arg0, arg1, arg2, dst *obj.Addr) { 5441 return &p.From, &p.RestArgs[0].Addr, &p.RestArgs[1].Addr, &p.To 5442} 5443 5444// unpackOps5 extracts 5 operands from p. 5445func unpackOps5(p *obj.Prog) (arg0, arg1, arg2, arg3, dst *obj.Addr) { 5446 return &p.From, &p.RestArgs[0].Addr, &p.RestArgs[1].Addr, &p.RestArgs[2].Addr, &p.To 5447} 5448