1----------------------------------------------------------------------------
2-- LuaJIT x86/x64 disassembler module.
3--
4-- Copyright (C) 2005-2017 Mike Pall. All rights reserved.
5-- Released under the MIT license. See Copyright Notice in luajit.h
6----------------------------------------------------------------------------
7-- This is a helper module used by the LuaJIT machine code dumper module.
8--
9-- Sending small code snippets to an external disassembler and mixing the
10-- output with our own stuff was too fragile. So I had to bite the bullet
11-- and write yet another x86 disassembler. Oh well ...
12--
13-- The output format is very similar to what ndisasm generates. But it has
14-- been developed independently by looking at the opcode tables from the
15-- Intel and AMD manuals. The supported instruction set is quite extensive
16-- and reflects what a current generation Intel or AMD CPU implements in
17-- 32 bit and 64 bit mode. Yes, this includes MMX, SSE, SSE2, SSE3, SSSE3,
18-- SSE4.1, SSE4.2, SSE4a, AVX, AVX2 and even privileged and hypervisor
19-- (VMX/SVM) instructions.
20--
21-- Notes:
22-- * The (useless) a16 prefix, 3DNow and pre-586 opcodes are unsupported.
23-- * No attempt at optimization has been made -- it's fast enough for my needs.
24------------------------------------------------------------------------------
25
26local type = type
27local sub, byte, format = string.sub, string.byte, string.format
28local match, gmatch, gsub = string.match, string.gmatch, string.gsub
29local lower, rep = string.lower, string.rep
30local bit = require("bit")
31local tohex = bit.tohex
32
-- Map for 1st opcode byte in 32 bit mode. Ugly? Well ... read on.
--
-- The table is indexed by the opcode byte. Each entry is a name (lower-case
-- letters, digits and spaces) followed by a pattern string. The first char
-- of the pattern selects the handler in map_act:
--   operand size chars  -> putpat decodes the operands (see putpat).
--   ":"                 -> prefix; only sets a field in ctx.
--   "*"                 -> chain to the special handler named before the "*".
--   "!"                 -> opcode group; indexes map_opcgroup by the ModRM
--                          reg field.
-- Inside an entry "a|b|c|d" selects a variant by prefix (none|rep|o16|repne)
-- and "a$b" selects by ModRM byte (a if >= 0xC0, i.e. register form, else b).
local map_opc1_32 = {
--0x
[0]="addBmr","addVmr","addBrm","addVrm","addBai","addVai","push es","pop es",
"orBmr","orVmr","orBrm","orVrm","orBai","orVai","push cs","opc2*",
--1x
"adcBmr","adcVmr","adcBrm","adcVrm","adcBai","adcVai","push ss","pop ss",
"sbbBmr","sbbVmr","sbbBrm","sbbVrm","sbbBai","sbbVai","push ds","pop ds",
--2x
"andBmr","andVmr","andBrm","andVrm","andBai","andVai","es:seg","daa",
"subBmr","subVmr","subBrm","subVrm","subBai","subVai","cs:seg","das",
--3x
"xorBmr","xorVmr","xorBrm","xorVrm","xorBai","xorVai","ss:seg","aaa",
"cmpBmr","cmpVmr","cmpBrm","cmpVrm","cmpBai","cmpVai","ds:seg","aas",
--4x
"incVR","incVR","incVR","incVR","incVR","incVR","incVR","incVR",
"decVR","decVR","decVR","decVR","decVR","decVR","decVR","decVR",
--5x
"pushUR","pushUR","pushUR","pushUR","pushUR","pushUR","pushUR","pushUR",
"popUR","popUR","popUR","popUR","popUR","popUR","popUR","popUR",
--6x
"sz*pushaw,pusha","sz*popaw,popa","boundVrm","arplWmr",
"fs:seg","gs:seg","o16:","a16",
"pushUi","imulVrmi","pushBs","imulVrms",
"insb","insVS","outsb","outsVS",
--7x
"joBj","jnoBj","jbBj","jnbBj","jzBj","jnzBj","jbeBj","jaBj",
"jsBj","jnsBj","jpeBj","jpoBj","jlBj","jgeBj","jleBj","jgBj",
--8x
"arith!Bmi","arith!Vmi","arith!Bmi","arith!Vms",
"testBmr","testVmr","xchgBrm","xchgVrm",
"movBmr","movVmr","movBrm","movVrm",
"movVmg","leaVrm","movWgm","popUm",
--9x
"nop*xchgVaR|pause|xchgWaR|repne nop","xchgVaR","xchgVaR","xchgVaR",
"xchgVaR","xchgVaR","xchgVaR","xchgVaR",
"sz*cbw,cwde,cdqe","sz*cwd,cdq,cqo","call farViw","wait",
"sz*pushfw,pushf","sz*popfw,popf","sahf","lahf",
--Ax
"movBao","movVao","movBoa","movVoa",
"movsb","movsVS","cmpsb","cmpsVS",
"testBai","testVai","stosb","stosVS",
"lodsb","lodsVS","scasb","scasVS",
--Bx
"movBRi","movBRi","movBRi","movBRi","movBRi","movBRi","movBRi","movBRi",
"movVRI","movVRI","movVRI","movVRI","movVRI","movVRI","movVRI","movVRI",
--Cx
"shift!Bmu","shift!Vmu","retBw","ret","vex*3$lesVrm","vex*2$ldsVrm","movBmi","movVmi",
"enterBwu","leave","retfBw","retf","int3","intBu","into","iretVS",
--Dx
"shift!Bm1","shift!Vm1","shift!Bmc","shift!Vmc","aamBu","aadBu","salc","xlatb",
"fp*0","fp*1","fp*2","fp*3","fp*4","fp*5","fp*6","fp*7",
--Ex
"loopneBj","loopeBj","loopBj","sz*jcxzBj,jecxzBj,jrcxzBj",
"inBau","inVau","outBua","outVua",
"callVj","jmpVj","jmp farViw","jmpBj","inBad","inVad","outBda","outVda",
--Fx
"lock:","int1","repne:rep","rep:","hlt","cmc","testb!Bm","testv!Vm",
"clc","stc","cli","sti","cld","std","incb!Bm","incd!Vm",
}
-- Sanity check: the entries run from index 0 to 255, so the length is 255.
assert(#map_opc1_32 == 255)
94
-- Map for 1st opcode byte in 64 bit mode (overrides only).
-- Entries set to false are reported as "(unknown)" by dispatch. All opcodes
-- not listed here fall back to map_opc1_32 via the __index metamethod.
local map_opc1_64 = setmetatable({
  [0x06]=false, [0x07]=false, [0x0e]=false,
  [0x16]=false, [0x17]=false, [0x1e]=false, [0x1f]=false,
  [0x27]=false, [0x2f]=false, [0x37]=false, [0x3f]=false,
  [0x60]=false, [0x61]=false, [0x62]=false, [0x63]="movsxdVrDmt", [0x67]="a32:",
  -- 40-4F: REX prefixes. The chars after "rex*" name the REX bits to set.
  [0x40]="rex*",   [0x41]="rex*b",   [0x42]="rex*x",   [0x43]="rex*xb",
  [0x44]="rex*r",  [0x45]="rex*rb",  [0x46]="rex*rx",  [0x47]="rex*rxb",
  [0x48]="rex*w",  [0x49]="rex*wb",  [0x4a]="rex*wx",  [0x4b]="rex*wxb",
  [0x4c]="rex*wr", [0x4d]="rex*wrb", [0x4e]="rex*wrx", [0x4f]="rex*wrxb",
  -- C4/C5 are always VEX prefixes here (no les/lds alternative).
  [0x82]=false, [0x9a]=false, [0xc4]="vex*3", [0xc5]="vex*2", [0xce]=false,
  [0xd4]=false, [0xd5]=false, [0xd6]=false, [0xea]=false,
}, { __index = map_opc1_32 })
108
-- Map for 2nd opcode byte (0F xx). True CISC hell. Hey, I told you.
-- Prefix dependent MMX/SSE opcodes: (none)|rep|o16|repne, -|F3|66|F2
-- Indexed by the byte following 0F. A nil entry or an empty variant is
-- reported as "(unknown)" by dispatch.
local map_opc2 = {
--0x
[0]="sldt!Dmp","sgdt!Ump","larVrm","lslVrm",nil,"syscall","clts","sysret",
"invd","wbinvd",nil,"ud1",nil,"$prefetch!Bm","femms","3dnowMrmu",
--1x
"movupsXrm|movssXrvm|movupdXrm|movsdXrvm",
"movupsXmr|movssXmvr|movupdXmr|movsdXmvr",
"movhlpsXrm$movlpsXrm|movsldupXrm|movlpdXrm|movddupXrm",
"movlpsXmr||movlpdXmr",
"unpcklpsXrvm||unpcklpdXrvm",
"unpckhpsXrvm||unpckhpdXrvm",
"movlhpsXrm$movhpsXrm|movshdupXrm|movhpdXrm",
"movhpsXmr||movhpdXmr",
"$prefetcht!Bm","hintnopVm","hintnopVm","hintnopVm",
"hintnopVm","hintnopVm","hintnopVm","hintnopVm",
--2x
"movUmx$","movUmy$","movUxm$","movUym$","movUmz$",nil,"movUzm$",nil,
"movapsXrm||movapdXrm",
"movapsXmr||movapdXmr",
"cvtpi2psXrMm|cvtsi2ssXrvVmt|cvtpi2pdXrMm|cvtsi2sdXrvVmt",
"movntpsXmr|movntssXmr|movntpdXmr|movntsdXmr",
"cvttps2piMrXm|cvttss2siVrXm|cvttpd2piMrXm|cvttsd2siVrXm",
"cvtps2piMrXm|cvtss2siVrXm|cvtpd2piMrXm|cvtsd2siVrXm",
"ucomissXrm||ucomisdXrm",
"comissXrm||comisdXrm",
--3x
"wrmsr","rdtsc","rdmsr","rdpmc","sysenter","sysexit",nil,"getsec",
"opc3*38",nil,"opc3*3a",nil,nil,nil,nil,nil,
--4x
"cmovoVrm","cmovnoVrm","cmovbVrm","cmovnbVrm",
"cmovzVrm","cmovnzVrm","cmovbeVrm","cmovaVrm",
"cmovsVrm","cmovnsVrm","cmovpeVrm","cmovpoVrm",
"cmovlVrm","cmovgeVrm","cmovleVrm","cmovgVrm",
--5x
"movmskpsVrXm$||movmskpdVrXm$","sqrtpsXrm|sqrtssXrm|sqrtpdXrm|sqrtsdXrm",
"rsqrtpsXrm|rsqrtssXrvm","rcppsXrm|rcpssXrvm",
"andpsXrvm||andpdXrvm","andnpsXrvm||andnpdXrvm",
"orpsXrvm||orpdXrvm","xorpsXrvm||xorpdXrvm",
"addpsXrvm|addssXrvm|addpdXrvm|addsdXrvm","mulpsXrvm|mulssXrvm|mulpdXrvm|mulsdXrvm",
"cvtps2pdXrm|cvtss2sdXrvm|cvtpd2psXrm|cvtsd2ssXrvm",
"cvtdq2psXrm|cvttps2dqXrm|cvtps2dqXrm",
"subpsXrvm|subssXrvm|subpdXrvm|subsdXrvm","minpsXrvm|minssXrvm|minpdXrvm|minsdXrvm",
"divpsXrvm|divssXrvm|divpdXrvm|divsdXrvm","maxpsXrvm|maxssXrvm|maxpdXrvm|maxsdXrvm",
--6x
"punpcklbwPrvm","punpcklwdPrvm","punpckldqPrvm","packsswbPrvm",
"pcmpgtbPrvm","pcmpgtwPrvm","pcmpgtdPrvm","packuswbPrvm",
"punpckhbwPrvm","punpckhwdPrvm","punpckhdqPrvm","packssdwPrvm",
"||punpcklqdqXrvm","||punpckhqdqXrvm",
"movPrVSm","movqMrm|movdquXrm|movdqaXrm",
--7x
"pshufwMrmu|pshufhwXrmu|pshufdXrmu|pshuflwXrmu","pshiftw!Pvmu",
"pshiftd!Pvmu","pshiftq!Mvmu||pshiftdq!Xvmu",
"pcmpeqbPrvm","pcmpeqwPrvm","pcmpeqdPrvm","emms*|",
"vmreadUmr||extrqXmuu$|insertqXrmuu$","vmwriteUrm||extrqXrm$|insertqXrm$",
nil,nil,
"||haddpdXrvm|haddpsXrvm","||hsubpdXrvm|hsubpsXrvm",
"movVSmMr|movqXrm|movVSmXr","movqMmr|movdquXmr|movdqaXmr",
--8x
"joVj","jnoVj","jbVj","jnbVj","jzVj","jnzVj","jbeVj","jaVj",
"jsVj","jnsVj","jpeVj","jpoVj","jlVj","jgeVj","jleVj","jgVj",
--9x
"setoBm","setnoBm","setbBm","setnbBm","setzBm","setnzBm","setbeBm","setaBm",
"setsBm","setnsBm","setpeBm","setpoBm","setlBm","setgeBm","setleBm","setgBm",
--Ax
"push fs","pop fs","cpuid","btVmr","shldVmru","shldVmrc",nil,nil,
"push gs","pop gs","rsm","btsVmr","shrdVmru","shrdVmrc","fxsave!Dmp","imulVrm",
--Bx
"cmpxchgBmr","cmpxchgVmr","$lssVrm","btrVmr",
"$lfsVrm","$lgsVrm","movzxVrBmt","movzxVrWmt",
"|popcntVrm","ud2Dp","bt!Vmu","btcVmr",
"bsfVrm","bsrVrm|lzcntVrm|bsrWrm","movsxVrBmt","movsxVrWmt",
--Cx
"xaddBmr","xaddVmr",
"cmppsXrvmu|cmpssXrvmu|cmppdXrvmu|cmpsdXrvmu","$movntiVmr|",
"pinsrwPrvWmu","pextrwDrPmu",
"shufpsXrvmu||shufpdXrvmu","$cmpxchg!Qmp",
"bswapVR","bswapVR","bswapVR","bswapVR","bswapVR","bswapVR","bswapVR","bswapVR",
--Dx
"||addsubpdXrvm|addsubpsXrvm","psrlwPrvm","psrldPrvm","psrlqPrvm",
"paddqPrvm","pmullwPrvm",
"|movq2dqXrMm|movqXmr|movdq2qMrXm$","pmovmskbVrMm||pmovmskbVrXm",
"psubusbPrvm","psubuswPrvm","pminubPrvm","pandPrvm",
"paddusbPrvm","padduswPrvm","pmaxubPrvm","pandnPrvm",
--Ex
"pavgbPrvm","psrawPrvm","psradPrvm","pavgwPrvm",
"pmulhuwPrvm","pmulhwPrvm",
"|cvtdq2pdXrm|cvttpd2dqXrm|cvtpd2dqXrm","$movntqMmr||$movntdqXmr",
"psubsbPrvm","psubswPrvm","pminswPrvm","porPrvm",
"paddsbPrvm","paddswPrvm","pmaxswPrvm","pxorPrvm",
--Fx
"|||lddquXrm","psllwPrvm","pslldPrvm","psllqPrvm",
"pmuludqPrvm","pmaddwdPrvm","psadbwPrvm","maskmovqMrm||maskmovdquXrm$",
"psubbPrvm","psubwPrvm","psubdPrvm","psubqPrvm",
"paddbPrvm","paddwPrvm","padddPrvm","ud",
}
-- Sanity check: the positional entries must end exactly at index 255.
assert(map_opc2[255] == "ud")
207
-- Map for three-byte opcodes. Can't wait for their next invention.
-- The outer keys ("38", "3a") are the strings following "opc3*" in map_opc2;
-- each inner table is indexed by the third opcode byte. The vex handler also
-- selects these tables directly.
local map_opc3 = {
["38"] = { -- [66] 0f 38 xx
--0x
[0]="pshufbPrvm","phaddwPrvm","phadddPrvm","phaddswPrvm",
"pmaddubswPrvm","phsubwPrvm","phsubdPrvm","phsubswPrvm",
"psignbPrvm","psignwPrvm","psigndPrvm","pmulhrswPrvm",
"||permilpsXrvm","||permilpdXrvm",nil,nil,
--1x
"||pblendvbXrma",nil,nil,nil,
"||blendvpsXrma","||blendvpdXrma","||permpsXrvm","||ptestXrm",
"||broadcastssXrm","||broadcastsdXrm","||broadcastf128XrlXm",nil,
"pabsbPrm","pabswPrm","pabsdPrm",nil,
--2x
"||pmovsxbwXrm","||pmovsxbdXrm","||pmovsxbqXrm","||pmovsxwdXrm",
"||pmovsxwqXrm","||pmovsxdqXrm",nil,nil,
"||pmuldqXrvm","||pcmpeqqXrvm","||$movntdqaXrm","||packusdwXrvm",
"||maskmovpsXrvm","||maskmovpdXrvm","||maskmovpsXmvr","||maskmovpdXmvr",
--3x
"||pmovzxbwXrm","||pmovzxbdXrm","||pmovzxbqXrm","||pmovzxwdXrm",
"||pmovzxwqXrm","||pmovzxdqXrm","||permdXrvm","||pcmpgtqXrvm",
"||pminsbXrvm","||pminsdXrvm","||pminuwXrvm","||pminudXrvm",
"||pmaxsbXrvm","||pmaxsdXrvm","||pmaxuwXrvm","||pmaxudXrvm",
--4x
"||pmulddXrvm","||phminposuwXrm",nil,nil,
nil,"||psrlvVSXrvm","||psravdXrvm","||psllvVSXrvm",
--5x
[0x58] = "||pbroadcastdXrlXm",[0x59] = "||pbroadcastqXrlXm",
[0x5a] = "||broadcasti128XrlXm",
--7x
[0x78] = "||pbroadcastbXrlXm",[0x79] = "||pbroadcastwXrlXm",
--8x
[0x8c] = "||pmaskmovXrvVSm",
[0x8e] = "||pmaskmovVSmXvr",
--Dx
[0xdc] = "||aesencXrvm", [0xdd] = "||aesenclastXrvm",
[0xde] = "||aesdecXrvm", [0xdf] = "||aesdeclastXrvm",
--Fx
[0xf0] = "|||crc32TrBmt",[0xf1] = "|||crc32TrVmt",
[0xf7] = "| sarxVrmv| shlxVrmv| shrxVrmv",
},

["3a"] = { -- [66] 0f 3a xx
--0x
[0x00]="||permqXrmu","||permpdXrmu","||pblenddXrvmu",nil,
"||permilpsXrmu","||permilpdXrmu","||perm2f128Xrvmu",nil,
"||roundpsXrmu","||roundpdXrmu","||roundssXrvmu","||roundsdXrvmu",
"||blendpsXrvmu","||blendpdXrvmu","||pblendwXrvmu","palignrPrvmu",
--1x
nil,nil,nil,nil,
"||pextrbVmXru","||pextrwVmXru","||pextrVmSXru","||extractpsVmXru",
"||insertf128XrvlXmu","||extractf128XlXmYru",nil,nil,
nil,nil,nil,nil,
--2x
"||pinsrbXrvVmu","||insertpsXrvmu","||pinsrXrvVmuS",nil,
--3x
[0x38] = "||inserti128Xrvmu",[0x39] = "||extracti128XlXmYru",
--4x
[0x40] = "||dppsXrvmu",
[0x41] = "||dppdXrvmu",
[0x42] = "||mpsadbwXrvmu",
[0x44] = "||pclmulqdqXrvmu",
[0x46] = "||perm2i128Xrvmu",
[0x4a] = "||blendvpsXrvmb",[0x4b] = "||blendvpdXrvmb",
[0x4c] = "||pblendvbXrvmb",
--6x
[0x60] = "||pcmpestrmXrmu",[0x61] = "||pcmpestriXrmu",
[0x62] = "||pcmpistrmXrmu",[0x63] = "||pcmpistriXrmu",
[0xdf] = "||aeskeygenassistXrmu",
--Fx
[0xf0] = "||| rorxVrmu",
},
}
281
-- Map for VMX/SVM opcodes 0F 01 C0-FF (sgdt group with register operands).
-- Indexed by the complete ModRM byte. Reached through the "vm*" entries of
-- the sgdt opcode group; bytes not listed are reported as "(unknown)".
local map_opcvm = {
[0xc1]="vmcall",[0xc2]="vmlaunch",[0xc3]="vmresume",[0xc4]="vmxoff",
[0xc8]="monitor",[0xc9]="mwait",
[0xd8]="vmrun",[0xd9]="vmmcall",[0xda]="vmload",[0xdb]="vmsave",
[0xdc]="stgi",[0xdd]="clgi",[0xde]="skinit",[0xdf]="invlpga",
[0xf8]="swapgs",[0xf9]="rdtscp",
}
290
-- Map for FP opcodes. And you thought stack machines are simple?
-- The fp handler computes the index as (opcode-0xD8)*8 + ModRM reg field,
-- plus 64 if the ModRM byte is >= 0xC0. An entry which is a table is indexed
-- once more by the ModRM r/m field (+1).
local map_opcfp = {
-- D8-DF 00-BF: opcodes with a memory operand.
-- D8
[0]="faddFm","fmulFm","fcomFm","fcompFm","fsubFm","fsubrFm","fdivFm","fdivrFm",
-- D9
"fldFm",nil,"fstFm","fstpFm","fldenvVm","fldcwWm","fnstenvVm","fnstcwWm",
-- DA
"fiaddDm","fimulDm","ficomDm","ficompDm",
"fisubDm","fisubrDm","fidivDm","fidivrDm",
-- DB
"fildDm","fisttpDm","fistDm","fistpDm",nil,"fld twordFmp",nil,"fstp twordFmp",
-- DC
"faddGm","fmulGm","fcomGm","fcompGm","fsubGm","fsubrGm","fdivGm","fdivrGm",
-- DD
"fldGm","fisttpQm","fstGm","fstpGm","frstorDmp",nil,"fnsaveDmp","fnstswWm",
-- DE
"fiaddWm","fimulWm","ficomWm","ficompWm",
"fisubWm","fisubrWm","fidivWm","fidivrWm",
-- DF
"fildWm","fisttpWm","fistWm","fistpWm",
"fbld twordFmp","fildQm","fbstp twordFmp","fistpQm",
-- xx C0-FF: opcodes with a pseudo-register operand.
-- D8
"faddFf","fmulFf","fcomFf","fcompFf","fsubFf","fsubrFf","fdivFf","fdivrFf",
-- D9
"fldFf","fxchFf",{"fnop"},nil,
{"fchs","fabs",nil,nil,"ftst","fxam"},
{"fld1","fldl2t","fldl2e","fldpi","fldlg2","fldln2","fldz"},
{"f2xm1","fyl2x","fptan","fpatan","fxtract","fprem1","fdecstp","fincstp"},
{"fprem","fyl2xp1","fsqrt","fsincos","frndint","fscale","fsin","fcos"},
-- DA
"fcmovbFf","fcmoveFf","fcmovbeFf","fcmovuFf",nil,{nil,"fucompp"},nil,nil,
-- DB
"fcmovnbFf","fcmovneFf","fcmovnbeFf","fcmovnuFf",
{nil,nil,"fnclex","fninit"},"fucomiFf","fcomiFf",nil,
-- DC
"fadd toFf","fmul toFf",nil,nil,
"fsub toFf","fsubr toFf","fdivr toFf","fdiv toFf",
-- DD
"ffreeFf",nil,"fstFf","fstpFf","fucomFf","fucompFf",nil,nil,
-- DE
"faddpFf","fmulpFf",nil,{nil,"fcompp"},
"fsubrpFf","fsubpFf","fdivrpFf","fdivpFf",
-- DF
nil,nil,nil,nil,{"fnstsw ax"},"fucomipFf","fcomipFf",nil,
}
-- Sanity check: 64 memory forms + 7*8 + 6 = index 126 for DF F0-F7.
assert(map_opcfp[126] == "fcomipFf")
338
-- Map for opcode groups. The subkey is sp from the ModRM byte.
-- The outer key is the name before the "!" in an opcode map entry. The inner
-- tables are indexed by the ModRM reg field + 1. An entry without a pattern
-- of its own inherits the pattern following the "!" of the referring entry.
local map_opcgroup = {
  arith = { "add", "or", "adc", "sbb", "and", "sub", "xor", "cmp" },
  shift = { "rol", "ror", "rcl", "rcr", "shl", "shr", "sal", "sar" },
  testb = { "testBmi", "testBmi", "not", "neg", "mul", "imul", "div", "idiv" },
  testv = { "testVmi", "testVmi", "not", "neg", "mul", "imul", "div", "idiv" },
  incb = { "inc", "dec" },
  incd = { "inc", "dec", "callUmp", "$call farDmp",
	   "jmpUmp", "$jmp farDmp", "pushUm" },
  sldt = { "sldt", "str", "lldt", "ltr", "verr", "verw" },
  sgdt = { "vm*$sgdt", "vm*$sidt", "$lgdt", "vm*$lidt",
	   "smsw", nil, "lmsw", "vm*$invlpg" },
  bt = { nil, nil, nil, nil, "bt", "bts", "btr", "btc" },
  cmpxchg = { nil, "sz*,cmpxchg8bQmp,cmpxchg16bXmp", nil, nil,
	      nil, nil, "vmptrld|vmxon|vmclear", "vmptrst" },
  pshiftw = { nil, nil, "psrlw", nil, "psraw", nil, "psllw" },
  pshiftd = { nil, nil, "psrld", nil, "psrad", nil, "pslld" },
  pshiftq = { nil, nil, "psrlq", nil, nil, nil, "psllq" },
  pshiftdq = { nil, nil, "psrlq", "psrldq", nil, nil, "psllq", "pslldq" },
  fxsave = { "$fxsave", "$fxrstor", "$ldmxcsr", "$stmxcsr",
	     nil, "lfenceDp$", "mfenceDp$", "sfenceDp$clflush" },
  prefetch = { "prefetch", "prefetchw" },
  prefetcht = { "prefetchnta", "prefetcht0", "prefetcht1", "prefetcht2" },
}
363
364------------------------------------------------------------------------------
365
-- Maps for register names.
-- Keyed by the operand size char; each list is indexed by register
-- number + 1. B64 replaces B whenever a REX or VEX prefix is present.
local map_regs = {
  B = { "al", "cl", "dl", "bl", "ah", "ch", "dh", "bh",
	"r8b", "r9b", "r10b", "r11b", "r12b", "r13b", "r14b", "r15b" },
  B64 = { "al", "cl", "dl", "bl", "spl", "bpl", "sil", "dil",
	  "r8b", "r9b", "r10b", "r11b", "r12b", "r13b", "r14b", "r15b" },
  W = { "ax", "cx", "dx", "bx", "sp", "bp", "si", "di",
	"r8w", "r9w", "r10w", "r11w", "r12w", "r13w", "r14w", "r15w" },
  D = { "eax", "ecx", "edx", "ebx", "esp", "ebp", "esi", "edi",
	"r8d", "r9d", "r10d", "r11d", "r12d", "r13d", "r14d", "r15d" },
  Q = { "rax", "rcx", "rdx", "rbx", "rsp", "rbp", "rsi", "rdi",
	"r8", "r9", "r10", "r11", "r12", "r13", "r14", "r15" },
  M = { "mm0", "mm1", "mm2", "mm3", "mm4", "mm5", "mm6", "mm7",
	"mm0", "mm1", "mm2", "mm3", "mm4", "mm5", "mm6", "mm7" }, -- No x64 ext!
  X = { "xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5", "xmm6", "xmm7",
	"xmm8", "xmm9", "xmm10", "xmm11", "xmm12", "xmm13", "xmm14", "xmm15" },
  Y = { "ymm0", "ymm1", "ymm2", "ymm3", "ymm4", "ymm5", "ymm6", "ymm7",
	"ymm8", "ymm9", "ymm10", "ymm11", "ymm12", "ymm13", "ymm14", "ymm15" },
}
-- Segment register names, indexed by the ModRM reg field + 1 (pattern "g").
local map_segregs = { "es", "cs", "ss", "ds", "fs", "gs", "segr6", "segr7" }
386
-- Maps for size names.
-- map_sz2n gives the operand size in bytes; putpat uses it to size
-- immediates and jump displacements.
local map_sz2n = {
  B = 1, W = 2, D = 4, Q = 8, M = 8, X = 16, Y = 32,
}
-- map_sz2prefix gives the size keyword put in front of a memory operand.
local map_sz2prefix = {
  B = "byte", W = "word", D = "dword",
  Q = "qword",
  M = "qword", X = "xword", Y = "yword",
  F = "dword", G = "qword", -- No need for sizes/register names for these two.
}
397
398------------------------------------------------------------------------------
399
-- Output a nicely formatted line with an opcode and operands.
--
-- The line is built from the address (ctx.addr+ctx.start), an optional hex
-- dump of the bytes ctx.start..ctx.pos-1 (ctx.hexdump columns, 0 turns it
-- off), any prefixes still pending in ctx, the opcode text and the operands.
-- A symbol from ctx.symtab is appended if ctx.imm matches one. Afterwards
-- the per-instruction state is reset and ctx.start is moved to ctx.pos.
local function putop(ctx, text, operands)
  local pos = ctx.pos
  local hex = ""
  local width = ctx.hexdump
  if width > 0 then
    local code = ctx.code
    for i = ctx.start, pos-1 do
      hex = hex..format("%02X", byte(code, i, i))
    end
    if #hex > width then
      hex = sub(hex, 1, width)..". " -- Dump is too long: truncate and mark.
    else
      hex = hex..rep(" ", width-#hex+2)
    end
  end

  if operands then text = text.." "..operands end

  -- Prefixes which have not been consumed by the operand decoder.
  if ctx.o16 then text = "o16 "..text; ctx.o16 = false end
  if ctx.a32 then text = "a32 "..text; ctx.a32 = false end
  if ctx.rep then text = ctx.rep.." "..text; ctx.rep = false end

  local rex = ctx.rex
  if rex then
    -- Show the REX/VEX bits which are still set.
    local bits = (ctx.rexw and "w" or "")..(ctx.rexr and "r" or "")..
                 (ctx.rexx and "x" or "")..(ctx.rexb and "b" or "")..
                 (ctx.vexl and "l" or "")
    local vexv = ctx.vexv
    if vexv and vexv ~= 0 then bits = bits.."v"..vexv end
    if bits ~= "" then
      text = rex.."."..bits.." "..gsub(text, "^ ", "")
    elseif rex == "vex" then
      -- Fully consumed VEX prefix: just prepend "v" to the opcode name.
      text = gsub("v"..text, "^v ", "")
    end
    ctx.rex = false
    ctx.rexw = false; ctx.rexr = false; ctx.rexx = false; ctx.rexb = false
    ctx.vexl = false; ctx.vexv = false
  end

  local seg = ctx.seg
  if seg then
    -- Put the segment override inside the memory operand, if there is one.
    local patched, n = gsub(text, "%[", "["..seg..":")
    if n == 0 then text = seg.." "..text else text = patched end
    ctx.seg = false
  end

  if ctx.lock then text = "lock "..text; ctx.lock = false end

  local imm = ctx.imm
  if imm then
    local sym = ctx.symtab[imm]
    if sym then text = text.."\t->"..sym end
  end

  ctx.out(format("%08x  %s%s\n", ctx.addr+ctx.start, hex, text))

  -- Get ready for the next instruction.
  ctx.mrm = false
  ctx.vexv = false
  ctx.start = pos
  ctx.imm = nil
end
442
-- Clear all prefix flags.
-- Resets the operand/address size, segment, lock, rep and REX/VEX flags in
-- ctx to false. ctx.vexv is not touched here.
local function clearprefixes(ctx)
  ctx.o16, ctx.a32 = false, false
  ctx.seg, ctx.lock, ctx.rep = false, false, false
  ctx.rex = false
  ctx.rexw, ctx.rexr, ctx.rexx, ctx.rexb = false, false, false, false
  ctx.vexl = false
end
449
-- Fallback for incomplete opcodes at the end.
-- Drops all pending prefixes, consumes every remaining byte of the block
-- and emits them as one "(incomplete)" line.
local function incomplete(ctx)
  clearprefixes(ctx)
  ctx.pos = ctx.stop+1
  return putop(ctx, "(incomplete)")
end
456
-- Fallback for unknown opcodes.
-- Drops all pending prefixes and emits the bytes consumed so far
-- (ctx.start..ctx.pos-1) as one "(unknown)" line.
local function unknown(ctx)
  clearprefixes(ctx)
  return putop(ctx, "(unknown)")
end
462
-- Return an immediate of the specified size.
-- Reads a little-endian unsigned value at pos. n == 1 and n == 2 read one
-- and two bytes, any other n reads four bytes and also stores the value in
-- ctx.imm (used for the symbol lookup in putop). If the value would extend
-- past ctx.stop, an "(incomplete)" line is emitted and nothing is returned.
local function getimm(ctx, pos, n)
  if pos+n-1 > ctx.stop then return incomplete(ctx) end
  local code = ctx.code
  if n == 1 then
    local lo = byte(code, pos, pos)
    return lo
  end
  if n == 2 then
    local lo, hi = byte(code, pos, pos+1)
    return lo+hi*256
  end
  local b1, b2, b3, b4 = byte(code, pos, pos+3)
  local value = b1+b2*256+b3*65536+b4*16777216
  ctx.imm = value
  return value
end
480
-- Process pattern string and generate the operands.
--
-- ctx:  disassembler context; ctx.pos points behind the opcode byte(s).
-- name: opcode name (may get a size suffix appended by pattern char "S").
-- pat:  pattern string, processed char by char:
--   V U T      operand size from prefixes (rex.w -> Q, o16 -> W, else D;
--              U defaults to Q in x64 mode; T ignores o16).
--   B W D Q M X Y F G  fixed operand size (X turns into Y with VEX.L).
--   P          MMX (M) or SSE (X with o16, Y with VEX.L) operand size.
--   S          append the lower-case size char to the name.
--   s u w      signed byte, unsigned byte, unsigned word immediate.
--   b          register selected by the upper nibble of an immediate byte.
--   o          absolute memory offset (8 bytes in x64 mode, else 4).
--   i I        immediate of operand size (I: full 8 bytes for Q in x64).
--   j          relative branch target, shown as absolute address.
--   R          register from the low 3 bits of the preceding opcode byte.
--   a c d 1    fixed operands: accumulator, "cl", "dx", "1".
--   m r g f    ModRM operands: r/m, reg, segment register, st(i).
--   x y z      control, debug and test register from the ModRM reg field.
--   v          register from VEX.vvvv (omitted if there is no VEX prefix).
--   p t l      no operand: suppress/force the size prefix of a memory
--              operand, resp. ignore VEX.L for the following size chars.
-- Emits the finished line via putop. If the code ends in the middle of the
-- instruction, "(incomplete)" is emitted instead.
local function putpat(ctx, name, pat)
  local operands, regs, sz, mode, sp, rm, sc, rx, sdisp
  local code, pos, stop, vexl = ctx.code, ctx.pos, ctx.stop, ctx.vexl

  -- Chars used: 1DFGIMPQRSTUVWXYabcdfgijlmoprstuvwxyz
  for p in gmatch(pat, ".") do
    local x = nil
    if p == "V" or p == "U" then
      if ctx.rexw then sz = "Q"; ctx.rexw = false
      elseif ctx.o16 then sz = "W"; ctx.o16 = false
      elseif p == "U" and ctx.x64 then sz = "Q"
      else sz = "D" end
      regs = map_regs[sz]
    elseif p == "T" then
      if ctx.rexw then sz = "Q"; ctx.rexw = false else sz = "D" end
      regs = map_regs[sz]
    elseif p == "B" then
      sz = "B"
      regs = ctx.rex and map_regs.B64 or map_regs.B
    elseif match(p, "[WDQMXYFG]") then
      sz = p
      if sz == "X" and vexl then sz = "Y"; ctx.vexl = false end
      regs = map_regs[sz]
    elseif p == "P" then
      sz = ctx.o16 and "X" or "M"; ctx.o16 = false
      if sz == "X" and vexl then sz = "Y"; ctx.vexl = false end
      regs = map_regs[sz]
    elseif p == "S" then
      name = name..lower(sz)
    elseif p == "s" then
      local imm = getimm(ctx, pos, 1); if not imm then return end
      x = imm <= 127 and format("+0x%02x", imm)
                     or format("-0x%02x", 256-imm)
      pos = pos+1
    elseif p == "u" then
      local imm = getimm(ctx, pos, 1); if not imm then return end
      x = format("0x%02x", imm)
      pos = pos+1
    elseif p == "b" then
      local imm = getimm(ctx, pos, 1); if not imm then return end
      -- The register number is the upper nibble. Strip the lower nibble
      -- before dividing, otherwise a non-zero lower nibble would give a
      -- fractional table index and the operand would be dropped.
      x = regs[(imm-imm%16)/16+1]
      pos = pos+1
    elseif p == "w" then
      local imm = getimm(ctx, pos, 2); if not imm then return end
      x = format("0x%x", imm)
      pos = pos+2
    elseif p == "o" then -- [offset]
      if ctx.x64 then
        local imm1 = getimm(ctx, pos, 4); if not imm1 then return end
        local imm2 = getimm(ctx, pos+4, 4); if not imm2 then return end
        x = format("[0x%08x%08x]", imm2, imm1)
        pos = pos+8
      else
        local imm = getimm(ctx, pos, 4); if not imm then return end
        x = format("[0x%08x]", imm)
        pos = pos+4
      end
    elseif p == "i" or p == "I" then
      local n = map_sz2n[sz]
      if n == 8 and ctx.x64 and p == "I" then
        local imm1 = getimm(ctx, pos, 4); if not imm1 then return end
        local imm2 = getimm(ctx, pos+4, 4); if not imm2 then return end
        x = format("0x%08x%08x", imm2, imm1)
      else
        if n == 8 then n = 4 end
        local imm = getimm(ctx, pos, n); if not imm then return end
        if sz == "Q" and (imm < 0 or imm > 0x7fffffff) then
          -- Sign-extended 32 bit immediate: show it as a negative number.
          imm = (0xffffffff+1)-imm
          x = format(imm > 65535 and "-0x%08x" or "-0x%x", imm)
        else
          x = format(imm > 65535 and "0x%08x" or "0x%x", imm)
        end
      end
      pos = pos+n
    elseif p == "j" then
      local n = map_sz2n[sz]
      if n == 8 then n = 4 end
      local imm = getimm(ctx, pos, n); if not imm then return end
      if sz == "B" and imm > 127 then imm = imm-256
      elseif imm > 2147483647 then imm = imm-4294967296 end
      pos = pos+n
      -- The displacement is relative to the end of the instruction.
      imm = imm + pos + ctx.addr
      if imm > 4294967295 and not ctx.x64 then imm = imm-4294967296 end
      ctx.imm = imm
      if sz == "W" then
        x = format("word 0x%04x", imm%65536)
      elseif ctx.x64 then
        local lo = imm % 0x1000000
        x = format("0x%02x%06x", (imm-lo) / 0x1000000, lo)
      else
        x = "0x"..tohex(imm)
      end
    elseif p == "R" then
      local r = byte(code, pos-1, pos-1)%8
      if ctx.rexb then r = r + 8; ctx.rexb = false end
      x = regs[r+1]
    elseif p == "a" then x = regs[1]
    elseif p == "c" then x = "cl"
    elseif p == "d" then x = "dx"
    elseif p == "1" then x = "1"
    else
      -- All remaining chars need the ModRM byte. Decode it (plus SIB byte
      -- and displacement) once, when the first of them is seen.
      if not mode then
        mode = ctx.mrm
        if not mode then
          if pos > stop then return incomplete(ctx) end
          mode = byte(code, pos, pos)
          pos = pos+1
        end
        rm = mode%8; mode = (mode-rm)/8
        sp = mode%8; mode = (mode-sp)/8
        sdisp = ""
        if mode < 3 then
          if rm == 4 then
            if pos > stop then return incomplete(ctx) end
            sc = byte(code, pos, pos)
            pos = pos+1
            rm = sc%8; sc = (sc-rm)/8
            rx = sc%8; sc = (sc-rx)/8
            if ctx.rexx then rx = rx + 8; ctx.rexx = false end
            if rx == 4 then rx = nil end
          end
          if mode > 0 or rm == 5 then
            local dsz = mode
            if dsz ~= 1 then dsz = 4 end
            local disp = getimm(ctx, pos, dsz); if not disp then return end
            if mode == 0 then rm = nil end
            if rm or rx or (not sc and ctx.x64 and not ctx.a32) then
              if dsz == 1 and disp > 127 then
                sdisp = format("-0x%x", 256-disp)
              elseif disp >= 0 and disp <= 0x7fffffff then
                sdisp = format("+0x%x", disp)
              else
                sdisp = format("-0x%x", (0xffffffff+1)-disp)
              end
            else
              sdisp = format(ctx.x64 and not ctx.a32 and
                not (disp >= 0 and disp <= 0x7fffffff)
                and "0xffffffff%08x" or "0x%08x", disp)
            end
            pos = pos+dsz
          end
        end
        if rm and ctx.rexb then rm = rm + 8; ctx.rexb = false end
        if ctx.rexr then sp = sp + 8; ctx.rexr = false end
      end
      if p == "m" then
        if mode == 3 then x = regs[rm+1]
        else
          local aregs = ctx.a32 and map_regs.D or ctx.aregs
          local srm, srx = "", ""
          if rm then srm = aregs[rm+1]
          elseif not sc and ctx.x64 and not ctx.a32 then srm = "rip" end
          ctx.a32 = false
          if rx then
            if rm then srm = srm.."+" end
            srx = aregs[rx+1]
            if sc > 0 then srx = srx.."*"..(2^sc) end
          end
          x = format("[%s%s%s]", srm, srx, sdisp)
        end
        -- Add a size prefix only if no other operand implies the size.
        if mode < 3 and
           (not match(pat, "[aRrgp]") or match(pat, "t")) then -- Yuck.
          x = map_sz2prefix[sz].." "..x
        end
      elseif p == "r" then x = regs[sp+1]
      elseif p == "g" then x = map_segregs[sp+1]
      elseif p == "p" then -- Suppress prefix.
      elseif p == "f" then x = "st"..rm
      elseif p == "x" then
        if sp == 0 and ctx.lock and not ctx.x64 then
          x = "CR8"; ctx.lock = false
        else
          x = "CR"..sp
        end
      elseif p == "v" then
        if ctx.vexv then
          x = regs[ctx.vexv+1]; ctx.vexv = false
        end
      elseif p == "y" then x = "DR"..sp
      elseif p == "z" then x = "TR"..sp
      elseif p == "l" then vexl = false
      elseif p == "t" then
      else
        error("bad pattern `"..pat.."'")
      end
    end
    if x then operands = operands and operands..", "..x or x end
  end
  ctx.pos = pos
  return putop(ctx, name, operands)
end
673
674-- Forward declaration.
675local map_act
676
-- Fetch and cache MRM byte.
-- Returns the ModRM byte cached in ctx.mrm, if any. Otherwise reads it at
-- ctx.pos, advances ctx.pos by one and caches it. Returns nil if ctx.pos is
-- already past ctx.stop.
local function getmrm(ctx)
  local cached = ctx.mrm
  if cached then return cached end
  local pos = ctx.pos
  if pos > ctx.stop then return nil end
  local mrm = byte(ctx.code, pos, pos)
  ctx.pos = pos+1
  ctx.mrm = mrm
  return mrm
end
689
-- Dispatch to handler depending on pattern.
--
-- opat:   entry from an opcode map (nil or false gives "(unknown)").
-- patgrp: pattern inherited from an opcode group reference; it is used if
--         the selected entry has a name only.
-- First the prefix dependent variant ("|") and then the reg$mem variant
-- ("$") is selected. The rest is split into name and pattern and passed to
-- the map_act handler for the first pattern char.
local function dispatch(ctx, opat, patgrp)
  if not opat then return unknown(ctx) end

  if match(opat, "%|") then -- MMX/SSE variants depending on prefix.
    local sel
    if ctx.rep then
      -- "rep" picks the 2nd variant, "repne" the 4th one.
      sel = ctx.rep=="rep" and "%|([^%|]*)" or "%|[^%|]*%|[^%|]*%|([^%|]*)"
      ctx.rep = false
    elseif ctx.o16 then
      sel = "%|[^%|]*%|([^%|]*)" -- 3rd variant.
      ctx.o16 = false
    else
      sel = "^[^%|]*" -- 1st variant.
    end
    opat = match(opat, sel)
    if not opat then return unknown(ctx) end
--    ctx.rep = false; ctx.o16 = false
    --XXX fails for 66 f2 0f 38 f1 06  crc32 eax,WORD PTR [esi]
    --XXX remove in branches?
  end

  if match(opat, "%$") then -- reg$mem variants.
    local mrm = getmrm(ctx)
    if not mrm then return incomplete(ctx) end
    if mrm >= 192 then
      opat = match(opat, "^[^%$]*")
    else
      opat = match(opat, "%$(.*)")
    end
  end

  -- The selected variant may be empty, i.e. undefined for this encoding.
  if opat == "" then return unknown(ctx) end

  local name, pat = match(opat, "^([a-z0-9 ]*)(.*)")
  if pat == "" and patgrp then pat = patgrp end
  return map_act[sub(pat, 1, 1)](ctx, name, pat)
end
716
-- Get a pattern from an opcode map and dispatch to handler.
--
-- opcmap: table indexed by the opcode byte at ctx.pos.
-- The byte is only consumed if it lies within the block (ctx.pos <=
-- ctx.stop). A multi-byte opcode cut off by the end of the block is reported
-- as "(incomplete)". Without this check the decoder would read beyond
-- ctx.stop, and at the very end of the code string the hex dump in putop
-- would fail on the missing byte.
local function dispatchmap(ctx, opcmap)
  local pos = ctx.pos
  if pos > ctx.stop then return incomplete(ctx) end
  local opat = opcmap[byte(ctx.code, pos, pos)]
  pos = pos + 1
  ctx.pos = pos
  return dispatch(ctx, opat)
end
725
-- Map for action codes. The key is the first char after the name.
-- Every handler is called as handler(ctx, name, pat) by dispatch. A handler
-- either emits a line (via putop/putpat or another dispatch) or only records
-- prefix state in ctx and returns, so decoding continues with the next byte.
map_act = {
  -- Simple opcodes without operands.
  [""] = function(ctx, name, pat)
    return putop(ctx, name)
  end,

  -- Operand size chars fall right through.
  B = putpat, W = putpat, D = putpat, Q = putpat,
  V = putpat, U = putpat, T = putpat,
  M = putpat, X = putpat, P = putpat,
  F = putpat, G = putpat, Y = putpat,

  -- Collect prefixes.
  -- "name:" sets ctx[name], "name:field" sets ctx[field]; both to name.
  [":"] = function(ctx, name, pat)
    ctx[pat == ":" and name or sub(pat, 2)] = name
    if ctx.pos - ctx.start > 5 then return unknown(ctx) end -- Limit #prefixes.
  end,

  -- Chain to special handler specified by name.
  ["*"] = function(ctx, name, pat)
    return map_act[name](ctx, name, sub(pat, 2))
  end,

  -- Use named subtable for opcode group.
  ["!"] = function(ctx, name, pat)
    local mrm = getmrm(ctx); if not mrm then return incomplete(ctx) end
    return dispatch(ctx, map_opcgroup[name][((mrm-(mrm%8))/8)%8+1], sub(pat, 2))
  end,

  -- o16,o32[,o64] variants.
  sz = function(ctx, name, pat)
    if ctx.o16 then ctx.o16 = false
    else
      pat = match(pat, ",(.*)")
      if ctx.rexw then
        local p = match(pat, ",(.*)")
        if p then pat = p; ctx.rexw = false end
      end
    end
    pat = match(pat, "^[^,]*")
    return dispatch(ctx, pat)
  end,

  -- Two-byte opcode dispatch.
  opc2 = function(ctx, name, pat)
    return dispatchmap(ctx, map_opc2)
  end,

  -- Three-byte opcode dispatch.
  opc3 = function(ctx, name, pat)
    return dispatchmap(ctx, map_opc3[pat])
  end,

  -- VMX/SVM dispatch.
  vm = function(ctx, name, pat)
    return dispatch(ctx, map_opcvm[ctx.mrm])
  end,

  -- Floating point opcode dispatch.
  fp = function(ctx, name, pat)
    local mrm = getmrm(ctx); if not mrm then return incomplete(ctx) end
    local rm = mrm%8
    local idx = pat*8 + ((mrm-rm)/8)%8
    if mrm >= 192 then idx = idx + 64 end
    local opat = map_opcfp[idx]
    if type(opat) == "table" then opat = opat[rm+1] end
    return dispatch(ctx, opat)
  end,

  -- REX prefix.
  rex = function(ctx, name, pat)
    if ctx.rex then return unknown(ctx) end -- Only 1 REX or VEX prefix allowed.
    for p in gmatch(pat, ".") do ctx["rex"..p] = true end
    ctx.rex = "rex"
  end,

  -- VEX prefix.
  -- pat is "2" or "3" for the two-byte (C5) and three-byte (C4) form.
  vex = function(ctx, name, pat)
    if ctx.rex then return unknown(ctx) end -- Only 1 REX or VEX prefix allowed.
    ctx.rex = "vex"
    local pos, stop = ctx.pos, ctx.stop
    if ctx.mrm then
      -- The byte fetched as ModRM (les/lds check) is the 1st payload byte.
      ctx.mrm = nil
      pos = pos-1
    end
    -- The payload bytes must lie inside the block, not just inside the string.
    if pos > stop then return incomplete(ctx) end
    local b = byte(ctx.code, pos, pos)
    if not b then return incomplete(ctx) end
    pos = pos+1
    if b < 128 then ctx.rexr = true end
    local m = 1
    if pat == "3" then
      m = b%32; b = (b-m)/32
      local nb = b%2; b = (b-nb)/2
      if nb == 0 then ctx.rexb = true end
      local nx = b%2
      if nx == 0 then ctx.rexx = true end
      if pos > stop then return incomplete(ctx) end
      b = byte(ctx.code, pos, pos)
      if not b then return incomplete(ctx) end
      pos = pos+1
      if b >= 128 then ctx.rexw = true end
    end
    ctx.pos = pos
    local map
    if m == 1 then map = map_opc2
    elseif m == 2 then map = map_opc3["38"]
    elseif m == 3 then map = map_opc3["3a"]
    else return unknown(ctx) end
    -- Low two bits: implied prefix. Next bit: vector length. Then ~vvvv.
    local p = b%4; b = (b-p)/4
    if p == 1 then ctx.o16 = "o16"
    elseif p == 2 then ctx.rep = "rep"
    elseif p == 3 then ctx.rep = "repne" end
    local l = b%2; b = (b-l)/2
    if l ~= 0 then ctx.vexl = true end
    ctx.vexv = (-1-b)%16
    return dispatchmap(ctx, map)
  end,

  -- Special case for nop with REX prefix.
  nop = function(ctx, name, pat)
    return dispatch(ctx, ctx.rex and pat or "nop")
  end,

  -- Special case for 0F 77.
  emms = function(ctx, name, pat)
    if ctx.rex ~= "vex" then
      return putop(ctx, "emms")
    elseif ctx.vexl then
      ctx.vexl = false
      return putop(ctx, "zeroall")
    else
      return putop(ctx, "zeroupper")
    end
  end,
}
861
862------------------------------------------------------------------------------
863
-- Disassemble a block of code.
--
-- ofs: zero-based offset of the first byte in ctx.code (default 0).
-- len: number of bytes to disassemble (default: up to the end of the code).
-- The end of the block is clamped to the length of ctx.code, so a len that
-- is too large cannot make the decoder read beyond the code string.
-- Trailing bytes which only hold prefixes are emitted as "(incomplete)".
local function disass_block(ctx, ofs, len)
  if not ofs then ofs = 0 end
  local size = #ctx.code
  local stop = len and ofs+len or size
  if stop > size then stop = size end
  ofs = ofs + 1 -- Positions are one-based from here on.
  ctx.start = ofs
  ctx.pos = ofs
  ctx.stop = stop
  ctx.imm = nil
  ctx.mrm = false
  clearprefixes(ctx)
  while ctx.pos <= stop do dispatchmap(ctx, ctx.map1) end
  if ctx.pos ~= ctx.start then incomplete(ctx) end
end
878
-- Extended API: create a disassembler context. Then call ctx:disass(ofs, len).
--
-- code: string holding the machine code.
-- addr: address of the first byte of code (default 0).
-- out:  function called with each output line (default io.write).
-- The returned context decodes in 32 bit mode. Its symtab (address -> name)
-- and hexdump (columns of the hex dump) fields start out as {} and 16.
local function create(code, addr, out)
  return {
    code = code,
    addr = (addr or 0) - 1, -- Compensates for one-based code positions.
    out = out or io.write,
    symtab = {},
    disass = disass_block,
    hexdump = 16,
    x64 = false,
    map1 = map_opc1_32,
    aregs = map_regs.D,
  }
end
893
-- Same as create(), but the returned context decodes in 64 bit mode:
-- it uses the 64 bit opcode map and 64 bit address register names.
local function create64(code, addr, out)
  local ctx64 = create(code, addr, out)
  ctx64.x64, ctx64.map1, ctx64.aregs = true, map_opc1_64, map_regs.Q
  return ctx64
end
901
-- Simple API: disassemble code (a string) at address and output via out.
-- Decodes all of code in 32 bit mode.
local function disass(code, addr, out)
  local ctx = create(code, addr, out)
  ctx:disass()
end
906
-- Simple API for 64 bit mode: disassemble code (a string) at address and
-- output via out.
local function disass64(code, addr, out)
  local ctx = create64(code, addr, out)
  ctx:disass()
end
910
-- Return register name for RID.
-- 32 bit mode: RIDs 0-7 name the dword registers, RIDs from 8 on name the
-- xmm registers.
local function regname(r)
  if r >= 8 then return map_regs.X[r-7] end
  return map_regs.D[r+1]
end
916
-- Return register name for RID in 64 bit mode.
-- RIDs 0-15 name the qword registers, RIDs from 16 on name the xmm registers.
local function regname64(r)
  if r >= 16 then return map_regs.X[r-15] end
  return map_regs.Q[r+1]
end
921
-- Public module functions.
-- create/create64 return a context for the extended API (ctx:disass(ofs, len)),
-- disass/disass64 disassemble a whole string at once and regname/regname64
-- map a register ID to its name.
return {
  create = create,
  create64 = create64,
  disass = disass,
  disass64 = disass64,
  regname = regname,
  regname64 = regname64
}
931
932