1 /* -*- Mode: C++; tab-width: 8; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
2 /* vim: set ts=8 sts=2 et sw=2 tw=80: */
3 /* This Source Code Form is subject to the terms of the Mozilla Public
4 * License, v. 2.0. If a copy of the MPL was not distributed with this
5 * file, You can obtain one at https://mozilla.org/MPL/2.0/. */
6
7 #ifndef mozilla_interceptor_PatcherDetour_h
8 #define mozilla_interceptor_PatcherDetour_h
9
10 #if defined(_M_ARM64)
11 # include "mozilla/interceptor/Arm64.h"
12 #endif // defined(_M_ARM64)
13 #include <utility>
14
15 #include "mozilla/Maybe.h"
16 #include "mozilla/NativeNt.h"
17 #include "mozilla/ScopeExit.h"
18 #include "mozilla/TypedEnumBits.h"
19 #include "mozilla/Types.h"
20 #include "mozilla/Unused.h"
21 #include "mozilla/interceptor/PatcherBase.h"
22 #include "mozilla/interceptor/Trampoline.h"
23 #include "mozilla/interceptor/VMSharingPolicies.h"
24
// Copies NBYTES bytes of machine code from the read cursor of |origBytes|
// (a ReadOnlyTargetFunction) into |tramp| (a Trampoline), then advances the
// read cursor past them. Both names must be in scope at the expansion site;
// used while building trampolines in CreateTrampoline.
#define COPY_CODES(NBYTES)                          \
  do {                                              \
    tramp.CopyFrom(origBytes.GetAddress(), NBYTES); \
    origBytes += NBYTES;                            \
  } while (0)
30
31 namespace mozilla {
32 namespace interceptor {
33
// Option flags accepted by WindowsDllDetourPatcher::Init. These are bit
// flags (bitwise operators are generated below) and may be combined.
enum class DetourFlags : uint32_t {
  eDefault = 0,
  eEnable10BytePatch = 1,  // Allow 10-byte patches when conditions allow
  eTestOnlyForceShortPatch =
      2,  // Force short patches at all times (x86-64 and arm64 testing only)
  eDontResolveRedirection =
      4,  // Don't resolve the redirection of JMP (e.g. kernel32 -> kernelbase)
};
42
MOZ_MAKE_ENUM_CLASS_BITWISE_OPERATORS(DetourFlags)43 MOZ_MAKE_ENUM_CLASS_BITWISE_OPERATORS(DetourFlags)
44
45 // This class is responsible to do tasks which depend on MMPolicy, decoupled
46 // from VMPolicy. We already have WindowsDllPatcherBase, but it needs to
47 // depend on VMPolicy to hold an instance of VMPolicy as a member.
template <typename MMPolicyT>
class WindowsDllDetourPatcherPrimitive {
 protected:
#if defined(_M_ARM64)
  // Encoding of the AArch64 instruction "LDR x16, .+8": loads x16 with the
  // 64-bit value stored 8 bytes past the instruction itself.
  static const uint32_t kLdrX16Plus8 = 0x58000050U;
#endif  // defined(_M_ARM64)

  // Overwrites the start of |target| with an unconditional jump to aDest.
  // This is the default (worst-case-size) patch for each architecture; the
  // number of bytes written must agree with GetWorstCaseRequiredBytesToPatch.
  static void ApplyDefaultPatch(WritableTargetFunction<MMPolicyT>& target,
                                intptr_t aDest) {
#if defined(_M_IX86)
    // 5 bytes total: jmp rel32
    target.WriteByte(0xe9);     // jmp
    target.WriteDisp32(aDest);  // hook displacement
#elif defined(_M_X64)
    // 13 bytes total: mov r11, imm64; jmp r11
    // mov r11, address
    target.WriteByte(0x49);
    target.WriteByte(0xbb);
    target.WritePointer(aDest);

    // jmp r11
    target.WriteByte(0x41);
    target.WriteByte(0xff);
    target.WriteByte(0xe3);
#elif defined(_M_ARM64)
    // The default patch requires 16 bytes
    // LDR x16, .+8
    target.WriteLong(kLdrX16Plus8);
    // BR x16
    target.WriteLong(arm64::BuildUnconditionalBranchToRegister(16));
    target.WritePointer(aDest);
#else
# error "Unsupported processor architecture"
#endif
  }

 public:
  // Number of bytes that ApplyDefaultPatch writes on this architecture.
  constexpr static uint32_t GetWorstCaseRequiredBytesToPatch() {
#if defined(_M_IX86)
    return 5;   // jmp rel32
#elif defined(_M_X64)
    return 13;  // mov r11, imm64; jmp r11
#elif defined(_M_ARM64)
    return 16;  // ldr x16, .+8; br x16; 8-byte pointer
#else
# error "Unsupported processor architecture"
#endif
  }

  WindowsDllDetourPatcherPrimitive() = default;

  // Not copyable or movable.
  WindowsDllDetourPatcherPrimitive(const WindowsDllDetourPatcherPrimitive&) =
      delete;
  WindowsDllDetourPatcherPrimitive(WindowsDllDetourPatcherPrimitive&&) = delete;
  WindowsDllDetourPatcherPrimitive& operator=(
      const WindowsDllDetourPatcherPrimitive&) = delete;
  WindowsDllDetourPatcherPrimitive& operator=(
      WindowsDllDetourPatcherPrimitive&&) = delete;

  // Patches aTargetFn to jump directly to aHookDest WITHOUT creating a
  // trampoline to the original code: the original bytes are not preserved,
  // so the hook cannot be removed later ("irreversible").
  // Returns false if the target could not be made writable or committed.
  bool AddIrreversibleHook(const MMPolicyT& aMMPolicy, FARPROC aTargetFn,
                           intptr_t aHookDest) {
    ReadOnlyTargetFunction<MMPolicyT> targetReadOnly(aMMPolicy, aTargetFn);

    WritableTargetFunction<MMPolicyT> targetWritable(
        targetReadOnly.Promote(GetWorstCaseRequiredBytesToPatch()));
    if (!targetWritable) {
      return false;
    }

    ApplyDefaultPatch(targetWritable, aHookDest);

    return targetWritable.Commit();
  }
};
121
122 template <typename VMPolicy>
123 class WindowsDllDetourPatcher final
124 : public WindowsDllDetourPatcherPrimitive<typename VMPolicy::MMPolicyT>,
125 public WindowsDllPatcherBase<VMPolicy> {
126 using MMPolicyT = typename VMPolicy::MMPolicyT;
127 using TrampPoolT = typename VMPolicy::PoolType;
128 using PrimitiveT = WindowsDllDetourPatcherPrimitive<MMPolicyT>;
129 Maybe<DetourFlags> mFlags;
130
131 public:
  // Forwards all constructor arguments to WindowsDllPatcherBase, which owns
  // the VM policy instance.
  template <typename... Args>
  explicit WindowsDllDetourPatcher(Args&&... aArgs)
      : WindowsDllPatcherBase<VMPolicy>(std::forward<Args>(aArgs)...) {}
135
  // Unhooks everything this instance patched, if the VM policy permits
  // unhooking upon destruction (see Clear()).
  ~WindowsDllDetourPatcher() { Clear(); }
137
138 WindowsDllDetourPatcher(const WindowsDllDetourPatcher&) = delete;
139 WindowsDllDetourPatcher(WindowsDllDetourPatcher&&) = delete;
140 WindowsDllDetourPatcher& operator=(const WindowsDllDetourPatcher&) = delete;
141 WindowsDllDetourPatcher& operator=(WindowsDllDetourPatcher&&) = delete;
142
  // Unhooks every function this instance patched, restoring each patched
  // entry point so it jumps back at its trampoline, then releases the
  // trampoline VM. No-op unless the VM policy requests unhooking upon
  // destruction.
  void Clear() {
    if (!this->mVMPolicy.ShouldUnhookUponDestruction()) {
      return;
    }

    // Number of patched bytes we need writable in order to redirect the
    // patch back at the trampoline: the leading opcode byte(s) plus the
    // embedded displacement/pointer (see ApplyDefaultPatch).
#if defined(_M_IX86)
    size_t nBytes = 1 + sizeof(intptr_t);
#elif defined(_M_X64)
    size_t nBytes = 2 + sizeof(intptr_t);
#elif defined(_M_ARM64)
    size_t nBytes = 2 * sizeof(uint32_t) + sizeof(uintptr_t);
#else
# error "Unknown processor type"
#endif

    const auto& tramps = this->mVMPolicy.Items();
    for (auto&& tramp : tramps) {
      // First we read the pointer to the interceptor instance.
      Maybe<uintptr_t> instance = tramp.ReadEncodedPointer();
      if (!instance) {
        continue;
      }

      if (instance.value() != reinterpret_cast<uintptr_t>(this)) {
        // tramp does not belong to this interceptor instance.
        continue;
      }

      auto clearInstance = MakeScopeExit([&tramp]() -> void {
        // Clear the instance pointer so that no future instances with the same
        // |this| pointer will attempt to reset its hook.
        tramp.Rewind();
        tramp.WriteEncodedPointer(nullptr);
      });

      // Now we read the pointer to the intercepted function.
      Maybe<uintptr_t> interceptedFn = tramp.ReadEncodedPointer();
      if (!interceptedFn) {
        continue;
      }

      WritableTargetFunction<MMPolicyT> origBytes(
          this->mVMPolicy, interceptedFn.value(), nBytes);
      if (!origBytes) {
        continue;
      }

#if defined(_M_IX86) || defined(_M_X64)

      Maybe<uint8_t> maybeOpcode1 = origBytes.ReadByte();
      if (!maybeOpcode1) {
        continue;
      }

      uint8_t opcode1 = maybeOpcode1.value();

# if defined(_M_IX86)
      // Ensure the JMP from CreateTrampoline is where we expect it to be.
      MOZ_ASSERT(opcode1 == 0xE9);
      if (opcode1 != 0xE9) {
        continue;
      }

      intptr_t startOfTrampInstructions =
          static_cast<intptr_t>(tramp.GetCurrentRemoteAddress());

      // Point the jmp's rel32 back at the trampoline, undoing the
      // redirection to the hook.
      origBytes.WriteDisp32(startOfTrampInstructions);
      if (!origBytes) {
        continue;
      }

      origBytes.Commit();
# elif defined(_M_X64)
      // Dispatch on the first patched byte to determine which patch flavor
      // was applied (see ApplyDefaultPatch, Apply10BytePatch, and
      // PatchIfTargetIsRecognizedTrampoline).
      if (opcode1 == 0x49) {
        // 13-byte patch: mov r11, imm64; jmp r11
        if (!Clear13BytePatch(origBytes, tramp.GetCurrentRemoteAddress())) {
          continue;
        }
      } else if (opcode1 == 0xB8) {
        // 10-byte patch: mov eax, imm32; movsxd rax, eax; jmp rax
        if (!Clear10BytePatch(origBytes)) {
          continue;
        }
      } else if (opcode1 == 0x48) {
        // The original function was just a different trampoline
        if (!ClearTrampolinePatch(origBytes, tramp.GetCurrentRemoteAddress())) {
          continue;
        }
      } else {
        MOZ_ASSERT_UNREACHABLE("Unrecognized patch!");
        continue;
      }
# endif

#elif defined(_M_ARM64)

      // Ensure that we see the instruction that we expect
      Maybe<uint32_t> inst1 = origBytes.ReadLong();
      if (!inst1) {
        continue;
      }

      if (inst1.value() == this->kLdrX16Plus8) {
        // 16-byte patch: ldr x16, .+8; br x16; <pointer>
        if (!Clear16BytePatch(origBytes, tramp.GetCurrentRemoteAddress())) {
          continue;
        }
      } else if (arm64::IsUnconditionalBranchImm(inst1.value())) {
        // 4-byte patch: an immediate branch to a veneer.
        if (!Clear4BytePatch(inst1.value(), origBytes)) {
          continue;
        }
      } else {
        MOZ_ASSERT_UNREACHABLE("Unrecognized patch!");
        continue;
      }

#else
# error "Unknown processor type"
#endif
    }

    this->mVMPolicy.Clear();
  }
263
264 #if defined(_M_X64)
  // Reverses a 13-byte patch (49 BB imm64 = mov r11, imm64; then jmp r11)
  // by rewriting the imm64 so that the patched function jumps back to
  // aResetToAddress (the start of its trampoline) instead of the hook.
  // On entry, aOrigBytes must be positioned just past the first opcode
  // byte (0x49), which the caller has already validated.
  bool Clear13BytePatch(WritableTargetFunction<MMPolicyT>& aOrigBytes,
                        const uintptr_t aResetToAddress) {
    Maybe<uint8_t> maybeOpcode2 = aOrigBytes.ReadByte();
    if (!maybeOpcode2) {
      return false;
    }

    // Second byte of "mov r11, imm64" must be 0xBB.
    uint8_t opcode2 = maybeOpcode2.value();
    if (opcode2 != 0xBB) {
      return false;
    }

    aOrigBytes.WritePointer(aResetToAddress);
    if (!aOrigBytes) {
      return false;
    }

    return aOrigBytes.Commit();
  }
284
  // Reverses a patch that was applied on top of a foreign trampoline of the
  // form "48 B8 imm64" (mov rax, imm64) by
  // PatchIfTargetIsRecognizedTrampoline: the trampoline's prior target was
  // saved at aPtrToResetToAddress, and we write it back over the imm64.
  // On entry, aOrigBytes must be positioned just past the first opcode
  // byte (0x48), which the caller has already validated.
  bool ClearTrampolinePatch(WritableTargetFunction<MMPolicyT>& aOrigBytes,
                            const uintptr_t aPtrToResetToAddress) {
    // The target of the trampoline we replaced is stored at
    // aPtrToResetToAddress. We simply put it back where we got it from.
    Maybe<uint8_t> maybeOpcode2 = aOrigBytes.ReadByte();
    if (!maybeOpcode2) {
      return false;
    }

    // Second byte of "mov rax, imm64" must be 0xB8.
    uint8_t opcode2 = maybeOpcode2.value();
    if (opcode2 != 0xB8) {
      return false;
    }

    auto oldPtr = *(reinterpret_cast<const uintptr_t*>(aPtrToResetToAddress));

    aOrigBytes.WritePointer(oldPtr);
    if (!aOrigBytes) {
      return false;
    }

    return aOrigBytes.Commit();
  }
308
  // Reverses a 10-byte patch (see Apply10BytePatch): the imm32 following the
  // 0xB8 opcode is the low 31 bits of a pointer to an intermediate call
  // trampoline which itself contains a 13-byte patch. We locate that
  // trampoline and delegate to Clear13BytePatch.
  // On entry, aOrigBytes must be positioned just past the first opcode
  // byte (0xB8), which the caller has already validated.
  bool Clear10BytePatch(WritableTargetFunction<MMPolicyT>& aOrigBytes) {
    Maybe<uint32_t> maybePtr32 = aOrigBytes.ReadLong();
    if (!maybePtr32) {
      return false;
    }

    uint32_t ptr32 = maybePtr32.value();
    // We expect the high bit to be clear
    if (ptr32 & 0x80000000) {
      return false;
    }

    uintptr_t trampPtr = ptr32;

    // trampPtr points to an intermediate trampoline that contains a 13-byte
    // patch. We back up by sizeof(uintptr_t) so that we can access the pointer
    // to the stub trampoline.
    WritableTargetFunction<MMPolicyT> writableIntermediate(
        this->mVMPolicy, trampPtr - sizeof(uintptr_t), 13 + sizeof(uintptr_t));
    if (!writableIntermediate) {
      return false;
    }

    Maybe<uintptr_t> stubTramp = writableIntermediate.ReadEncodedPtr();
    if (!stubTramp || !stubTramp.value()) {
      return false;
    }

    Maybe<uint8_t> maybeOpcode1 = writableIntermediate.ReadByte();
    if (!maybeOpcode1) {
      return false;
    }

    // We expect this opcode to be the beginning of our normal mov r11, ptr
    // patch sequence.
    uint8_t opcode1 = maybeOpcode1.value();
    if (opcode1 != 0x49) {
      return false;
    }

    // Now we can just delegate the rest to our normal 13-byte patch clearing.
    return Clear13BytePatch(writableIntermediate, stubTramp.value());
  }
352 #endif // defined(_M_X64)
353
354 #if defined(_M_ARM64)
  // Reverses a 4-byte arm64 patch: a single immediate unconditional branch
  // into a veneer (see Apply4BytePatch). We decode the branch to find the
  // veneer, then delegate to Clear16BytePatch to restore the veneer's
  // pointer slot.
  bool Clear4BytePatch(const uint32_t aBranchImm,
                       WritableTargetFunction<MMPolicyT>& aOrigBytes) {
    MOZ_ASSERT(arm64::IsUnconditionalBranchImm(aBranchImm));

    // The branch is decoded relative to the instruction's own address; the
    // read cursor has already advanced past it, hence the subtraction.
    arm64::LoadOrBranch decoded = arm64::BUncondImmDecode(
        aOrigBytes.GetCurrentAddress() - sizeof(uint32_t), aBranchImm);

    uintptr_t trampPtr = decoded.mAbsAddress;

    // trampPtr points to an intermediate trampoline that contains a veneer.
    // We back up by sizeof(uintptr_t) so that we can access the pointer to the
    // stub trampoline.

    // We want trampLen to be the size of the veneer, plus one pointer (since
    // we are backing up trampPtr by one pointer)
    size_t trampLen = 16 + sizeof(uintptr_t);

    WritableTargetFunction<MMPolicyT> writableIntermediate(
        this->mVMPolicy, trampPtr - sizeof(uintptr_t), trampLen);
    if (!writableIntermediate) {
      return false;
    }

    Maybe<uintptr_t> stubTramp = writableIntermediate.ReadEncodedPtr();
    if (!stubTramp || !stubTramp.value()) {
      return false;
    }

    // The veneer must begin with our "LDR x16, .+8" instruction.
    Maybe<uint32_t> inst1 = writableIntermediate.ReadLong();
    if (!inst1 || inst1.value() != this->kLdrX16Plus8) {
      return false;
    }

    return Clear16BytePatch(writableIntermediate, stubTramp.value());
  }
390
Clear16BytePatch(WritableTargetFunction<MMPolicyT> & aOrigBytes,const uintptr_t aResetToAddress)391 bool Clear16BytePatch(WritableTargetFunction<MMPolicyT>& aOrigBytes,
392 const uintptr_t aResetToAddress) {
393 Maybe<uint32_t> inst2 = aOrigBytes.ReadLong();
394 if (!inst2) {
395 return false;
396 }
397
398 if (inst2.value() != arm64::BuildUnconditionalBranchToRegister(16)) {
399 MOZ_ASSERT_UNREACHABLE("Unrecognized patch!");
400 return false;
401 }
402
403 // Clobber the pointer to our hook function with a pointer to the
404 // start of the trampoline.
405 aOrigBytes.WritePointer(aResetToAddress);
406 aOrigBytes.Commit();
407
408 return true;
409 }
410 #endif // defined(_M_ARM64)
411
412 void Init(DetourFlags aFlags = DetourFlags::eDefault) {
413 if (Initialized()) {
414 return;
415 }
416
417 #if defined(_M_X64)
418 if (aFlags & DetourFlags::eTestOnlyForceShortPatch) {
419 aFlags |= DetourFlags::eEnable10BytePatch;
420 }
421 #endif // defined(_M_X64)
422
423 mFlags = Some(aFlags);
424 }
425
  // True once Init() has recorded a flag set.
  bool Initialized() const { return mFlags.isSome(); }
427
  // Installs a detour hook on aTargetFn that redirects execution to
  // aHookDest. On success, *aOrigFunc receives a pointer to the trampoline
  // through which the original implementation can still be invoked.
  // Init() must have been called first (mFlags is read unconditionally).
  bool AddHook(FARPROC aTargetFn, intptr_t aHookDest, void** aOrigFunc) {
    // Unless eDontResolveRedirection was requested, follow any JMP
    // redirection at the target (e.g. kernel32 -> kernelbase) and patch the
    // resolved destination instead.
    ReadOnlyTargetFunction<MMPolicyT> target(
        (mFlags.value() & DetourFlags::eDontResolveRedirection)
            ? ReadOnlyTargetFunction<MMPolicyT>(
                  this->mVMPolicy, reinterpret_cast<uintptr_t>(aTargetFn))
            : this->ResolveRedirectedAddress(aTargetFn));

    TrampPoolT* trampPool = nullptr;

#if defined(_M_ARM64)
    // ARM64 uses two passes to build its trampoline. The first pass uses a
    // null tramp to determine how many bytes are needed. Once that is known,
    // CreateTrampoline calls itself recursively with a "real" tramp.
    Trampoline<MMPolicyT> tramp(nullptr);
#else
    Maybe<TrampPoolT> maybeTrampPool = DoReserve();
    MOZ_ASSERT(maybeTrampPool);
    if (!maybeTrampPool) {
      return false;
    }

    trampPool = maybeTrampPool.ptr();

    Maybe<Trampoline<MMPolicyT>> maybeTramp(trampPool->GetNextTrampoline());
    if (!maybeTramp) {
      this->SetLastDetourError(
          DetourResultCode::DETOUR_PATCHER_NEXT_TRAMPOLINE_ERROR);
      return false;
    }

    Trampoline<MMPolicyT> tramp(std::move(maybeTramp.ref()));
#endif

    // CreateTrampoline signals success by setting *aOrigFunc non-null.
    CreateTrampoline(target, trampPool, tramp, aHookDest, aOrigFunc);
    if (!*aOrigFunc) {
      return false;
    }

    return true;
  }
468
469 private:
470 /**
471 * This function returns a maximum distance that can be reached by a single
472 * unconditional jump instruction. This is dependent on the processor ISA.
473 * Note that this distance is *exclusive* when added to the pivot, so the
474 * distance returned by this function is actually
475 * (maximum_absolute_offset + 1).
476 */
  // See the block comment above: returns (maximum_absolute_offset + 1) for a
  // single unconditional jump on this ISA.
  static uint32_t GetDefaultPivotDistance() {
#if defined(_M_ARM64)
    // Immediate unconditional branch allows for +/- 128MB
    return 0x08000000U;
#elif defined(_M_IX86) || defined(_M_X64)
    // For these ISAs, our distance will assume the use of an unconditional jmp
    // with a 32-bit signed displacement.
    return 0x80000000U;
#else
# error "Not defined for this processor arch"
#endif
  }
489
490 /**
491 * If we're reserving trampoline space for a specific module, we base the
492 * pivot off of the median address of the module's .text section. While this
493 * may not be precise, it should be accurate enough for our purposes: To
494 * ensure that the trampoline space is reachable by any executable code in the
495 * module.
496 */
  Maybe<TrampPoolT> ReserveForModule(HMODULE aModule) {
    nt::PEHeaders moduleHeaders(aModule);
    if (!moduleHeaders) {
      this->SetLastDetourError(
          DetourResultCode::DETOUR_PATCHER_RESERVE_FOR_MODULE_PE_ERROR);
      return Nothing();
    }

    Maybe<Span<const uint8_t>> textSectionInfo =
        moduleHeaders.GetTextSectionInfo();
    if (!textSectionInfo) {
      this->SetLastDetourError(
          DetourResultCode::DETOUR_PATCHER_RESERVE_FOR_MODULE_TEXT_ERROR);
      return Nothing();
    }

    // Pivot on the midpoint of the module's .text section so the reserved
    // trampoline space is within GetDefaultPivotDistance() of any code in
    // the module (see the class comment above).
    const uint8_t* median = textSectionInfo.value().data() +
                            (textSectionInfo.value().LengthBytes() / 2);

    Maybe<TrampPoolT> maybeTrampPool = this->mVMPolicy.Reserve(
        reinterpret_cast<uintptr_t>(median), GetDefaultPivotDistance());
    if (!maybeTrampPool) {
      this->SetLastDetourError(
          DetourResultCode::DETOUR_PATCHER_RESERVE_FOR_MODULE_RESERVE_ERROR);
    }
    return maybeTrampPool;
  }
524
  // Reserves trampoline VM space. When aModule is given, reserve near that
  // module's .text section; otherwise reserve with no pivot constraint
  // (or, on x64 with 10-byte patches enabled, below the 2GB boundary).
  Maybe<TrampPoolT> DoReserve(HMODULE aModule = nullptr) {
    if (aModule) {
      return ReserveForModule(aModule);
    }

    uintptr_t pivot = 0;
    uint32_t distance = 0;

#if defined(_M_X64)
    if (mFlags.value() & DetourFlags::eEnable10BytePatch) {
      // We must stay below the 2GB mark because a 10-byte patch uses movsxd
      // (ie, sign extension) to expand the pointer to 64-bits, so bit 31 of any
      // pointers into the reserved region must be 0.
      pivot = 0x40000000U;
      distance = 0x40000000U;
    }
#endif  // defined(_M_X64)

    Maybe<TrampPoolT> maybeTrampPool = this->mVMPolicy.Reserve(pivot, distance);
#if defined(NIGHTLY_BUILD)
    // Record a generic reservation error unless a more specific one was
    // already set further down the stack.
    if (!maybeTrampPool && this->GetLastDetourError().isNothing()) {
      this->SetLastDetourError(
          DetourResultCode::DETOUR_PATCHER_DO_RESERVE_ERROR);
    }
#endif  // defined(NIGHTLY_BUILD)
    return maybeTrampPool;
  }
552
553 protected:
554 #if !defined(_M_ARM64)
555
556 const static int kPageSize = 4096;
557
558 // rex bits
559 static const BYTE kMaskHighNibble = 0xF0;
560 static const BYTE kRexOpcode = 0x40;
561 static const BYTE kMaskRexW = 0x08;
562 static const BYTE kMaskRexR = 0x04;
563 static const BYTE kMaskRexX = 0x02;
564 static const BYTE kMaskRexB = 0x01;
565
566 // mod r/m bits
567 static const BYTE kRegFieldShift = 3;
568 static const BYTE kMaskMod = 0xC0;
569 static const BYTE kMaskReg = 0x38;
570 static const BYTE kMaskRm = 0x07;
571 static const BYTE kRmNeedSib = 0x04;
572 static const BYTE kModReg = 0xC0;
573 static const BYTE kModDisp32 = 0x80;
574 static const BYTE kModDisp8 = 0x40;
575 static const BYTE kModNoRegDisp = 0x00;
576 static const BYTE kRmNoRegDispDisp32 = 0x05;
577
578 // sib bits
579 static const BYTE kMaskSibScale = 0xC0;
580 static const BYTE kMaskSibIndex = 0x38;
581 static const BYTE kMaskSibBase = 0x07;
582 static const BYTE kSibBaseEbp = 0x05;
583
584 // Register bit IDs.
585 static const BYTE kRegAx = 0x0;
586 static const BYTE kRegCx = 0x1;
587 static const BYTE kRegDx = 0x2;
588 static const BYTE kRegBx = 0x3;
589 static const BYTE kRegSp = 0x4;
590 static const BYTE kRegBp = 0x5;
591 static const BYTE kRegSi = 0x6;
592 static const BYTE kRegDi = 0x7;
593
594 // Special ModR/M codes. These indicate operands that cannot be simply
595 // memcpy-ed.
596 // Operand is a 64-bit RIP-relative address.
597 static const int kModOperand64 = -2;
598 // Operand is not yet handled by our trampoline.
599 static const int kModUnknown = -1;
600
601 /**
602 * Returns the number of bytes taken by the ModR/M byte, SIB (if present)
603 * and the instruction's operand. In special cases, the special MODRM codes
604 * above are returned.
605 * aModRm points to the ModR/M byte of the instruction.
606 * On return, aSubOpcode (if present) is filled with the subopcode/register
607 * code found in the ModR/M byte.
608 */
  int CountModRmSib(const ReadOnlyTargetFunction<MMPolicyT>& aModRm,
                    BYTE* aSubOpcode = nullptr) {
    int numBytes = 1;  // Start with 1 for mod r/m byte itself
    switch (*aModRm & kMaskMod) {
      case kModReg:
        // Register-direct operand: no displacement, no SIB byte.
        return numBytes;
      case kModDisp8:
        numBytes += 1;
        break;
      case kModDisp32:
        numBytes += 4;
        break;
      case kModNoRegDisp:
        if ((*aModRm & kMaskRm) == kRmNoRegDispDisp32) {
# if defined(_M_X64)
          if (aSubOpcode) {
            *aSubOpcode = (*aModRm & kMaskReg) >> kRegFieldShift;
          }
          // mod=00 with r/m=101 is RIP-relative on x64: the operand cannot
          // simply be copied, the caller must rewrite it.
          return kModOperand64;
# else
          // On IA-32, all ModR/M instruction modes address memory relative to 0
          numBytes += 4;
# endif
        } else if (((*aModRm & kMaskRm) == kRmNeedSib &&
                    (*(aModRm + 1) & kMaskSibBase) == kSibBaseEbp)) {
          // SIB byte with base=101 under mod=00 carries a disp32.
          numBytes += 4;
        }
        break;
      default:
        // This should not be reachable
        MOZ_ASSERT_UNREACHABLE("Impossible value for modr/m byte mod bits");
        return kModUnknown;
    }
    if ((*aModRm & kMaskRm) == kRmNeedSib) {
      // SIB byte
      numBytes += 1;
    }
    if (aSubOpcode) {
      *aSubOpcode = (*aModRm & kMaskReg) >> kRegFieldShift;
    }
    return numBytes;
  }
651
652 # if defined(_M_X64)
653 enum class JumpType{Je, Jne, Jae, Jmp, Call};
654
  // Emits an absolute jump (or call) to aAbsTargetAddress into the
  // trampoline. Conditional types (Je/Jne/Jae) are emitted as the
  // *opposite* short conditional hopping over an absolute JMP, which
  // preserves the original instruction's branch semantics. Returns false if
  // the trampoline ran out of space.
  static bool GenerateJump(Trampoline<MMPolicyT>& aTramp,
                           uintptr_t aAbsTargetAddress, const JumpType aType) {
    // Near call, absolute indirect, address given in r/m32
    if (aType == JumpType::Call) {
      // CALL [RIP+0]
      aTramp.WriteByte(0xff);
      aTramp.WriteByte(0x15);
      // The offset to jump destination -- 2 bytes after the current position.
      aTramp.WriteInteger(2);
      aTramp.WriteByte(0xeb);  // JMP + 8 (jump over target address)
      aTramp.WriteByte(8);
      aTramp.WritePointer(aAbsTargetAddress);
      return !!aTramp;
    }

    // Write an opposite conditional jump because the destination branches
    // are swapped.
    if (aType == JumpType::Je) {
      // JNE RIP+14
      aTramp.WriteByte(0x75);
      aTramp.WriteByte(14);
    } else if (aType == JumpType::Jne) {
      // JE RIP+14
      aTramp.WriteByte(0x74);
      aTramp.WriteByte(14);
    } else if (aType == JumpType::Jae) {
      // JB RIP+14
      aTramp.WriteByte(0x72);
      aTramp.WriteByte(14);
    }

    // Near jmp, absolute indirect, address given in r/m32
    // JMP [RIP+0]
    aTramp.WriteByte(0xff);
    aTramp.WriteByte(0x25);
    // The offset to jump destination is 0
    aTramp.WriteInteger(0);
    aTramp.WritePointer(aAbsTargetAddress);

    return !!aTramp;
  }
696 # endif
697
698 enum ePrefixGroupBits{eNoPrefixes = 0, ePrefixGroup1 = (1 << 0),
699 ePrefixGroup2 = (1 << 1), ePrefixGroup3 = (1 << 2),
700 ePrefixGroup4 = (1 << 3)};
701
  // Counts x86 legacy prefix bytes at the start of |aBytes|, recording which
  // prefix groups were seen in *aOutGroupBits (ePrefixGroupN flags).
  // Returns the number of prefix bytes consumed, or -1 if a prefix group
  // repeats (an invalid encoding).
  int CountPrefixBytes(const ReadOnlyTargetFunction<MMPolicyT>& aBytes,
                       unsigned char* aOutGroupBits) {
    unsigned char& groupBits = *aOutGroupBits;
    groupBits = eNoPrefixes;
    int index = 0;
    while (true) {
      switch (aBytes[index]) {
        // Group 1
        case 0xF0:  // LOCK
        case 0xF2:  // REPNZ
        case 0xF3:  // REP / REPZ
          if (groupBits & ePrefixGroup1) {
            return -1;
          }
          groupBits |= ePrefixGroup1;
          ++index;
          break;

        // Group 2
        case 0x2E:  // CS override / branch not taken
        case 0x36:  // SS override
        case 0x3E:  // DS override / branch taken
        case 0x64:  // FS override
        case 0x65:  // GS override
          if (groupBits & ePrefixGroup2) {
            return -1;
          }
          groupBits |= ePrefixGroup2;
          ++index;
          break;

        // Group 3
        case 0x66:  // operand size override
          if (groupBits & ePrefixGroup3) {
            return -1;
          }
          groupBits |= ePrefixGroup3;
          ++index;
          break;

        // Group 4
        case 0x67:  // Address size override
          if (groupBits & ePrefixGroup4) {
            return -1;
          }
          groupBits |= ePrefixGroup4;
          ++index;
          break;

        default:
          // First non-prefix byte: we are done.
          return index;
      }
    }
  }
756
757 // Return a ModR/M byte made from the 2 Mod bits, the register used for the
758 // reg bits and the register used for the R/M bits.
BuildModRmByte(BYTE aModBits,BYTE aReg,BYTE aRm)759 BYTE BuildModRmByte(BYTE aModBits, BYTE aReg, BYTE aRm) {
760 MOZ_ASSERT((aRm & kMaskRm) == aRm);
761 MOZ_ASSERT((aModBits & kMaskMod) == aModBits);
762 MOZ_ASSERT(((aReg << kRegFieldShift) & kMaskReg) ==
763 (aReg << kRegFieldShift));
764 return aModBits | (aReg << kRegFieldShift) | aRm;
765 }
766
767 #endif // !defined(_M_ARM64)
768
769 // If originalFn is a recognized trampoline then patch it to call aDest,
770 // set *aTramp and *aOutTramp to that trampoline's target and return true.
  bool PatchIfTargetIsRecognizedTrampoline(
      Trampoline<MMPolicyT>& aTramp,
      ReadOnlyTargetFunction<MMPolicyT>& aOriginalFn, intptr_t aDest,
      void** aOutTramp) {
#if defined(_M_X64)
    // Variation 1:
    // 48 b8 imm64      mov rax, imm64
    // ff e0            jmp rax
    //
    // Variation 2:
    // 48 b8 imm64      mov rax, imm64
    // 50               push rax
    // c3               ret
    if ((aOriginalFn[0] == 0x48) && (aOriginalFn[1] == 0xB8) &&
        ((aOriginalFn[10] == 0xFF && aOriginalFn[11] == 0xE0) ||
         (aOriginalFn[10] == 0x50 && aOriginalFn[11] == 0xC3))) {
      // The imm64 is the foreign trampoline's current jump target.
      uintptr_t originalTarget =
          (aOriginalFn + 2).template ChasePointer<uintptr_t>();

      // Skip the first two bytes (48 b8) so that we can overwrite the imm64
      WritableTargetFunction<MMPolicyT> target(aOriginalFn.Promote(8, 2));
      if (!target) {
        return false;
      }

      // Write the new JMP target address.
      target.WritePointer(aDest);
      if (!target.Commit()) {
        return false;
      }

      // Store the old target address so we can restore it when we're cleared
      aTramp.WritePointer(originalTarget);
      if (!aTramp) {
        return false;
      }

      *aOutTramp = reinterpret_cast<void*>(originalTarget);
      return true;
    }
#endif  // defined(_M_X64)

    return false;
  }
815
816 #if defined(_M_ARM64)
  // Applies a 4-byte arm64 patch: a single immediate unconditional branch
  // from the target function into a veneer allocated from aTrampPool; the
  // veneer then performs the full jump to aDest. Fails if no veneer could
  // be built or the veneer is out of immediate-branch range of the target.
  bool Apply4BytePatch(TrampPoolT* aTrampPool, void* aTrampPtr,
                       WritableTargetFunction<MMPolicyT>& target,
                       intptr_t aDest) {
    MOZ_ASSERT(aTrampPool);
    if (!aTrampPool) {
      return false;
    }

    // The 4-byte branch only needs to reach the veneer, not the hook itself.
    uintptr_t hookDest = arm64::MakeVeneer(*aTrampPool, aTrampPtr, aDest);
    if (!hookDest) {
      return false;
    }

    Maybe<uint32_t> branchImm = arm64::BuildUnconditionalBranchImm(
        target.GetCurrentAddress(), hookDest);
    if (!branchImm) {
      return false;
    }

    target.WriteLong(branchImm.value());

    return true;
  }
840 #endif // defined(_M_ARM64)
841
842 #if defined(_M_X64)
  // Applies a 10-byte x64 patch:
  //   B8 imm32    mov eax, imm32   (low 31 bits of the call tramp address)
  //   48 63 C0    movsxd rax, eax
  //   FF E0       jmp rax
  // The imm32 points at an intermediate "call trampoline" (allocated below
  // the 2GB mark, see DoReserve) that performs the full
  // mov r11, aDest; jmp r11 sequence.
  bool Apply10BytePatch(TrampPoolT* aTrampPool, void* aTrampPtr,
                        WritableTargetFunction<MMPolicyT>& target,
                        intptr_t aDest) {
    // Note: Even if the target function is also below 2GB, we still use an
    // intermediary trampoline so that we consistently have a 64-bit pointer
    // that we can use to reset the trampoline upon interceptor shutdown.
    Maybe<Trampoline<MMPolicyT>> maybeCallTramp(
        aTrampPool->GetNextTrampoline());
    if (!maybeCallTramp) {
      return false;
    }

    Trampoline<MMPolicyT> callTramp(std::move(maybeCallTramp.ref()));

    // Write a null instance so that Clear() does not consider this tramp to
    // be a normal tramp to be torn down.
    callTramp.WriteEncodedPointer(nullptr);
    // Use the second pointer slot to store a pointer to the primary tramp
    callTramp.WriteEncodedPointer(aTrampPtr);
    callTramp.StartExecutableCode();

    // mov r11, address
    callTramp.WriteByte(0x49);
    callTramp.WriteByte(0xbb);
    callTramp.WritePointer(aDest);

    // jmp r11
    callTramp.WriteByte(0x41);
    callTramp.WriteByte(0xff);
    callTramp.WriteByte(0xe3);

    void* callTrampStart = callTramp.EndExecutableCode();
    if (!callTrampStart) {
      return false;
    }

    target.WriteByte(0xB8);  // MOV EAX, IMM32

    // Assert that the topmost 33 bits are 0
    MOZ_ASSERT(
        !(reinterpret_cast<uintptr_t>(callTrampStart) & (~0x7FFFFFFFULL)));

    target.WriteLong(static_cast<uint32_t>(
        reinterpret_cast<uintptr_t>(callTrampStart) & 0x7FFFFFFFU));
    target.WriteByte(0x48);  // REX.W
    target.WriteByte(0x63);  // MOVSXD r64, r/m32
    // dest: rax, src: eax
    target.WriteByte(BuildModRmByte(kModReg, kRegAx, kRegAx));
    target.WriteByte(0xFF);  // JMP /4
    target.WriteByte(BuildModRmByte(kModReg, 4, kRegAx));  // rax

    return true;
  }
896 #endif // defined(_M_X64)
897
CreateTrampoline(ReadOnlyTargetFunction<MMPolicyT> & origBytes,TrampPoolT * aTrampPool,Trampoline<MMPolicyT> & aTramp,intptr_t aDest,void ** aOutTramp)898 void CreateTrampoline(ReadOnlyTargetFunction<MMPolicyT>& origBytes,
899 TrampPoolT* aTrampPool, Trampoline<MMPolicyT>& aTramp,
900 intptr_t aDest, void** aOutTramp) {
901 *aOutTramp = nullptr;
902
903 Trampoline<MMPolicyT>& tramp = aTramp;
904 if (!tramp) {
905 this->SetLastDetourError(
906 DetourResultCode::DETOUR_PATCHER_INVALID_TRAMPOLINE);
907 return;
908 }
909
910 // The beginning of the trampoline contains two pointer-width slots:
911 // [0]: |this|, so that we know whether the trampoline belongs to us;
912 // [1]: Pointer to original function, so that we can reset the hooked
913 // function to its original behavior upon destruction. In rare cases
914 // where the function was already a different trampoline, this is
915 // just a pointer to that trampoline's target address.
916 tramp.WriteEncodedPointer(this);
917 if (!tramp) {
918 this->SetLastDetourError(
919 DetourResultCode::DETOUR_PATCHER_WRITE_POINTER_ERROR);
920 return;
921 }
922
923 auto clearInstanceOnFailure = MakeScopeExit([this, aOutTramp, &tramp,
924 &origBytes]() -> void {
925 // *aOutTramp is not set until CreateTrampoline has completed
926 // successfully, so we can use that to check for success.
927 if (*aOutTramp) {
928 return;
929 }
930
931 // Clear the instance pointer so that we don't try to reset a
932 // nonexistent hook.
933 tramp.Rewind();
934 tramp.WriteEncodedPointer(nullptr);
935
936 #if defined(NIGHTLY_BUILD)
937 origBytes.Rewind();
938 this->SetLastDetourError(
939 DetourResultCode::DETOUR_PATCHER_CREATE_TRAMPOLINE_ERROR);
940 DetourError& lastError = *this->mVMPolicy.mLastError;
941 size_t bytesToCapture = std::min(
942 ArrayLength(lastError.mOrigBytes),
943 static_cast<size_t>(PrimitiveT::GetWorstCaseRequiredBytesToPatch()));
944 # if defined(_M_ARM64)
945 size_t numInstructionsToCapture = bytesToCapture / sizeof(uint32_t);
946 auto origBytesDst = reinterpret_cast<uint32_t*>(lastError.mOrigBytes);
947 for (size_t i = 0; i < numInstructionsToCapture; ++i) {
948 origBytesDst[i] = origBytes.ReadNextInstruction();
949 }
950 # else
951 for (size_t i = 0; i < bytesToCapture; ++i) {
952 lastError.mOrigBytes[i] = origBytes[i];
953 }
954 # endif // defined(_M_ARM64)
955 #else
956 // Silence -Wunused-lambda-capture in non-Nightly.
957 Unused << this;
958 Unused << origBytes;
959 #endif // defined(NIGHTLY_BUILD)
960 });
961
962 tramp.WritePointer(origBytes.AsEncodedPtr());
963 if (!tramp) {
964 return;
965 }
966
967 if (PatchIfTargetIsRecognizedTrampoline(tramp, origBytes, aDest,
968 aOutTramp)) {
969 return;
970 }
971
972 tramp.StartExecutableCode();
973
974 constexpr uint32_t kWorstCaseBytesRequired =
975 PrimitiveT::GetWorstCaseRequiredBytesToPatch();
976
977 #if defined(_M_IX86)
978 int pJmp32 = -1;
979 while (origBytes.GetOffset() < kWorstCaseBytesRequired) {
980 // Understand some simple instructions that might be found in a
981 // prologue; we might need to extend this as necessary.
982 //
983 // Note! If we ever need to understand jump instructions, we'll
984 // need to rewrite the displacement argument.
985 unsigned char prefixGroups;
986 int numPrefixBytes = CountPrefixBytes(origBytes, &prefixGroups);
987 if (numPrefixBytes < 0 ||
988 (prefixGroups & (ePrefixGroup3 | ePrefixGroup4))) {
989 // Either the prefix sequence was bad, or there are prefixes that
990 // we don't currently support (groups 3 and 4)
991 MOZ_ASSERT_UNREACHABLE("Unrecognized opcode sequence");
992 return;
993 }
994
995 origBytes += numPrefixBytes;
996 if (*origBytes >= 0x88 && *origBytes <= 0x8B) {
997 // various MOVs
998 ++origBytes;
999 int len = CountModRmSib(origBytes);
1000 if (len < 0) {
1001 MOZ_ASSERT_UNREACHABLE("Unrecognized MOV opcode sequence");
1002 return;
1003 }
1004 origBytes += len;
1005 } else if (*origBytes == 0x0f &&
1006 (origBytes[1] == 0x10 || origBytes[1] == 0x11)) {
1007 // SSE: movups xmm, xmm/m128
1008 // movups xmm/m128, xmm
1009 origBytes += 2;
1010 int len = CountModRmSib(origBytes);
1011 if (len < 0) {
1012 MOZ_ASSERT_UNREACHABLE("Unrecognized MOV opcode sequence");
1013 return;
1014 }
1015 origBytes += len;
1016 } else if (*origBytes == 0xA1) {
1017 // MOV eax, [seg:offset]
1018 origBytes += 5;
1019 } else if (*origBytes == 0xB8) {
1020 // MOV 0xB8: http://ref.x86asm.net/coder32.html#xB8
1021 origBytes += 5;
1022 } else if (*origBytes == 0x33 && (origBytes[1] & kMaskMod) == kModReg) {
1023 // XOR r32, r32
1024 origBytes += 2;
1025 } else if ((*origBytes & 0xf8) == 0x40) {
1026 // INC r32
1027 origBytes += 1;
1028 } else if (*origBytes == 0x83) {
1029 uint8_t mod = static_cast<uint8_t>(origBytes[1]) & kMaskMod;
1030 uint8_t rm = static_cast<uint8_t>(origBytes[1]) & kMaskRm;
1031 if (mod == kModReg) {
1032 // ADD|OR|ADC|SBB|AND|SUB|XOR|CMP r, imm8
1033 origBytes += 3;
1034 } else if (mod == kModDisp8 && rm != kRmNeedSib) {
1035 // ADD|OR|ADC|SBB|AND|SUB|XOR|CMP [r+disp8], imm8
1036 origBytes += 4;
1037 } else {
1038 // bail
1039 MOZ_ASSERT_UNREACHABLE("Unrecognized bit opcode sequence");
1040 return;
1041 }
1042 } else if (*origBytes == 0x68) {
1043 // PUSH with 4-byte operand
1044 origBytes += 5;
1045 } else if ((*origBytes & 0xf0) == 0x50) {
1046 // 1-byte PUSH/POP
1047 ++origBytes;
1048 } else if (*origBytes == 0x6A) {
1049 // PUSH imm8
1050 origBytes += 2;
1051 } else if (*origBytes == 0xe9) {
1052 pJmp32 = origBytes.GetOffset();
1053 // jmp 32bit offset
1054 origBytes += 5;
1055 } else if (*origBytes == 0xff && origBytes[1] == 0x25) {
1056 // jmp [disp32]
1057 origBytes += 6;
1058 } else if (*origBytes == 0xc2) {
1059 // ret imm16. We can't handle this but it happens. We don't ASSERT but
1060 // we do fail to hook.
1061 # if defined(MOZILLA_INTERNAL_API)
1062 NS_WARNING("Cannot hook method -- RET opcode found");
1063 # endif
1064 return;
1065 } else {
1066 // printf ("Unknown x86 instruction byte 0x%02x, aborting trampoline\n",
1067 // *origBytes);
1068 MOZ_ASSERT_UNREACHABLE("Unrecognized opcode sequence");
1069 return;
1070 }
1071 }
1072
1073 // The trampoline is a copy of the instructions that we just traced,
1074 // followed by a jump that we add below.
1075 tramp.CopyFrom(origBytes.GetBaseAddress(), origBytes.GetOffset());
1076 if (!tramp) {
1077 return;
1078 }
1079 #elif defined(_M_X64)
1080 bool foundJmp = false;
1081 // |use10BytePatch| should always default to |false| in production. It is
1082 // not set to true unless we detect that a 10-byte patch is necessary.
1083 // OTOH, for testing purposes, if we want to force a 10-byte patch, we
1084 // always initialize |use10BytePatch| to |true|.
1085 bool use10BytePatch =
1086 (mFlags.value() & DetourFlags::eTestOnlyForceShortPatch) ==
1087 DetourFlags::eTestOnlyForceShortPatch;
1088 const uint32_t bytesRequired =
1089 use10BytePatch ? 10 : kWorstCaseBytesRequired;
1090
1091 while (origBytes.GetOffset() < bytesRequired) {
      // If we found JMP 32bit offset, we require that the next bytes must
      // be NOP or INT3. There is no reason to copy them.
      // TODO: This used to trigger for Je as well. Now that I allow
      //       instructions after CALL and JE, I don't think I need that.
      // The only real value of this condition is that if code follows a JMP
      // then it's _probably_ the target of a JMP somewhere else and we
      // will be overwriting it, which would be tragic. This seems
      // highly unlikely.
1100 if (foundJmp) {
1101 if (*origBytes == 0x90 || *origBytes == 0xcc) {
1102 ++origBytes;
1103 continue;
1104 }
1105
1106 // If our trampoline space is located in the lowest 2GB, we can do a ten
1107 // byte patch instead of a thirteen byte patch.
1108 if (aTrampPool && aTrampPool->IsInLowest2GB() &&
1109 origBytes.GetOffset() >= 10) {
1110 use10BytePatch = true;
1111 break;
1112 }
1113
1114 MOZ_ASSERT_UNREACHABLE("Opcode sequence includes commands after JMP");
1115 return;
1116 }
1117 if (*origBytes == 0x0f) {
1118 COPY_CODES(1);
1119 if (*origBytes == 0x1f) {
1120 // nop (multibyte)
1121 COPY_CODES(1);
1122 if ((*origBytes & 0xc0) == 0x40 && (*origBytes & 0x7) == 0x04) {
1123 COPY_CODES(3);
1124 } else {
1125 MOZ_ASSERT_UNREACHABLE("Unrecognized opcode sequence");
1126 return;
1127 }
1128 } else if (*origBytes == 0x05) {
1129 // syscall
1130 COPY_CODES(1);
1131 } else if (*origBytes == 0x10 || *origBytes == 0x11) {
1132 // SSE: movups xmm, xmm/m128
1133 // movups xmm/m128, xmm
1134 COPY_CODES(1);
1135 int nModRmSibBytes = CountModRmSib(origBytes);
1136 if (nModRmSibBytes < 0) {
1137 MOZ_ASSERT_UNREACHABLE("Unrecognized opcode sequence");
1138 return;
1139 } else {
1140 COPY_CODES(nModRmSibBytes);
1141 }
1142 } else if (*origBytes >= 0x83 && *origBytes <= 0x85) {
1143 // 0f 83 cd JAE rel32
1144 // 0f 84 cd JE rel32
1145 // 0f 85 cd JNE rel32
1146 const JumpType kJumpTypes[] = {JumpType::Jae, JumpType::Je,
1147 JumpType::Jne};
1148 auto jumpType = kJumpTypes[*origBytes - 0x83];
1149 ++origBytes;
1150 --tramp; // overwrite the 0x0f we copied above
1151
1152 if (!GenerateJump(tramp, origBytes.ReadDisp32AsAbsolute(),
1153 jumpType)) {
1154 return;
1155 }
1156 } else {
1157 MOZ_ASSERT_UNREACHABLE("Unrecognized opcode sequence");
1158 return;
1159 }
1160 } else if (*origBytes >= 0x88 && *origBytes <= 0x8B) {
1161 // various 32-bit MOVs
1162 COPY_CODES(1);
1163 int len = CountModRmSib(origBytes);
1164 if (len < 0) {
1165 MOZ_ASSERT_UNREACHABLE("Unrecognized MOV opcode sequence");
1166 return;
1167 }
1168 COPY_CODES(len);
1169 } else if (*origBytes == 0x40 || *origBytes == 0x41) {
1170 // Plain REX or REX.B
1171 COPY_CODES(1);
1172 if ((*origBytes & 0xf0) == 0x50) {
1173 // push/pop with Rx register
1174 COPY_CODES(1);
1175 } else if (*origBytes >= 0xb8 && *origBytes <= 0xbf) {
1176 // mov r32, imm32
1177 COPY_CODES(5);
1178 } else {
1179 MOZ_ASSERT_UNREACHABLE("Unrecognized opcode sequence");
1180 return;
1181 }
1182 } else if (*origBytes == 0x44) {
1183 // REX.R
1184 COPY_CODES(1);
1185
1186 // TODO: Combine with the "0x89" case below in the REX.W section
1187 if (*origBytes == 0x89) {
1188 // mov r/m32, r32
1189 COPY_CODES(1);
1190 int len = CountModRmSib(origBytes);
1191 if (len < 0) {
1192 MOZ_ASSERT_UNREACHABLE("Unrecognized opcode sequence");
1193 return;
1194 }
1195 COPY_CODES(len);
1196 } else {
1197 MOZ_ASSERT_UNREACHABLE("Unrecognized opcode sequence");
1198 return;
1199 }
1200 } else if (*origBytes == 0x45) {
1201 // REX.R & REX.B
1202 COPY_CODES(1);
1203
1204 if (*origBytes == 0x33) {
1205 // xor r32, r32
1206 COPY_CODES(2);
1207 } else {
1208 MOZ_ASSERT_UNREACHABLE("Unrecognized opcode sequence");
1209 return;
1210 }
1211 } else if ((*origBytes & 0xfa) == 0x48) {
1212 // REX.W | REX.WR | REX.WRB | REX.WB
1213 COPY_CODES(1);
1214
1215 if (*origBytes == 0x81 && (origBytes[1] & 0xf8) == 0xe8) {
1216 // sub r, dword
1217 COPY_CODES(6);
1218 } else if (*origBytes == 0x83 && (origBytes[1] & 0xf8) == 0xe8) {
1219 // sub r, byte
1220 COPY_CODES(3);
1221 } else if (*origBytes == 0x83 &&
1222 (origBytes[1] & (kMaskMod | kMaskReg)) == kModReg) {
1223 // add r, byte
1224 COPY_CODES(3);
1225 } else if (*origBytes == 0x83 && (origBytes[1] & 0xf8) == 0x60) {
1226 // and [r+d], imm8
1227 COPY_CODES(5);
1228 } else if (*origBytes == 0x2b && (origBytes[1] & kMaskMod) == kModReg) {
1229 // sub r64, r64
1230 COPY_CODES(2);
1231 } else if (*origBytes == 0x85) {
1232 // 85 /r => TEST r/m32, r32
1233 if ((origBytes[1] & 0xc0) == 0xc0) {
1234 COPY_CODES(2);
1235 } else {
1236 MOZ_ASSERT_UNREACHABLE("Unrecognized opcode sequence");
1237 return;
1238 }
1239 } else if ((*origBytes & 0xfd) == 0x89) {
1240 // MOV r/m64, r64 | MOV r64, r/m64
1241 BYTE reg;
1242 int len = CountModRmSib(origBytes + 1, ®);
1243 if (len < 0) {
1244 MOZ_ASSERT(len == kModOperand64);
1245 if (len != kModOperand64) {
1246 return;
1247 }
1248 origBytes += 2; // skip the MOV and MOD R/M bytes
1249
1250 // The instruction MOVs 64-bit data from a RIP-relative memory
1251 // address (determined with a 32-bit offset from RIP) into a
1252 // 64-bit register.
1253 uintptr_t absAddr = origBytes.ReadDisp32AsAbsolute();
1254
1255 if (reg == kRegAx) {
1256 // Destination is RAX. Encode instruction as MOVABS with a
1257 // 64-bit absolute address as its immediate operand.
1258 tramp.WriteByte(0xa1);
1259 tramp.WritePointer(absAddr);
1260 } else {
1261 // The MOV must be done in two steps. First, we MOVABS the
1262 // absolute 64-bit address into our target register.
1263 // Then, we MOV from that address into the register
1264 // using register-indirect addressing.
1265 tramp.WriteByte(0xb8 + reg);
1266 tramp.WritePointer(absAddr);
1267 tramp.WriteByte(0x48);
1268 tramp.WriteByte(0x8b);
1269 tramp.WriteByte(BuildModRmByte(kModNoRegDisp, reg, reg));
1270 }
1271 } else {
1272 COPY_CODES(len + 1);
1273 }
1274 } else if ((*origBytes & 0xf8) == 0xb8) {
1275 // MOV r64, imm64
1276 COPY_CODES(9);
1277 } else if (*origBytes == 0xc7) {
1278 // MOV r/m64, imm32
1279 if (origBytes[1] == 0x44) {
1280 // MOV [r64+disp8], imm32
            // ModR/M + SIB + disp8 + imm32
1282 COPY_CODES(8);
1283 } else {
1284 MOZ_ASSERT_UNREACHABLE("Unrecognized opcode sequence");
1285 return;
1286 }
1287 } else if (*origBytes == 0xff) {
1288 // JMP /4
1289 if ((origBytes[1] & 0xc0) == 0x0 && (origBytes[1] & 0x07) == 0x5) {
1290 origBytes += 2;
1291 --tramp; // overwrite the REX.W/REX.RW we copied above
1292
1293 if (!GenerateJump(tramp, origBytes.ChasePointerFromDisp(),
1294 JumpType::Jmp)) {
1295 return;
1296 }
1297
1298 foundJmp = true;
1299 } else {
            // not supported yet!
1301 MOZ_ASSERT_UNREACHABLE("Unrecognized opcode sequence");
1302 return;
1303 }
1304 } else if (*origBytes == 0x8d) {
1305 // LEA reg, addr
1306 if ((origBytes[1] & kMaskMod) == 0x0 &&
1307 (origBytes[1] & kMaskRm) == 0x5) {
1308 // [rip+disp32]
1309 // convert 32bit offset to 64bit direct and convert instruction
1310 // to a simple 64-bit mov
1311 BYTE reg = (origBytes[1] & kMaskReg) >> kRegFieldShift;
1312 origBytes += 2;
1313 uintptr_t absAddr = origBytes.ReadDisp32AsAbsolute();
1314 tramp.WriteByte(0xb8 + reg); // move
1315 tramp.WritePointer(absAddr);
1316 } else {
1317 // Above we dealt with RIP-relative instructions. Any other
1318 // operand form can simply be copied.
1319 int len = CountModRmSib(origBytes + 1);
1320 // We handled the kModOperand64 -- ie RIP-relative -- case above
1321 MOZ_ASSERT(len > 0);
1322 COPY_CODES(len + 1);
1323 }
1324 } else if (*origBytes == 0x63 && (origBytes[1] & kMaskMod) == kModReg) {
1325 // movsxd r64, r32 (move + sign extend)
1326 COPY_CODES(2);
1327 } else {
          // not supported yet!
1329 MOZ_ASSERT_UNREACHABLE("Unrecognized opcode sequence");
1330 return;
1331 }
1332 } else if (*origBytes == 0x66) {
1333 // operand override prefix
1334 COPY_CODES(1);
1335 // This is the same as the x86 version
1336 if (*origBytes >= 0x88 && *origBytes <= 0x8B) {
1337 // various MOVs
1338 unsigned char b = origBytes[1];
1339 if (((b & 0xc0) == 0xc0) ||
1340 (((b & 0xc0) == 0x00) && ((b & 0x07) != 0x04) &&
1341 ((b & 0x07) != 0x05))) {
1342 // REG=r, R/M=r or REG=r, R/M=[r]
1343 COPY_CODES(2);
1344 } else if ((b & 0xc0) == 0x40) {
1345 if ((b & 0x07) == 0x04) {
1346 // REG=r, R/M=[SIB + disp8]
1347 COPY_CODES(4);
1348 } else {
1349 // REG=r, R/M=[r + disp8]
1350 COPY_CODES(3);
1351 }
1352 } else {
1353 // complex MOV, bail
1354 MOZ_ASSERT_UNREACHABLE("Unrecognized MOV opcode sequence");
1355 return;
1356 }
1357 } else if (*origBytes == 0x44 && origBytes[1] == 0x89) {
1358 // mov word ptr [reg+disp8], reg
1359 COPY_CODES(2);
1360 int len = CountModRmSib(origBytes);
1361 if (len < 0) {
1362 // no way to support this yet.
1363 MOZ_ASSERT_UNREACHABLE("Unrecognized opcode sequence");
1364 return;
1365 }
1366 COPY_CODES(len);
1367 }
1368 } else if ((*origBytes & 0xf0) == 0x50) {
1369 // 1-byte push/pop
1370 COPY_CODES(1);
1371 } else if (*origBytes == 0x65) {
1372 // GS prefix
1373 //
1374 // The entry of GetKeyState on Windows 10 has the following code.
1375 // 65 48 8b 04 25 30 00 00 00 mov rax,qword ptr gs:[30h]
1376 // (GS prefix + REX + MOV (0x8b) ...)
1377 if (origBytes[1] == 0x48 &&
1378 (origBytes[2] >= 0x88 && origBytes[2] <= 0x8b)) {
1379 COPY_CODES(3);
1380 int len = CountModRmSib(origBytes);
1381 if (len < 0) {
1382 // no way to support this yet.
1383 MOZ_ASSERT_UNREACHABLE("Unrecognized opcode sequence");
1384 return;
1385 }
1386 COPY_CODES(len);
1387 } else {
1388 MOZ_ASSERT_UNREACHABLE("Unrecognized opcode sequence");
1389 return;
1390 }
1391 } else if (*origBytes == 0x80 && origBytes[1] == 0x3d) {
1392 origBytes += 2;
1393
1394 // cmp byte ptr [rip-relative address], imm8
1395 // We'll compute the absolute address and do the cmp in r11
1396
1397 // push r11 (to save the old value)
1398 tramp.WriteByte(0x49);
1399 tramp.WriteByte(0x53);
1400
1401 uintptr_t absAddr = origBytes.ReadDisp32AsAbsolute();
1402
1403 // mov r11, absolute address
1404 tramp.WriteByte(0x49);
1405 tramp.WriteByte(0xbb);
1406 tramp.WritePointer(absAddr);
1407
1408 // cmp byte ptr [r11],...
1409 tramp.WriteByte(0x41);
1410 tramp.WriteByte(0x80);
1411 tramp.WriteByte(0x3b);
1412
1413 // ...imm8
1414 COPY_CODES(1);
1415
1416 // pop r11 (doesn't affect the flags from the cmp)
1417 tramp.WriteByte(0x49);
1418 tramp.WriteByte(0x5b);
1419 } else if (*origBytes == 0x90) {
1420 // nop
1421 COPY_CODES(1);
1422 } else if ((*origBytes & 0xf8) == 0xb8) {
1423 // MOV r32, imm32
1424 COPY_CODES(5);
1425 } else if (*origBytes == 0x33) {
1426 // xor r32, r/m32
1427 COPY_CODES(2);
1428 } else if (*origBytes == 0xf6) {
1429 // test r/m8, imm8 (used by ntdll on Windows 10 x64)
1430 // (no flags are affected by near jmp since there is no task switch,
1431 // so it is ok for a jmp to be written immediately after a test)
1432 BYTE subOpcode = 0;
1433 int nModRmSibBytes = CountModRmSib(origBytes + 1, &subOpcode);
1434 if (nModRmSibBytes < 0 || subOpcode != 0) {
1435 // Unsupported
1436 MOZ_ASSERT_UNREACHABLE("Unrecognized opcode sequence");
1437 return;
1438 }
1439 COPY_CODES(2 + nModRmSibBytes);
1440 } else if (*origBytes == 0x85) {
1441 // test r/m32, r32
1442 int nModRmSibBytes = CountModRmSib(origBytes + 1);
1443 if (nModRmSibBytes < 0) {
1444 MOZ_ASSERT_UNREACHABLE("Unrecognized opcode sequence");
1445 return;
1446 }
1447 COPY_CODES(1 + nModRmSibBytes);
1448 } else if (*origBytes == 0xd1 && (origBytes[1] & kMaskMod) == kModReg) {
1449 // bit shifts/rotates : (SA|SH|RO|RC)(R|L) r32
1450 // (e.g. 0xd1 0xe0 is SAL, 0xd1 0xc8 is ROR)
1451 COPY_CODES(2);
1452 } else if (*origBytes == 0x83 && (origBytes[1] & kMaskMod) == kModReg) {
1453 // ADD|OR|ADC|SBB|AND|SUB|XOR|CMP r, imm8
1454 COPY_CODES(3);
1455 } else if (*origBytes == 0xc3) {
1456 // ret
1457 COPY_CODES(1);
1458 } else if (*origBytes == 0xcc) {
1459 // int 3
1460 COPY_CODES(1);
1461 } else if (*origBytes == 0xe8 || *origBytes == 0xe9) {
1462 // CALL (0xe8) or JMP (0xe9) 32bit offset
1463 foundJmp = *origBytes == 0xe9;
1464 ++origBytes;
1465
1466 if (!GenerateJump(tramp, origBytes.ReadDisp32AsAbsolute(),
1467 foundJmp ? JumpType::Jmp : JumpType::Call)) {
1468 return;
1469 }
1470 } else if (*origBytes >= 0x73 && *origBytes <= 0x75) {
1471 // 73 cb JAE rel8
1472 // 74 cb JE rel8
1473 // 75 cb JNE rel8
1474 const JumpType kJumpTypes[] = {JumpType::Jae, JumpType::Je,
1475 JumpType::Jne};
1476 auto jumpType = kJumpTypes[*origBytes - 0x73];
1477 uint8_t offset = origBytes[1];
1478
1479 origBytes += 2;
1480
1481 if (!GenerateJump(tramp, origBytes.OffsetToAbsolute(offset),
1482 jumpType)) {
1483 return;
1484 }
1485 } else if (*origBytes == 0xff) {
1486 uint8_t mod = origBytes[1] & kMaskMod;
1487 uint8_t reg = (origBytes[1] & kMaskReg) >> kRegFieldShift;
1488 uint8_t rm = origBytes[1] & kMaskRm;
1489 if (mod == kModReg && (reg == 0 || reg == 1 || reg == 2 || reg == 6)) {
1490 // INC|DEC|CALL|PUSH r64
1491 COPY_CODES(2);
1492 } else if (mod == kModNoRegDisp && reg == 2 &&
1493 rm == kRmNoRegDispDisp32) {
1494 // FF 15 CALL [disp32]
1495 origBytes += 2;
1496 if (!GenerateJump(tramp, origBytes.ChasePointerFromDisp(),
1497 JumpType::Call)) {
1498 return;
1499 }
1500 } else if (reg == 4) {
1501 // FF /4 (Opcode=ff, REG=4): JMP r/m
1502 if (mod == kModNoRegDisp && rm == kRmNoRegDispDisp32) {
1503 // FF 25 JMP [disp32]
1504 foundJmp = true;
1505
1506 origBytes += 2;
1507
1508 uintptr_t jmpDest = origBytes.ChasePointerFromDisp();
1509
1510 if (!GenerateJump(tramp, jmpDest, JumpType::Jmp)) {
1511 return;
1512 }
1513 } else {
1514 // JMP r/m except JMP [disp32]
1515 int len = CountModRmSib(origBytes + 1);
1516 if (len < 0) {
1517 // RIP-relative not yet supported
1518 MOZ_ASSERT_UNREACHABLE("Unrecognized opcode sequence");
1519 return;
1520 }
1521
1522 COPY_CODES(len + 1);
1523
1524 foundJmp = true;
1525 }
1526 } else {
1527 MOZ_ASSERT_UNREACHABLE("Unrecognized opcode sequence");
1528 return;
1529 }
1530 } else if (*origBytes == 0x83 && (origBytes[1] & 0xf8) == 0x60) {
1531 // and [r+d], imm8
1532 COPY_CODES(5);
1533 } else if (*origBytes == 0xc6) {
1534 // mov [r+d], imm8
1535 int len = CountModRmSib(origBytes + 1);
1536 if (len < 0) {
1537 // RIP-relative not yet supported
1538 MOZ_ASSERT_UNREACHABLE("Unrecognized opcode sequence");
1539 return;
1540 }
1541 COPY_CODES(len + 2);
1542 } else {
1543 MOZ_ASSERT_UNREACHABLE("Unrecognized opcode sequence");
1544 return;
1545 }
1546 }
1547 #elif defined(_M_ARM64)
1548
1549 // The number of bytes required to facilitate a detour depends on the
1550 // proximity of the hook function to the target function. In the best case,
1551 // we can branch within +/- 128MB of the current location, requiring only
1552 // 4 bytes. In the worst case, we need 16 bytes to load an absolute address
1553 // into a register and then branch to it.
1554 const uint32_t bytesRequiredFromDecode =
1555 (mFlags.value() & DetourFlags::eTestOnlyForceShortPatch)
1556 ? 4
1557 : kWorstCaseBytesRequired;
1558
1559 while (origBytes.GetOffset() < bytesRequiredFromDecode) {
1560 uintptr_t curPC = origBytes.GetCurrentAbsolute();
1561 uint32_t curInst = origBytes.ReadNextInstruction();
1562
1563 Result<arm64::LoadOrBranch, arm64::PCRelCheckError> pcRelInfo =
1564 arm64::CheckForPCRel(curPC, curInst);
1565 if (pcRelInfo.isErr()) {
1566 if (pcRelInfo.unwrapErr() ==
1567 arm64::PCRelCheckError::InstructionNotPCRel) {
1568 // Instruction is not PC-relative, we can just copy it verbatim
1569 tramp.WriteInstruction(curInst);
1570 continue;
1571 }
1572
1573 // At this point we have determined that there is no decoder available
1574 // for the current, PC-relative, instruction.
1575
1576 // origBytes is now pointing one instruction past the one that we
1577 // need the trampoline to jump back to.
1578 if (!origBytes.BackUpOneInstruction()) {
1579 return;
1580 }
1581
1582 break;
1583 }
1584
1585 // We need to load an absolute address into a particular register
1586 tramp.WriteLoadLiteral(pcRelInfo.inspect().mAbsAddress,
1587 pcRelInfo.inspect().mDestReg);
1588 }
1589
1590 #else
1591 # error "Unknown processor type"
1592 #endif
1593
1594 if (origBytes.GetOffset() > 100) {
1595 // printf ("Too big!");
1596 return;
1597 }
1598
1599 #if defined(_M_IX86)
1600 if (pJmp32 >= 0) {
1601 // Jump directly to the original target of the jump instead of jumping to
1602 // the original function. Adjust jump target displacement to jump location
1603 // in the trampoline.
1604 tramp.AdjustDisp32AtOffset(pJmp32 + 1, origBytes.GetBaseAddress());
1605 } else {
1606 tramp.WriteByte(0xe9); // jmp
1607 tramp.WriteDisp32(origBytes.GetAddress());
1608 }
1609 #elif defined(_M_X64)
1610 // If we found a Jmp, we don't need to add another instruction. However,
1611 // if we found a _conditional_ jump or a CALL (or no control operations
1612 // at all) then we still need to run the rest of aOriginalFunction.
1613 if (!foundJmp) {
1614 if (!GenerateJump(tramp, origBytes.GetAddress(), JumpType::Jmp)) {
1615 return;
1616 }
1617 }
1618 #elif defined(_M_ARM64)
1619 // Let's find out how many bytes we have available to us for patching
1620 uint32_t numBytesForPatching = tramp.GetCurrentExecutableCodeLen();
1621
1622 if (!numBytesForPatching) {
1623 // There's nothing we can do
1624 return;
1625 }
1626
1627 if (tramp.IsNull()) {
1628 // Recursive case
1629 HMODULE targetModule = nullptr;
1630
1631 if (numBytesForPatching < kWorstCaseBytesRequired) {
1632 if (!::GetModuleHandleExW(
1633 GET_MODULE_HANDLE_EX_FLAG_FROM_ADDRESS |
1634 GET_MODULE_HANDLE_EX_FLAG_UNCHANGED_REFCOUNT,
1635 reinterpret_cast<LPCWSTR>(origBytes.GetBaseAddress()),
1636 &targetModule)) {
1637 return;
1638 }
1639 }
1640
1641 Maybe<TrampPoolT> maybeTrampPool = DoReserve(targetModule);
1642 MOZ_ASSERT(maybeTrampPool);
1643 if (!maybeTrampPool) {
1644 return;
1645 }
1646
1647 Maybe<Trampoline<MMPolicyT>> maybeRealTramp(
1648 maybeTrampPool.ref().GetNextTrampoline());
1649 if (!maybeRealTramp) {
1650 return;
1651 }
1652
1653 origBytes.Rewind();
1654 CreateTrampoline(origBytes, maybeTrampPool.ptr(), maybeRealTramp.ref(),
1655 aDest, aOutTramp);
1656 return;
1657 }
1658
1659 // Write the branch from the trampoline back to the original code
1660
1661 tramp.WriteLoadLiteral(origBytes.GetAddress(), 16);
1662 tramp.WriteInstruction(arm64::BuildUnconditionalBranchToRegister(16));
1663 #else
1664 # error "Unsupported processor architecture"
1665 #endif
1666
1667 // The trampoline is now complete.
1668 void* trampPtr = tramp.EndExecutableCode();
1669 if (!trampPtr) {
1670 return;
1671 }
1672
1673 WritableTargetFunction<MMPolicyT> target(origBytes.Promote());
1674 if (!target) {
1675 return;
1676 }
1677
1678 do {
1679 // Now patch the original function.
1680 // When we're instructed to apply a non-default patch, apply it and exit.
1681 // If non-default patching fails, bail out, no fallback.
1682 // Otherwise, we go straight to the default patch.
1683
1684 #if defined(_M_X64)
1685 if (use10BytePatch) {
1686 if (!Apply10BytePatch(aTrampPool, trampPtr, target, aDest)) {
1687 return;
1688 }
1689 break;
1690 }
1691 #elif defined(_M_ARM64)
1692 if (numBytesForPatching < kWorstCaseBytesRequired) {
1693 if (!Apply4BytePatch(aTrampPool, trampPtr, target, aDest)) {
1694 return;
1695 }
1696 break;
1697 }
1698 #endif
1699
1700 PrimitiveT::ApplyDefaultPatch(target, aDest);
1701 } while (false);
1702
1703 if (!target.Commit()) {
1704 return;
1705 }
1706
1707 // Output the trampoline, thus signalling that this call was a success
1708 *aOutTramp = trampPtr;
1709 }
1710 };
1711
1712 } // namespace interceptor
1713 } // namespace mozilla
1714
1715 #endif // mozilla_interceptor_PatcherDetour_h
1716