1 /* -*- Mode: C++; tab-width: 8; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
2 /* vim: set ts=8 sts=2 et sw=2 tw=80: */
3 /* This Source Code Form is subject to the terms of the Mozilla Public
4  * License, v. 2.0. If a copy of the MPL was not distributed with this
5  * file, You can obtain one at https://mozilla.org/MPL/2.0/. */
6 
7 #ifndef mozilla_interceptor_PatcherDetour_h
8 #define mozilla_interceptor_PatcherDetour_h
9 
10 #if defined(_M_ARM64)
11 #  include "mozilla/interceptor/Arm64.h"
12 #endif  // defined(_M_ARM64)
13 #include <utility>
14 
15 #include "mozilla/Maybe.h"
16 #include "mozilla/NativeNt.h"
17 #include "mozilla/ScopeExit.h"
18 #include "mozilla/TypedEnumBits.h"
19 #include "mozilla/Types.h"
20 #include "mozilla/Unused.h"
21 #include "mozilla/interceptor/PatcherBase.h"
22 #include "mozilla/interceptor/Trampoline.h"
23 #include "mozilla/interceptor/VMSharingPolicies.h"
24 
// Passes the current address of |origBytes| together with NBYTES to
// tramp.CopyFrom(), then advances |origBytes| by NBYTES.
//
// |tramp| and |origBytes| are not macro parameters: they must be variables
// that are in scope wherever the macro is expanded.
//
// NBYTES is evaluated exactly once, so passing an expression with side effects
// (or one containing low-precedence operators) is safe.
#define COPY_CODES(NBYTES)                                   \
  do {                                                       \
    const auto copyCodesCount_ = (NBYTES);                   \
    tramp.CopyFrom(origBytes.GetAddress(), copyCodesCount_); \
    origBytes += copyCodesCount_;                            \
  } while (0)
30 
31 namespace mozilla {
32 namespace interceptor {
33 
// Bit flags that tune how a detour is installed.  Every enumerator other than
// eDefault occupies its own bit.
enum class DetourFlags : uint32_t {
  // No special behavior requested.
  eDefault = 0,
  // Allow 10-byte patches when conditions allow.
  eEnable10BytePatch = 1U << 0,
  // Force short patches at all times (x86-64 and arm64 testing only).
  eTestOnlyForceShortPatch = 1U << 1,
  // Don't resolve the redirection of JMP (e.g. kernel32 -> kernelbase).
  eDontResolveRedirection = 1U << 2,
};
42 
// Defines the bitwise operators for DetourFlags so that individual flags can
// be combined and tested as a bitmask.
MOZ_MAKE_ENUM_CLASS_BITWISE_OPERATORS(DetourFlags)
44 
45 // This class is responsible to do tasks which depend on MMPolicy, decoupled
46 // from VMPolicy.  We already have WindowsDllPatcherBase, but it needs to
47 // depend on VMPolicy to hold an instance of VMPolicy as a member.
48 template <typename MMPolicyT>
49 class WindowsDllDetourPatcherPrimitive {
50  protected:
51 #if defined(_M_ARM64)
52   // LDR x16, .+8
53   static const uint32_t kLdrX16Plus8 = 0x58000050U;
54 #endif  // defined(_M_ARM64)
55 
56   static void ApplyDefaultPatch(WritableTargetFunction<MMPolicyT>& target,
57                                 intptr_t aDest) {
58 #if defined(_M_IX86)
59     target.WriteByte(0xe9);     // jmp
60     target.WriteDisp32(aDest);  // hook displacement
61 #elif defined(_M_X64)
62     // mov r11, address
63     target.WriteByte(0x49);
64     target.WriteByte(0xbb);
65     target.WritePointer(aDest);
66 
67     // jmp r11
68     target.WriteByte(0x41);
69     target.WriteByte(0xff);
70     target.WriteByte(0xe3);
71 #elif defined(_M_ARM64)
72     // The default patch requires 16 bytes
73     // LDR x16, .+8
74     target.WriteLong(kLdrX16Plus8);
75     // BR x16
76     target.WriteLong(arm64::BuildUnconditionalBranchToRegister(16));
77     target.WritePointer(aDest);
78 #else
79 #  error "Unsupported processor architecture"
80 #endif
81   }
82 
83  public:
84   constexpr static uint32_t GetWorstCaseRequiredBytesToPatch() {
85 #if defined(_M_IX86)
86     return 5;
87 #elif defined(_M_X64)
88     return 13;
89 #elif defined(_M_ARM64)
90     return 16;
91 #else
92 #  error "Unsupported processor architecture"
93 #endif
94   }
95 
96   WindowsDllDetourPatcherPrimitive() = default;
97 
98   WindowsDllDetourPatcherPrimitive(const WindowsDllDetourPatcherPrimitive&) =
99       delete;
100   WindowsDllDetourPatcherPrimitive(WindowsDllDetourPatcherPrimitive&&) = delete;
101   WindowsDllDetourPatcherPrimitive& operator=(
102       const WindowsDllDetourPatcherPrimitive&) = delete;
103   WindowsDllDetourPatcherPrimitive& operator=(
104       WindowsDllDetourPatcherPrimitive&&) = delete;
105 
106   bool AddIrreversibleHook(const MMPolicyT& aMMPolicy, FARPROC aTargetFn,
107                            intptr_t aHookDest) {
108     ReadOnlyTargetFunction<MMPolicyT> targetReadOnly(aMMPolicy, aTargetFn);
109 
110     WritableTargetFunction<MMPolicyT> targetWritable(
111         targetReadOnly.Promote(GetWorstCaseRequiredBytesToPatch()));
112     if (!targetWritable) {
113       return false;
114     }
115 
116     ApplyDefaultPatch(targetWritable, aHookDest);
117 
118     return targetWritable.Commit();
119   }
120 };
121 
122 template <typename VMPolicy>
123 class WindowsDllDetourPatcher final
124     : public WindowsDllDetourPatcherPrimitive<typename VMPolicy::MMPolicyT>,
125       public WindowsDllPatcherBase<VMPolicy> {
  // Memory-management policy type supplied by VMPolicy.
  using MMPolicyT = typename VMPolicy::MMPolicyT;
  // Trampoline pool type supplied by VMPolicy.
  using TrampPoolT = typename VMPolicy::PoolType;
  // Shorthand for the MMPolicy-only base class.
  using PrimitiveT = WindowsDllDetourPatcherPrimitive<MMPolicyT>;
  // Flags selected by Init(); holds Nothing until Init() has run.
  Maybe<DetourFlags> mFlags;
130 
131  public:
  // Perfect-forwards every constructor argument to
  // WindowsDllPatcherBase<VMPolicy>.
  template <typename... Args>
  explicit WindowsDllDetourPatcher(Args&&... aArgs)
      : WindowsDllPatcherBase<VMPolicy>(std::forward<Args>(aArgs)...) {}

  // Runs Clear() when the patcher is destroyed.
  ~WindowsDllDetourPatcher() { Clear(); }

  // Neither copyable nor movable.
  // NOTE(review): presumably because trampolines record the address of the
  // instance that owns them -- confirm.
  WindowsDllDetourPatcher(const WindowsDllDetourPatcher&) = delete;
  WindowsDllDetourPatcher(WindowsDllDetourPatcher&&) = delete;
  WindowsDllDetourPatcher& operator=(const WindowsDllDetourPatcher&) = delete;
  WindowsDllDetourPatcher& operator=(WindowsDllDetourPatcher&&) = delete;
142 
  // Neutralizes every hook that belongs to this instance and then clears the
  // VM policy.  Does nothing at all unless the VM policy reports that hooks
  // should be removed upon destruction.
  //
  // The patch written into each intercepted function is not removed; instead
  // its destination is rewritten so that it no longer leads to the hook
  // function.  For the regular patches the new destination is the start of the
  // trampoline's instructions.
  void Clear() {
    if (!this->mVMPolicy.ShouldUnhookUponDestruction()) {
      return;
    }

    // Number of bytes passed to WritableTargetFunction below when opening the
    // start of each intercepted function for writing.
#if defined(_M_IX86)
    size_t nBytes = 1 + sizeof(intptr_t);
#elif defined(_M_X64)
    size_t nBytes = 2 + sizeof(intptr_t);
#elif defined(_M_ARM64)
    size_t nBytes = 2 * sizeof(uint32_t) + sizeof(uintptr_t);
#else
#  error "Unknown processor type"
#endif

    const auto& tramps = this->mVMPolicy.Items();
    for (auto&& tramp : tramps) {
      // First we read the pointer to the interceptor instance.
      Maybe<uintptr_t> instance = tramp.ReadEncodedPointer();
      if (!instance) {
        continue;
      }

      if (instance.value() != reinterpret_cast<uintptr_t>(this)) {
        // tramp does not belong to this interceptor instance.
        continue;
      }

      // From here on, every way of leaving this iteration (including each
      // |continue| below) also resets the trampoline's instance slot.
      auto clearInstance = MakeScopeExit([&tramp]() -> void {
        // Clear the instance pointer so that no future instances with the same
        // |this| pointer will attempt to reset its hook.
        tramp.Rewind();
        tramp.WriteEncodedPointer(nullptr);
      });

      // Now we read the pointer to the intercepted function.
      Maybe<uintptr_t> interceptedFn = tramp.ReadEncodedPointer();
      if (!interceptedFn) {
        continue;
      }

      // Having consumed both pointer slots, tramp.GetCurrentRemoteAddress()
      // is used below as the address that follows them in the trampoline.
      WritableTargetFunction<MMPolicyT> origBytes(
          this->mVMPolicy, interceptedFn.value(), nBytes);
      if (!origBytes) {
        continue;
      }

#if defined(_M_IX86) || defined(_M_X64)

      // The first byte of the patched function identifies the kind of patch.
      Maybe<uint8_t> maybeOpcode1 = origBytes.ReadByte();
      if (!maybeOpcode1) {
        continue;
      }

      uint8_t opcode1 = maybeOpcode1.value();

#  if defined(_M_IX86)
      // Ensure the JMP from CreateTrampoline is where we expect it to be.
      MOZ_ASSERT(opcode1 == 0xE9);
      if (opcode1 != 0xE9) {
        continue;
      }

      intptr_t startOfTrampInstructions =
          static_cast<intptr_t>(tramp.GetCurrentRemoteAddress());

      // Retarget the JMP displacement at the trampoline's instructions.
      origBytes.WriteDisp32(startOfTrampInstructions);
      if (!origBytes) {
        continue;
      }

      origBytes.Commit();
#  elif defined(_M_X64)
      if (opcode1 == 0x49) {
        // First byte of the 13-byte "mov r11, imm64; jmp r11" patch.
        if (!Clear13BytePatch(origBytes, tramp.GetCurrentRemoteAddress())) {
          continue;
        }
      } else if (opcode1 == 0xB8) {
        // First byte of the 10-byte patch ("mov eax, imm32 ...").
        if (!Clear10BytePatch(origBytes)) {
          continue;
        }
      } else if (opcode1 == 0x48) {
        // The original function was just a different trampoline
        if (!ClearTrampolinePatch(origBytes, tramp.GetCurrentRemoteAddress())) {
          continue;
        }
      } else {
        MOZ_ASSERT_UNREACHABLE("Unrecognized patch!");
        continue;
      }
#  endif

#elif defined(_M_ARM64)

      // Ensure that we see the instruction that we expect
      Maybe<uint32_t> inst1 = origBytes.ReadLong();
      if (!inst1) {
        continue;
      }

      if (inst1.value() == this->kLdrX16Plus8) {
        // 16-byte patch: LDR x16, .+8; BR x16; <pointer>
        if (!Clear16BytePatch(origBytes, tramp.GetCurrentRemoteAddress())) {
          continue;
        }
      } else if (arm64::IsUnconditionalBranchImm(inst1.value())) {
        // 4-byte patch: a single immediate branch.
        if (!Clear4BytePatch(inst1.value(), origBytes)) {
          continue;
        }
      } else {
        MOZ_ASSERT_UNREACHABLE("Unrecognized patch!");
        continue;
      }

#else
#  error "Unknown processor type"
#endif
    }

    this->mVMPolicy.Clear();
  }
263 
264 #if defined(_M_X64)
Clear13BytePatch(WritableTargetFunction<MMPolicyT> & aOrigBytes,const uintptr_t aResetToAddress)265   bool Clear13BytePatch(WritableTargetFunction<MMPolicyT>& aOrigBytes,
266                         const uintptr_t aResetToAddress) {
267     Maybe<uint8_t> maybeOpcode2 = aOrigBytes.ReadByte();
268     if (!maybeOpcode2) {
269       return false;
270     }
271 
272     uint8_t opcode2 = maybeOpcode2.value();
273     if (opcode2 != 0xBB) {
274       return false;
275     }
276 
277     aOrigBytes.WritePointer(aResetToAddress);
278     if (!aOrigBytes) {
279       return false;
280     }
281 
282     return aOrigBytes.Commit();
283   }
284 
ClearTrampolinePatch(WritableTargetFunction<MMPolicyT> & aOrigBytes,const uintptr_t aPtrToResetToAddress)285   bool ClearTrampolinePatch(WritableTargetFunction<MMPolicyT>& aOrigBytes,
286                             const uintptr_t aPtrToResetToAddress) {
287     // The target of the trampoline we replaced is stored at
288     // aPtrToResetToAddress. We simply put it back where we got it from.
289     Maybe<uint8_t> maybeOpcode2 = aOrigBytes.ReadByte();
290     if (!maybeOpcode2) {
291       return false;
292     }
293 
294     uint8_t opcode2 = maybeOpcode2.value();
295     if (opcode2 != 0xB8) {
296       return false;
297     }
298 
299     auto oldPtr = *(reinterpret_cast<const uintptr_t*>(aPtrToResetToAddress));
300 
301     aOrigBytes.WritePointer(oldPtr);
302     if (!aOrigBytes) {
303       return false;
304     }
305 
306     return aOrigBytes.Commit();
307   }
308 
Clear10BytePatch(WritableTargetFunction<MMPolicyT> & aOrigBytes)309   bool Clear10BytePatch(WritableTargetFunction<MMPolicyT>& aOrigBytes) {
310     Maybe<uint32_t> maybePtr32 = aOrigBytes.ReadLong();
311     if (!maybePtr32) {
312       return false;
313     }
314 
315     uint32_t ptr32 = maybePtr32.value();
316     // We expect the high bit to be clear
317     if (ptr32 & 0x80000000) {
318       return false;
319     }
320 
321     uintptr_t trampPtr = ptr32;
322 
323     // trampPtr points to an intermediate trampoline that contains a 13-byte
324     // patch. We back up by sizeof(uintptr_t) so that we can access the pointer
325     // to the stub trampoline.
326     WritableTargetFunction<MMPolicyT> writableIntermediate(
327         this->mVMPolicy, trampPtr - sizeof(uintptr_t), 13 + sizeof(uintptr_t));
328     if (!writableIntermediate) {
329       return false;
330     }
331 
332     Maybe<uintptr_t> stubTramp = writableIntermediate.ReadEncodedPtr();
333     if (!stubTramp || !stubTramp.value()) {
334       return false;
335     }
336 
337     Maybe<uint8_t> maybeOpcode1 = writableIntermediate.ReadByte();
338     if (!maybeOpcode1) {
339       return false;
340     }
341 
342     // We expect this opcode to be the beginning of our normal mov r11, ptr
343     // patch sequence.
344     uint8_t opcode1 = maybeOpcode1.value();
345     if (opcode1 != 0x49) {
346       return false;
347     }
348 
349     // Now we can just delegate the rest to our normal 13-byte patch clearing.
350     return Clear13BytePatch(writableIntermediate, stubTramp.value());
351   }
352 #endif  // defined(_M_X64)
353 
354 #if defined(_M_ARM64)
Clear4BytePatch(const uint32_t aBranchImm,WritableTargetFunction<MMPolicyT> & aOrigBytes)355   bool Clear4BytePatch(const uint32_t aBranchImm,
356                        WritableTargetFunction<MMPolicyT>& aOrigBytes) {
357     MOZ_ASSERT(arm64::IsUnconditionalBranchImm(aBranchImm));
358 
359     arm64::LoadOrBranch decoded = arm64::BUncondImmDecode(
360         aOrigBytes.GetCurrentAddress() - sizeof(uint32_t), aBranchImm);
361 
362     uintptr_t trampPtr = decoded.mAbsAddress;
363 
364     // trampPtr points to an intermediate trampoline that contains a veneer.
365     // We back up by sizeof(uintptr_t) so that we can access the pointer to the
366     // stub trampoline.
367 
368     // We want trampLen to be the size of the veneer, plus one pointer (since
369     // we are backing up trampPtr by one pointer)
370     size_t trampLen = 16 + sizeof(uintptr_t);
371 
372     WritableTargetFunction<MMPolicyT> writableIntermediate(
373         this->mVMPolicy, trampPtr - sizeof(uintptr_t), trampLen);
374     if (!writableIntermediate) {
375       return false;
376     }
377 
378     Maybe<uintptr_t> stubTramp = writableIntermediate.ReadEncodedPtr();
379     if (!stubTramp || !stubTramp.value()) {
380       return false;
381     }
382 
383     Maybe<uint32_t> inst1 = writableIntermediate.ReadLong();
384     if (!inst1 || inst1.value() != this->kLdrX16Plus8) {
385       return false;
386     }
387 
388     return Clear16BytePatch(writableIntermediate, stubTramp.value());
389   }
390 
Clear16BytePatch(WritableTargetFunction<MMPolicyT> & aOrigBytes,const uintptr_t aResetToAddress)391   bool Clear16BytePatch(WritableTargetFunction<MMPolicyT>& aOrigBytes,
392                         const uintptr_t aResetToAddress) {
393     Maybe<uint32_t> inst2 = aOrigBytes.ReadLong();
394     if (!inst2) {
395       return false;
396     }
397 
398     if (inst2.value() != arm64::BuildUnconditionalBranchToRegister(16)) {
399       MOZ_ASSERT_UNREACHABLE("Unrecognized patch!");
400       return false;
401     }
402 
403     // Clobber the pointer to our hook function with a pointer to the
404     // start of the trampoline.
405     aOrigBytes.WritePointer(aResetToAddress);
406     aOrigBytes.Commit();
407 
408     return true;
409   }
410 #endif  // defined(_M_ARM64)
411 
412   void Init(DetourFlags aFlags = DetourFlags::eDefault) {
413     if (Initialized()) {
414       return;
415     }
416 
417 #if defined(_M_X64)
418     if (aFlags & DetourFlags::eTestOnlyForceShortPatch) {
419       aFlags |= DetourFlags::eEnable10BytePatch;
420     }
421 #endif  // defined(_M_X64)
422 
423     mFlags = Some(aFlags);
424   }
425 
Initialized()426   bool Initialized() const { return mFlags.isSome(); }
427 
AddHook(FARPROC aTargetFn,intptr_t aHookDest,void ** aOrigFunc)428   bool AddHook(FARPROC aTargetFn, intptr_t aHookDest, void** aOrigFunc) {
429     ReadOnlyTargetFunction<MMPolicyT> target(
430         (mFlags.value() & DetourFlags::eDontResolveRedirection)
431             ? ReadOnlyTargetFunction<MMPolicyT>(
432                   this->mVMPolicy, reinterpret_cast<uintptr_t>(aTargetFn))
433             : this->ResolveRedirectedAddress(aTargetFn));
434 
435     TrampPoolT* trampPool = nullptr;
436 
437 #if defined(_M_ARM64)
438     // ARM64 uses two passes to build its trampoline. The first pass uses a
439     // null tramp to determine how many bytes are needed. Once that is known,
440     // CreateTrampoline calls itself recursively with a "real" tramp.
441     Trampoline<MMPolicyT> tramp(nullptr);
442 #else
443     Maybe<TrampPoolT> maybeTrampPool = DoReserve();
444     MOZ_ASSERT(maybeTrampPool);
445     if (!maybeTrampPool) {
446       return false;
447     }
448 
449     trampPool = maybeTrampPool.ptr();
450 
451     Maybe<Trampoline<MMPolicyT>> maybeTramp(trampPool->GetNextTrampoline());
452     if (!maybeTramp) {
453       this->SetLastDetourError(
454           DetourResultCode::DETOUR_PATCHER_NEXT_TRAMPOLINE_ERROR);
455       return false;
456     }
457 
458     Trampoline<MMPolicyT> tramp(std::move(maybeTramp.ref()));
459 #endif
460 
461     CreateTrampoline(target, trampPool, tramp, aHookDest, aOrigFunc);
462     if (!*aOrigFunc) {
463       return false;
464     }
465 
466     return true;
467   }
468 
469  private:
470   /**
471    * This function returns a maximum distance that can be reached by a single
472    * unconditional jump instruction. This is dependent on the processor ISA.
473    * Note that this distance is *exclusive* when added to the pivot, so the
474    * distance returned by this function is actually
475    * (maximum_absolute_offset + 1).
476    */
GetDefaultPivotDistance()477   static uint32_t GetDefaultPivotDistance() {
478 #if defined(_M_ARM64)
479     // Immediate unconditional branch allows for +/- 128MB
480     return 0x08000000U;
481 #elif defined(_M_IX86) || defined(_M_X64)
482     // For these ISAs, our distance will assume the use of an unconditional jmp
483     // with a 32-bit signed displacement.
484     return 0x80000000U;
485 #else
486 #  error "Not defined for this processor arch"
487 #endif
488   }
489 
490   /**
491    * If we're reserving trampoline space for a specific module, we base the
492    * pivot off of the median address of the module's .text section. While this
493    * may not be precise, it should be accurate enough for our purposes: To
494    * ensure that the trampoline space is reachable by any executable code in the
495    * module.
496    */
ReserveForModule(HMODULE aModule)497   Maybe<TrampPoolT> ReserveForModule(HMODULE aModule) {
498     nt::PEHeaders moduleHeaders(aModule);
499     if (!moduleHeaders) {
500       this->SetLastDetourError(
501           DetourResultCode::DETOUR_PATCHER_RESERVE_FOR_MODULE_PE_ERROR);
502       return Nothing();
503     }
504 
505     Maybe<Span<const uint8_t>> textSectionInfo =
506         moduleHeaders.GetTextSectionInfo();
507     if (!textSectionInfo) {
508       this->SetLastDetourError(
509           DetourResultCode::DETOUR_PATCHER_RESERVE_FOR_MODULE_TEXT_ERROR);
510       return Nothing();
511     }
512 
513     const uint8_t* median = textSectionInfo.value().data() +
514                             (textSectionInfo.value().LengthBytes() / 2);
515 
516     Maybe<TrampPoolT> maybeTrampPool = this->mVMPolicy.Reserve(
517         reinterpret_cast<uintptr_t>(median), GetDefaultPivotDistance());
518     if (!maybeTrampPool) {
519       this->SetLastDetourError(
520           DetourResultCode::DETOUR_PATCHER_RESERVE_FOR_MODULE_RESERVE_ERROR);
521     }
522     return maybeTrampPool;
523   }
524 
525   Maybe<TrampPoolT> DoReserve(HMODULE aModule = nullptr) {
526     if (aModule) {
527       return ReserveForModule(aModule);
528     }
529 
530     uintptr_t pivot = 0;
531     uint32_t distance = 0;
532 
533 #if defined(_M_X64)
534     if (mFlags.value() & DetourFlags::eEnable10BytePatch) {
535       // We must stay below the 2GB mark because a 10-byte patch uses movsxd
536       // (ie, sign extension) to expand the pointer to 64-bits, so bit 31 of any
537       // pointers into the reserved region must be 0.
538       pivot = 0x40000000U;
539       distance = 0x40000000U;
540     }
541 #endif  // defined(_M_X64)
542 
543     Maybe<TrampPoolT> maybeTrampPool = this->mVMPolicy.Reserve(pivot, distance);
544 #if defined(NIGHTLY_BUILD)
545     if (!maybeTrampPool && this->GetLastDetourError().isNothing()) {
546       this->SetLastDetourError(
547           DetourResultCode::DETOUR_PATCHER_DO_RESERVE_ERROR);
548     }
549 #endif  // defined(NIGHTLY_BUILD)
550     return maybeTrampPool;
551   }
552 
553  protected:
554 #if !defined(_M_ARM64)
555 
  // Constants used when decoding and encoding x86 / x86-64 instructions.

  const static int kPageSize = 4096;

  // rex bits
  // A byte is a REX prefix when (byte & kMaskHighNibble) == kRexOpcode; the
  // remaining masks select the individual W/R/X/B bits of that prefix.
  static const BYTE kMaskHighNibble = 0xF0;
  static const BYTE kRexOpcode = 0x40;
  static const BYTE kMaskRexW = 0x08;
  static const BYTE kMaskRexR = 0x04;
  static const BYTE kMaskRexX = 0x02;
  static const BYTE kMaskRexB = 0x01;

  // mod r/m bits
  // Masks for the mod, reg and r/m fields of a ModR/M byte, the shift that
  // brings the reg field down to bit 0, and the values of the mod and r/m
  // fields that are given special treatment.
  static const BYTE kRegFieldShift = 3;
  static const BYTE kMaskMod = 0xC0;
  static const BYTE kMaskReg = 0x38;
  static const BYTE kMaskRm = 0x07;
  static const BYTE kRmNeedSib = 0x04;
  static const BYTE kModReg = 0xC0;
  static const BYTE kModDisp32 = 0x80;
  static const BYTE kModDisp8 = 0x40;
  static const BYTE kModNoRegDisp = 0x00;
  static const BYTE kRmNoRegDispDisp32 = 0x05;

  // sib bits
  // Masks for the scale, index and base fields of a SIB byte.
  static const BYTE kMaskSibScale = 0xC0;
  static const BYTE kMaskSibIndex = 0x38;
  static const BYTE kMaskSibBase = 0x07;
  static const BYTE kSibBaseEbp = 0x05;

  // Register bit IDs.
  static const BYTE kRegAx = 0x0;
  static const BYTE kRegCx = 0x1;
  static const BYTE kRegDx = 0x2;
  static const BYTE kRegBx = 0x3;
  static const BYTE kRegSp = 0x4;
  static const BYTE kRegBp = 0x5;
  static const BYTE kRegSi = 0x6;
  static const BYTE kRegDi = 0x7;

  // Special ModR/M codes.  These indicate operands that cannot be simply
  // memcpy-ed.  Both are negative so that they cannot be mistaken for a byte
  // count.
  // Operand is a 64-bit RIP-relative address.
  static const int kModOperand64 = -2;
  // Operand is not yet handled by our trampoline.
  static const int kModUnknown = -1;
600 
601   /**
602    * Returns the number of bytes taken by the ModR/M byte, SIB (if present)
603    * and the instruction's operand.  In special cases, the special MODRM codes
604    * above are returned.
605    * aModRm points to the ModR/M byte of the instruction.
606    * On return, aSubOpcode (if present) is filled with the subopcode/register
607    * code found in the ModR/M byte.
608    */
609   int CountModRmSib(const ReadOnlyTargetFunction<MMPolicyT>& aModRm,
610                     BYTE* aSubOpcode = nullptr) {
611     int numBytes = 1;  // Start with 1 for mod r/m byte itself
612     switch (*aModRm & kMaskMod) {
613       case kModReg:
614         return numBytes;
615       case kModDisp8:
616         numBytes += 1;
617         break;
618       case kModDisp32:
619         numBytes += 4;
620         break;
621       case kModNoRegDisp:
622         if ((*aModRm & kMaskRm) == kRmNoRegDispDisp32) {
623 #  if defined(_M_X64)
624           if (aSubOpcode) {
625             *aSubOpcode = (*aModRm & kMaskReg) >> kRegFieldShift;
626           }
627           return kModOperand64;
628 #  else
629           // On IA-32, all ModR/M instruction modes address memory relative to 0
630           numBytes += 4;
631 #  endif
632         } else if (((*aModRm & kMaskRm) == kRmNeedSib &&
633                     (*(aModRm + 1) & kMaskSibBase) == kSibBaseEbp)) {
634           numBytes += 4;
635         }
636         break;
637       default:
638         // This should not be reachable
639         MOZ_ASSERT_UNREACHABLE("Impossible value for modr/m byte mod bits");
640         return kModUnknown;
641     }
642     if ((*aModRm & kMaskRm) == kRmNeedSib) {
643       // SIB byte
644       numBytes += 1;
645     }
646     if (aSubOpcode) {
647       *aSubOpcode = (*aModRm & kMaskReg) >> kRegFieldShift;
648     }
649     return numBytes;
650   }
651 
652 #  if defined(_M_X64)
653   enum class JumpType{Je, Jne, Jae, Jmp, Call};
654 
GenerateJump(Trampoline<MMPolicyT> & aTramp,uintptr_t aAbsTargetAddress,const JumpType aType)655   static bool GenerateJump(Trampoline<MMPolicyT>& aTramp,
656                            uintptr_t aAbsTargetAddress, const JumpType aType) {
657     // Near call, absolute indirect, address given in r/m32
658     if (aType == JumpType::Call) {
659       // CALL [RIP+0]
660       aTramp.WriteByte(0xff);
661       aTramp.WriteByte(0x15);
662       // The offset to jump destination -- 2 bytes after the current position.
663       aTramp.WriteInteger(2);
664       aTramp.WriteByte(0xeb);  // JMP + 8 (jump over target address)
665       aTramp.WriteByte(8);
666       aTramp.WritePointer(aAbsTargetAddress);
667       return !!aTramp;
668     }
669 
670     // Write an opposite conditional jump because the destination branches
671     // are swapped.
672     if (aType == JumpType::Je) {
673       // JNE RIP+14
674       aTramp.WriteByte(0x75);
675       aTramp.WriteByte(14);
676     } else if (aType == JumpType::Jne) {
677       // JE RIP+14
678       aTramp.WriteByte(0x74);
679       aTramp.WriteByte(14);
680     } else if (aType == JumpType::Jae) {
681       // JB RIP+14
682       aTramp.WriteByte(0x72);
683       aTramp.WriteByte(14);
684     }
685 
686     // Near jmp, absolute indirect, address given in r/m32
687     // JMP [RIP+0]
688     aTramp.WriteByte(0xff);
689     aTramp.WriteByte(0x25);
690     // The offset to jump destination is 0
691     aTramp.WriteInteger(0);
692     aTramp.WritePointer(aAbsTargetAddress);
693 
694     return !!aTramp;
695   }
696 #  endif
697 
698   enum ePrefixGroupBits{eNoPrefixes = 0, ePrefixGroup1 = (1 << 0),
699                         ePrefixGroup2 = (1 << 1), ePrefixGroup3 = (1 << 2),
700                         ePrefixGroup4 = (1 << 3)};
701 
CountPrefixBytes(const ReadOnlyTargetFunction<MMPolicyT> & aBytes,unsigned char * aOutGroupBits)702   int CountPrefixBytes(const ReadOnlyTargetFunction<MMPolicyT>& aBytes,
703                        unsigned char* aOutGroupBits) {
704     unsigned char& groupBits = *aOutGroupBits;
705     groupBits = eNoPrefixes;
706     int index = 0;
707     while (true) {
708       switch (aBytes[index]) {
709         // Group 1
710         case 0xF0:  // LOCK
711         case 0xF2:  // REPNZ
712         case 0xF3:  // REP / REPZ
713           if (groupBits & ePrefixGroup1) {
714             return -1;
715           }
716           groupBits |= ePrefixGroup1;
717           ++index;
718           break;
719 
720         // Group 2
721         case 0x2E:  // CS override / branch not taken
722         case 0x36:  // SS override
723         case 0x3E:  // DS override / branch taken
724         case 0x64:  // FS override
725         case 0x65:  // GS override
726           if (groupBits & ePrefixGroup2) {
727             return -1;
728           }
729           groupBits |= ePrefixGroup2;
730           ++index;
731           break;
732 
733         // Group 3
734         case 0x66:  // operand size override
735           if (groupBits & ePrefixGroup3) {
736             return -1;
737           }
738           groupBits |= ePrefixGroup3;
739           ++index;
740           break;
741 
742         // Group 4
743         case 0x67:  // Address size override
744           if (groupBits & ePrefixGroup4) {
745             return -1;
746           }
747           groupBits |= ePrefixGroup4;
748           ++index;
749           break;
750 
751         default:
752           return index;
753       }
754     }
755   }
756 
757   // Return a ModR/M byte made from the 2 Mod bits, the register used for the
758   // reg bits and the register used for the R/M bits.
BuildModRmByte(BYTE aModBits,BYTE aReg,BYTE aRm)759   BYTE BuildModRmByte(BYTE aModBits, BYTE aReg, BYTE aRm) {
760     MOZ_ASSERT((aRm & kMaskRm) == aRm);
761     MOZ_ASSERT((aModBits & kMaskMod) == aModBits);
762     MOZ_ASSERT(((aReg << kRegFieldShift) & kMaskReg) ==
763                (aReg << kRegFieldShift));
764     return aModBits | (aReg << kRegFieldShift) | aRm;
765   }
766 
767 #endif  // !defined(_M_ARM64)
768 
769   // If originalFn is a recognized trampoline then patch it to call aDest,
770   // set *aTramp and *aOutTramp to that trampoline's target and return true.
PatchIfTargetIsRecognizedTrampoline(Trampoline<MMPolicyT> & aTramp,ReadOnlyTargetFunction<MMPolicyT> & aOriginalFn,intptr_t aDest,void ** aOutTramp)771   bool PatchIfTargetIsRecognizedTrampoline(
772       Trampoline<MMPolicyT>& aTramp,
773       ReadOnlyTargetFunction<MMPolicyT>& aOriginalFn, intptr_t aDest,
774       void** aOutTramp) {
775 #if defined(_M_X64)
776     // Variation 1:
777     // 48 b8 imm64  mov rax, imm64
778     // ff e0        jmp rax
779     //
780     // Variation 2:
781     // 48 b8 imm64  mov rax, imm64
782     // 50           push rax
783     // c3           ret
784     if ((aOriginalFn[0] == 0x48) && (aOriginalFn[1] == 0xB8) &&
785         ((aOriginalFn[10] == 0xFF && aOriginalFn[11] == 0xE0) ||
786          (aOriginalFn[10] == 0x50 && aOriginalFn[11] == 0xC3))) {
787       uintptr_t originalTarget =
788           (aOriginalFn + 2).template ChasePointer<uintptr_t>();
789 
790       // Skip the first two bytes (48 b8) so that we can overwrite the imm64
791       WritableTargetFunction<MMPolicyT> target(aOriginalFn.Promote(8, 2));
792       if (!target) {
793         return false;
794       }
795 
796       // Write the new JMP target address.
797       target.WritePointer(aDest);
798       if (!target.Commit()) {
799         return false;
800       }
801 
802       // Store the old target address so we can restore it when we're cleared
803       aTramp.WritePointer(originalTarget);
804       if (!aTramp) {
805         return false;
806       }
807 
808       *aOutTramp = reinterpret_cast<void*>(originalTarget);
809       return true;
810     }
811 #endif  // defined(_M_X64)
812 
813     return false;
814   }
815 
816 #if defined(_M_ARM64)
Apply4BytePatch(TrampPoolT * aTrampPool,void * aTrampPtr,WritableTargetFunction<MMPolicyT> & target,intptr_t aDest)817   bool Apply4BytePatch(TrampPoolT* aTrampPool, void* aTrampPtr,
818                        WritableTargetFunction<MMPolicyT>& target,
819                        intptr_t aDest) {
820     MOZ_ASSERT(aTrampPool);
821     if (!aTrampPool) {
822       return false;
823     }
824 
825     uintptr_t hookDest = arm64::MakeVeneer(*aTrampPool, aTrampPtr, aDest);
826     if (!hookDest) {
827       return false;
828     }
829 
830     Maybe<uint32_t> branchImm = arm64::BuildUnconditionalBranchImm(
831         target.GetCurrentAddress(), hookDest);
832     if (!branchImm) {
833       return false;
834     }
835 
836     target.WriteLong(branchImm.value());
837 
838     return true;
839   }
840 #endif  // defined(_M_ARM64)
841 
842 #if defined(_M_X64)
Apply10BytePatch(TrampPoolT * aTrampPool,void * aTrampPtr,WritableTargetFunction<MMPolicyT> & target,intptr_t aDest)843   bool Apply10BytePatch(TrampPoolT* aTrampPool, void* aTrampPtr,
844                         WritableTargetFunction<MMPolicyT>& target,
845                         intptr_t aDest) {
846     // Note: Even if the target function is also below 2GB, we still use an
847     // intermediary trampoline so that we consistently have a 64-bit pointer
848     // that we can use to reset the trampoline upon interceptor shutdown.
849     Maybe<Trampoline<MMPolicyT>> maybeCallTramp(
850         aTrampPool->GetNextTrampoline());
851     if (!maybeCallTramp) {
852       return false;
853     }
854 
855     Trampoline<MMPolicyT> callTramp(std::move(maybeCallTramp.ref()));
856 
857     // Write a null instance so that Clear() does not consider this tramp to
858     // be a normal tramp to be torn down.
859     callTramp.WriteEncodedPointer(nullptr);
860     // Use the second pointer slot to store a pointer to the primary tramp
861     callTramp.WriteEncodedPointer(aTrampPtr);
862     callTramp.StartExecutableCode();
863 
864     // mov r11, address
865     callTramp.WriteByte(0x49);
866     callTramp.WriteByte(0xbb);
867     callTramp.WritePointer(aDest);
868 
869     // jmp r11
870     callTramp.WriteByte(0x41);
871     callTramp.WriteByte(0xff);
872     callTramp.WriteByte(0xe3);
873 
874     void* callTrampStart = callTramp.EndExecutableCode();
875     if (!callTrampStart) {
876       return false;
877     }
878 
879     target.WriteByte(0xB8);  // MOV EAX, IMM32
880 
881     // Assert that the topmost 33 bits are 0
882     MOZ_ASSERT(
883         !(reinterpret_cast<uintptr_t>(callTrampStart) & (~0x7FFFFFFFULL)));
884 
885     target.WriteLong(static_cast<uint32_t>(
886         reinterpret_cast<uintptr_t>(callTrampStart) & 0x7FFFFFFFU));
887     target.WriteByte(0x48);  // REX.W
888     target.WriteByte(0x63);  // MOVSXD r64, r/m32
889     // dest: rax, src: eax
890     target.WriteByte(BuildModRmByte(kModReg, kRegAx, kRegAx));
891     target.WriteByte(0xFF);                                // JMP /4
892     target.WriteByte(BuildModRmByte(kModReg, 4, kRegAx));  // rax
893 
894     return true;
895   }
896 #endif  // defined(_M_X64)
897 
CreateTrampoline(ReadOnlyTargetFunction<MMPolicyT> & origBytes,TrampPoolT * aTrampPool,Trampoline<MMPolicyT> & aTramp,intptr_t aDest,void ** aOutTramp)898   void CreateTrampoline(ReadOnlyTargetFunction<MMPolicyT>& origBytes,
899                         TrampPoolT* aTrampPool, Trampoline<MMPolicyT>& aTramp,
900                         intptr_t aDest, void** aOutTramp) {
901     *aOutTramp = nullptr;
902 
903     Trampoline<MMPolicyT>& tramp = aTramp;
904     if (!tramp) {
905       this->SetLastDetourError(
906           DetourResultCode::DETOUR_PATCHER_INVALID_TRAMPOLINE);
907       return;
908     }
909 
910     // The beginning of the trampoline contains two pointer-width slots:
911     // [0]: |this|, so that we know whether the trampoline belongs to us;
912     // [1]: Pointer to original function, so that we can reset the hooked
913     // function to its original behavior upon destruction.  In rare cases
914     // where the function was already a different trampoline, this is
915     // just a pointer to that trampoline's target address.
916     tramp.WriteEncodedPointer(this);
917     if (!tramp) {
918       this->SetLastDetourError(
919           DetourResultCode::DETOUR_PATCHER_WRITE_POINTER_ERROR);
920       return;
921     }
922 
923     auto clearInstanceOnFailure = MakeScopeExit([this, aOutTramp, &tramp,
924                                                  &origBytes]() -> void {
925       // *aOutTramp is not set until CreateTrampoline has completed
926       // successfully, so we can use that to check for success.
927       if (*aOutTramp) {
928         return;
929       }
930 
931       // Clear the instance pointer so that we don't try to reset a
932       // nonexistent hook.
933       tramp.Rewind();
934       tramp.WriteEncodedPointer(nullptr);
935 
936 #if defined(NIGHTLY_BUILD)
937       origBytes.Rewind();
938       this->SetLastDetourError(
939           DetourResultCode::DETOUR_PATCHER_CREATE_TRAMPOLINE_ERROR);
940       DetourError& lastError = *this->mVMPolicy.mLastError;
941       size_t bytesToCapture = std::min(
942           ArrayLength(lastError.mOrigBytes),
943           static_cast<size_t>(PrimitiveT::GetWorstCaseRequiredBytesToPatch()));
944 #  if defined(_M_ARM64)
945       size_t numInstructionsToCapture = bytesToCapture / sizeof(uint32_t);
946       auto origBytesDst = reinterpret_cast<uint32_t*>(lastError.mOrigBytes);
947       for (size_t i = 0; i < numInstructionsToCapture; ++i) {
948         origBytesDst[i] = origBytes.ReadNextInstruction();
949       }
950 #  else
951       for (size_t i = 0; i < bytesToCapture; ++i) {
952         lastError.mOrigBytes[i] = origBytes[i];
953       }
954 #  endif  // defined(_M_ARM64)
955 #else
956       // Silence -Wunused-lambda-capture in non-Nightly.
957       Unused << this;
958       Unused << origBytes;
959 #endif  // defined(NIGHTLY_BUILD)
960     });
961 
962     tramp.WritePointer(origBytes.AsEncodedPtr());
963     if (!tramp) {
964       return;
965     }
966 
967     if (PatchIfTargetIsRecognizedTrampoline(tramp, origBytes, aDest,
968                                             aOutTramp)) {
969       return;
970     }
971 
972     tramp.StartExecutableCode();
973 
974     constexpr uint32_t kWorstCaseBytesRequired =
975         PrimitiveT::GetWorstCaseRequiredBytesToPatch();
976 
977 #if defined(_M_IX86)
978     int pJmp32 = -1;
979     while (origBytes.GetOffset() < kWorstCaseBytesRequired) {
980       // Understand some simple instructions that might be found in a
981       // prologue; we might need to extend this as necessary.
982       //
983       // Note!  If we ever need to understand jump instructions, we'll
984       // need to rewrite the displacement argument.
985       unsigned char prefixGroups;
986       int numPrefixBytes = CountPrefixBytes(origBytes, &prefixGroups);
987       if (numPrefixBytes < 0 ||
988           (prefixGroups & (ePrefixGroup3 | ePrefixGroup4))) {
989         // Either the prefix sequence was bad, or there are prefixes that
990         // we don't currently support (groups 3 and 4)
991         MOZ_ASSERT_UNREACHABLE("Unrecognized opcode sequence");
992         return;
993       }
994 
995       origBytes += numPrefixBytes;
996       if (*origBytes >= 0x88 && *origBytes <= 0x8B) {
997         // various MOVs
998         ++origBytes;
999         int len = CountModRmSib(origBytes);
1000         if (len < 0) {
1001           MOZ_ASSERT_UNREACHABLE("Unrecognized MOV opcode sequence");
1002           return;
1003         }
1004         origBytes += len;
1005       } else if (*origBytes == 0x0f &&
1006                  (origBytes[1] == 0x10 || origBytes[1] == 0x11)) {
1007         // SSE: movups xmm, xmm/m128
1008         //      movups xmm/m128, xmm
1009         origBytes += 2;
1010         int len = CountModRmSib(origBytes);
1011         if (len < 0) {
1012           MOZ_ASSERT_UNREACHABLE("Unrecognized MOV opcode sequence");
1013           return;
1014         }
1015         origBytes += len;
1016       } else if (*origBytes == 0xA1) {
1017         // MOV eax, [seg:offset]
1018         origBytes += 5;
1019       } else if (*origBytes == 0xB8) {
1020         // MOV 0xB8: http://ref.x86asm.net/coder32.html#xB8
1021         origBytes += 5;
1022       } else if (*origBytes == 0x33 && (origBytes[1] & kMaskMod) == kModReg) {
1023         // XOR r32, r32
1024         origBytes += 2;
1025       } else if ((*origBytes & 0xf8) == 0x40) {
1026         // INC r32
1027         origBytes += 1;
1028       } else if (*origBytes == 0x83) {
1029         uint8_t mod = static_cast<uint8_t>(origBytes[1]) & kMaskMod;
1030         uint8_t rm = static_cast<uint8_t>(origBytes[1]) & kMaskRm;
1031         if (mod == kModReg) {
1032           // ADD|OR|ADC|SBB|AND|SUB|XOR|CMP r, imm8
1033           origBytes += 3;
1034         } else if (mod == kModDisp8 && rm != kRmNeedSib) {
1035           // ADD|OR|ADC|SBB|AND|SUB|XOR|CMP [r+disp8], imm8
1036           origBytes += 4;
1037         } else {
1038           // bail
1039           MOZ_ASSERT_UNREACHABLE("Unrecognized bit opcode sequence");
1040           return;
1041         }
1042       } else if (*origBytes == 0x68) {
1043         // PUSH with 4-byte operand
1044         origBytes += 5;
1045       } else if ((*origBytes & 0xf0) == 0x50) {
1046         // 1-byte PUSH/POP
1047         ++origBytes;
1048       } else if (*origBytes == 0x6A) {
1049         // PUSH imm8
1050         origBytes += 2;
1051       } else if (*origBytes == 0xe9) {
1052         pJmp32 = origBytes.GetOffset();
1053         // jmp 32bit offset
1054         origBytes += 5;
1055       } else if (*origBytes == 0xff && origBytes[1] == 0x25) {
1056         // jmp [disp32]
1057         origBytes += 6;
1058       } else if (*origBytes == 0xc2) {
1059         // ret imm16.  We can't handle this but it happens.  We don't ASSERT but
1060         // we do fail to hook.
1061 #  if defined(MOZILLA_INTERNAL_API)
1062         NS_WARNING("Cannot hook method -- RET opcode found");
1063 #  endif
1064         return;
1065       } else {
1066         // printf ("Unknown x86 instruction byte 0x%02x, aborting trampoline\n",
1067         // *origBytes);
1068         MOZ_ASSERT_UNREACHABLE("Unrecognized opcode sequence");
1069         return;
1070       }
1071     }
1072 
1073     // The trampoline is a copy of the instructions that we just traced,
1074     // followed by a jump that we add below.
1075     tramp.CopyFrom(origBytes.GetBaseAddress(), origBytes.GetOffset());
1076     if (!tramp) {
1077       return;
1078     }
1079 #elif defined(_M_X64)
1080     bool foundJmp = false;
1081     // |use10BytePatch| should always default to |false| in production. It is
1082     // not set to true unless we detect that a 10-byte patch is necessary.
1083     // OTOH, for testing purposes, if we want to force a 10-byte patch, we
1084     // always initialize |use10BytePatch| to |true|.
1085     bool use10BytePatch =
1086         (mFlags.value() & DetourFlags::eTestOnlyForceShortPatch) ==
1087         DetourFlags::eTestOnlyForceShortPatch;
1088     const uint32_t bytesRequired =
1089         use10BytePatch ? 10 : kWorstCaseBytesRequired;
1090 
1091     while (origBytes.GetOffset() < bytesRequired) {
1092       // If we found JMP 32bit offset, we require that the next bytes must
1093       // be NOP or INT3.  There is no reason to copy them.
1094       // TODO: This used to trigger for Je as well.  Now that I allow
1095       // instructions after CALL and JE, I don't think I need that.
1096       // The only real value of this condition is that if code follows a JMP
1097       // then its _probably_ the target of a JMP somewhere else and we
1098       // will be overwriting it, which would be tragic.  This seems
1099       // highly unlikely.
1100       if (foundJmp) {
1101         if (*origBytes == 0x90 || *origBytes == 0xcc) {
1102           ++origBytes;
1103           continue;
1104         }
1105 
1106         // If our trampoline space is located in the lowest 2GB, we can do a ten
1107         // byte patch instead of a thirteen byte patch.
1108         if (aTrampPool && aTrampPool->IsInLowest2GB() &&
1109             origBytes.GetOffset() >= 10) {
1110           use10BytePatch = true;
1111           break;
1112         }
1113 
1114         MOZ_ASSERT_UNREACHABLE("Opcode sequence includes commands after JMP");
1115         return;
1116       }
1117       if (*origBytes == 0x0f) {
1118         COPY_CODES(1);
1119         if (*origBytes == 0x1f) {
1120           // nop (multibyte)
1121           COPY_CODES(1);
1122           if ((*origBytes & 0xc0) == 0x40 && (*origBytes & 0x7) == 0x04) {
1123             COPY_CODES(3);
1124           } else {
1125             MOZ_ASSERT_UNREACHABLE("Unrecognized opcode sequence");
1126             return;
1127           }
1128         } else if (*origBytes == 0x05) {
1129           // syscall
1130           COPY_CODES(1);
1131         } else if (*origBytes == 0x10 || *origBytes == 0x11) {
1132           // SSE: movups xmm, xmm/m128
1133           //      movups xmm/m128, xmm
1134           COPY_CODES(1);
1135           int nModRmSibBytes = CountModRmSib(origBytes);
1136           if (nModRmSibBytes < 0) {
1137             MOZ_ASSERT_UNREACHABLE("Unrecognized opcode sequence");
1138             return;
1139           } else {
1140             COPY_CODES(nModRmSibBytes);
1141           }
1142         } else if (*origBytes >= 0x83 && *origBytes <= 0x85) {
1143           // 0f 83 cd    JAE rel32
1144           // 0f 84 cd    JE  rel32
1145           // 0f 85 cd    JNE rel32
1146           const JumpType kJumpTypes[] = {JumpType::Jae, JumpType::Je,
1147                                          JumpType::Jne};
1148           auto jumpType = kJumpTypes[*origBytes - 0x83];
1149           ++origBytes;
1150           --tramp;  // overwrite the 0x0f we copied above
1151 
1152           if (!GenerateJump(tramp, origBytes.ReadDisp32AsAbsolute(),
1153                             jumpType)) {
1154             return;
1155           }
1156         } else {
1157           MOZ_ASSERT_UNREACHABLE("Unrecognized opcode sequence");
1158           return;
1159         }
1160       } else if (*origBytes >= 0x88 && *origBytes <= 0x8B) {
1161         // various 32-bit MOVs
1162         COPY_CODES(1);
1163         int len = CountModRmSib(origBytes);
1164         if (len < 0) {
1165           MOZ_ASSERT_UNREACHABLE("Unrecognized MOV opcode sequence");
1166           return;
1167         }
1168         COPY_CODES(len);
1169       } else if (*origBytes == 0x40 || *origBytes == 0x41) {
1170         // Plain REX or REX.B
1171         COPY_CODES(1);
1172         if ((*origBytes & 0xf0) == 0x50) {
1173           // push/pop with Rx register
1174           COPY_CODES(1);
1175         } else if (*origBytes >= 0xb8 && *origBytes <= 0xbf) {
1176           // mov r32, imm32
1177           COPY_CODES(5);
1178         } else {
1179           MOZ_ASSERT_UNREACHABLE("Unrecognized opcode sequence");
1180           return;
1181         }
1182       } else if (*origBytes == 0x44) {
1183         // REX.R
1184         COPY_CODES(1);
1185 
1186         // TODO: Combine with the "0x89" case below in the REX.W section
1187         if (*origBytes == 0x89) {
1188           // mov r/m32, r32
1189           COPY_CODES(1);
1190           int len = CountModRmSib(origBytes);
1191           if (len < 0) {
1192             MOZ_ASSERT_UNREACHABLE("Unrecognized opcode sequence");
1193             return;
1194           }
1195           COPY_CODES(len);
1196         } else {
1197           MOZ_ASSERT_UNREACHABLE("Unrecognized opcode sequence");
1198           return;
1199         }
1200       } else if (*origBytes == 0x45) {
1201         // REX.R & REX.B
1202         COPY_CODES(1);
1203 
1204         if (*origBytes == 0x33) {
1205           // xor r32, r32
1206           COPY_CODES(2);
1207         } else {
1208           MOZ_ASSERT_UNREACHABLE("Unrecognized opcode sequence");
1209           return;
1210         }
1211       } else if ((*origBytes & 0xfa) == 0x48) {
1212         // REX.W | REX.WR | REX.WRB | REX.WB
1213         COPY_CODES(1);
1214 
1215         if (*origBytes == 0x81 && (origBytes[1] & 0xf8) == 0xe8) {
1216           // sub r, dword
1217           COPY_CODES(6);
1218         } else if (*origBytes == 0x83 && (origBytes[1] & 0xf8) == 0xe8) {
1219           // sub r, byte
1220           COPY_CODES(3);
1221         } else if (*origBytes == 0x83 &&
1222                    (origBytes[1] & (kMaskMod | kMaskReg)) == kModReg) {
1223           // add r, byte
1224           COPY_CODES(3);
1225         } else if (*origBytes == 0x83 && (origBytes[1] & 0xf8) == 0x60) {
1226           // and [r+d], imm8
1227           COPY_CODES(5);
1228         } else if (*origBytes == 0x2b && (origBytes[1] & kMaskMod) == kModReg) {
1229           // sub r64, r64
1230           COPY_CODES(2);
1231         } else if (*origBytes == 0x85) {
1232           // 85 /r => TEST r/m32, r32
1233           if ((origBytes[1] & 0xc0) == 0xc0) {
1234             COPY_CODES(2);
1235           } else {
1236             MOZ_ASSERT_UNREACHABLE("Unrecognized opcode sequence");
1237             return;
1238           }
1239         } else if ((*origBytes & 0xfd) == 0x89) {
1240           // MOV r/m64, r64 | MOV r64, r/m64
1241           BYTE reg;
1242           int len = CountModRmSib(origBytes + 1, &reg);
1243           if (len < 0) {
1244             MOZ_ASSERT(len == kModOperand64);
1245             if (len != kModOperand64) {
1246               return;
1247             }
1248             origBytes += 2;  // skip the MOV and MOD R/M bytes
1249 
1250             // The instruction MOVs 64-bit data from a RIP-relative memory
1251             // address (determined with a 32-bit offset from RIP) into a
1252             // 64-bit register.
1253             uintptr_t absAddr = origBytes.ReadDisp32AsAbsolute();
1254 
1255             if (reg == kRegAx) {
1256               // Destination is RAX.  Encode instruction as MOVABS with a
1257               // 64-bit absolute address as its immediate operand.
1258               tramp.WriteByte(0xa1);
1259               tramp.WritePointer(absAddr);
1260             } else {
1261               // The MOV must be done in two steps.  First, we MOVABS the
1262               // absolute 64-bit address into our target register.
1263               // Then, we MOV from that address into the register
1264               // using register-indirect addressing.
1265               tramp.WriteByte(0xb8 + reg);
1266               tramp.WritePointer(absAddr);
1267               tramp.WriteByte(0x48);
1268               tramp.WriteByte(0x8b);
1269               tramp.WriteByte(BuildModRmByte(kModNoRegDisp, reg, reg));
1270             }
1271           } else {
1272             COPY_CODES(len + 1);
1273           }
1274         } else if ((*origBytes & 0xf8) == 0xb8) {
1275           // MOV r64, imm64
1276           COPY_CODES(9);
1277         } else if (*origBytes == 0xc7) {
1278           // MOV r/m64, imm32
1279           if (origBytes[1] == 0x44) {
1280             // MOV [r64+disp8], imm32
1281             // ModR/W + SIB + disp8 + imm32
1282             COPY_CODES(8);
1283           } else {
1284             MOZ_ASSERT_UNREACHABLE("Unrecognized opcode sequence");
1285             return;
1286           }
1287         } else if (*origBytes == 0xff) {
1288           // JMP /4
1289           if ((origBytes[1] & 0xc0) == 0x0 && (origBytes[1] & 0x07) == 0x5) {
1290             origBytes += 2;
1291             --tramp;  // overwrite the REX.W/REX.RW we copied above
1292 
1293             if (!GenerateJump(tramp, origBytes.ChasePointerFromDisp(),
1294                               JumpType::Jmp)) {
1295               return;
1296             }
1297 
1298             foundJmp = true;
1299           } else {
1300             // not support yet!
1301             MOZ_ASSERT_UNREACHABLE("Unrecognized opcode sequence");
1302             return;
1303           }
1304         } else if (*origBytes == 0x8d) {
1305           // LEA reg, addr
1306           if ((origBytes[1] & kMaskMod) == 0x0 &&
1307               (origBytes[1] & kMaskRm) == 0x5) {
1308             // [rip+disp32]
1309             // convert 32bit offset to 64bit direct and convert instruction
1310             // to a simple 64-bit mov
1311             BYTE reg = (origBytes[1] & kMaskReg) >> kRegFieldShift;
1312             origBytes += 2;
1313             uintptr_t absAddr = origBytes.ReadDisp32AsAbsolute();
1314             tramp.WriteByte(0xb8 + reg);  // move
1315             tramp.WritePointer(absAddr);
1316           } else {
1317             // Above we dealt with RIP-relative instructions.  Any other
1318             // operand form can simply be copied.
1319             int len = CountModRmSib(origBytes + 1);
1320             // We handled the kModOperand64 -- ie RIP-relative -- case above
1321             MOZ_ASSERT(len > 0);
1322             COPY_CODES(len + 1);
1323           }
1324         } else if (*origBytes == 0x63 && (origBytes[1] & kMaskMod) == kModReg) {
1325           // movsxd r64, r32 (move + sign extend)
1326           COPY_CODES(2);
1327         } else {
1328           // not support yet!
1329           MOZ_ASSERT_UNREACHABLE("Unrecognized opcode sequence");
1330           return;
1331         }
1332       } else if (*origBytes == 0x66) {
1333         // operand override prefix
1334         COPY_CODES(1);
1335         // This is the same as the x86 version
1336         if (*origBytes >= 0x88 && *origBytes <= 0x8B) {
1337           // various MOVs
1338           unsigned char b = origBytes[1];
1339           if (((b & 0xc0) == 0xc0) ||
1340               (((b & 0xc0) == 0x00) && ((b & 0x07) != 0x04) &&
1341                ((b & 0x07) != 0x05))) {
1342             // REG=r, R/M=r or REG=r, R/M=[r]
1343             COPY_CODES(2);
1344           } else if ((b & 0xc0) == 0x40) {
1345             if ((b & 0x07) == 0x04) {
1346               // REG=r, R/M=[SIB + disp8]
1347               COPY_CODES(4);
1348             } else {
1349               // REG=r, R/M=[r + disp8]
1350               COPY_CODES(3);
1351             }
1352           } else {
1353             // complex MOV, bail
1354             MOZ_ASSERT_UNREACHABLE("Unrecognized MOV opcode sequence");
1355             return;
1356           }
1357         } else if (*origBytes == 0x44 && origBytes[1] == 0x89) {
1358           // mov word ptr [reg+disp8], reg
1359           COPY_CODES(2);
1360           int len = CountModRmSib(origBytes);
1361           if (len < 0) {
1362             // no way to support this yet.
1363             MOZ_ASSERT_UNREACHABLE("Unrecognized opcode sequence");
1364             return;
1365           }
1366           COPY_CODES(len);
1367         }
1368       } else if ((*origBytes & 0xf0) == 0x50) {
1369         // 1-byte push/pop
1370         COPY_CODES(1);
1371       } else if (*origBytes == 0x65) {
1372         // GS prefix
1373         //
1374         // The entry of GetKeyState on Windows 10 has the following code.
1375         // 65 48 8b 04 25 30 00 00 00    mov   rax,qword ptr gs:[30h]
1376         // (GS prefix + REX + MOV (0x8b) ...)
1377         if (origBytes[1] == 0x48 &&
1378             (origBytes[2] >= 0x88 && origBytes[2] <= 0x8b)) {
1379           COPY_CODES(3);
1380           int len = CountModRmSib(origBytes);
1381           if (len < 0) {
1382             // no way to support this yet.
1383             MOZ_ASSERT_UNREACHABLE("Unrecognized opcode sequence");
1384             return;
1385           }
1386           COPY_CODES(len);
1387         } else {
1388           MOZ_ASSERT_UNREACHABLE("Unrecognized opcode sequence");
1389           return;
1390         }
1391       } else if (*origBytes == 0x80 && origBytes[1] == 0x3d) {
1392         origBytes += 2;
1393 
1394         // cmp byte ptr [rip-relative address], imm8
1395         // We'll compute the absolute address and do the cmp in r11
1396 
1397         // push r11 (to save the old value)
1398         tramp.WriteByte(0x49);
1399         tramp.WriteByte(0x53);
1400 
1401         uintptr_t absAddr = origBytes.ReadDisp32AsAbsolute();
1402 
1403         // mov r11, absolute address
1404         tramp.WriteByte(0x49);
1405         tramp.WriteByte(0xbb);
1406         tramp.WritePointer(absAddr);
1407 
1408         // cmp byte ptr [r11],...
1409         tramp.WriteByte(0x41);
1410         tramp.WriteByte(0x80);
1411         tramp.WriteByte(0x3b);
1412 
1413         // ...imm8
1414         COPY_CODES(1);
1415 
1416         // pop r11 (doesn't affect the flags from the cmp)
1417         tramp.WriteByte(0x49);
1418         tramp.WriteByte(0x5b);
1419       } else if (*origBytes == 0x90) {
1420         // nop
1421         COPY_CODES(1);
1422       } else if ((*origBytes & 0xf8) == 0xb8) {
1423         // MOV r32, imm32
1424         COPY_CODES(5);
1425       } else if (*origBytes == 0x33) {
1426         // xor r32, r/m32
1427         COPY_CODES(2);
1428       } else if (*origBytes == 0xf6) {
1429         // test r/m8, imm8 (used by ntdll on Windows 10 x64)
1430         // (no flags are affected by near jmp since there is no task switch,
1431         // so it is ok for a jmp to be written immediately after a test)
1432         BYTE subOpcode = 0;
1433         int nModRmSibBytes = CountModRmSib(origBytes + 1, &subOpcode);
1434         if (nModRmSibBytes < 0 || subOpcode != 0) {
1435           // Unsupported
1436           MOZ_ASSERT_UNREACHABLE("Unrecognized opcode sequence");
1437           return;
1438         }
1439         COPY_CODES(2 + nModRmSibBytes);
1440       } else if (*origBytes == 0x85) {
1441         // test r/m32, r32
1442         int nModRmSibBytes = CountModRmSib(origBytes + 1);
1443         if (nModRmSibBytes < 0) {
1444           MOZ_ASSERT_UNREACHABLE("Unrecognized opcode sequence");
1445           return;
1446         }
1447         COPY_CODES(1 + nModRmSibBytes);
1448       } else if (*origBytes == 0xd1 && (origBytes[1] & kMaskMod) == kModReg) {
1449         // bit shifts/rotates : (SA|SH|RO|RC)(R|L) r32
1450         // (e.g. 0xd1 0xe0 is SAL, 0xd1 0xc8 is ROR)
1451         COPY_CODES(2);
1452       } else if (*origBytes == 0x83 && (origBytes[1] & kMaskMod) == kModReg) {
1453         // ADD|OR|ADC|SBB|AND|SUB|XOR|CMP r, imm8
1454         COPY_CODES(3);
1455       } else if (*origBytes == 0xc3) {
1456         // ret
1457         COPY_CODES(1);
1458       } else if (*origBytes == 0xcc) {
1459         // int 3
1460         COPY_CODES(1);
1461       } else if (*origBytes == 0xe8 || *origBytes == 0xe9) {
1462         // CALL (0xe8) or JMP (0xe9) 32bit offset
1463         foundJmp = *origBytes == 0xe9;
1464         ++origBytes;
1465 
1466         if (!GenerateJump(tramp, origBytes.ReadDisp32AsAbsolute(),
1467                           foundJmp ? JumpType::Jmp : JumpType::Call)) {
1468           return;
1469         }
1470       } else if (*origBytes >= 0x73 && *origBytes <= 0x75) {
1471         // 73 cb    JAE rel8
1472         // 74 cb    JE  rel8
1473         // 75 cb    JNE rel8
1474         const JumpType kJumpTypes[] = {JumpType::Jae, JumpType::Je,
1475                                        JumpType::Jne};
1476         auto jumpType = kJumpTypes[*origBytes - 0x73];
1477         uint8_t offset = origBytes[1];
1478 
1479         origBytes += 2;
1480 
1481         if (!GenerateJump(tramp, origBytes.OffsetToAbsolute(offset),
1482                           jumpType)) {
1483           return;
1484         }
1485       } else if (*origBytes == 0xff) {
1486         uint8_t mod = origBytes[1] & kMaskMod;
1487         uint8_t reg = (origBytes[1] & kMaskReg) >> kRegFieldShift;
1488         uint8_t rm = origBytes[1] & kMaskRm;
1489         if (mod == kModReg && (reg == 0 || reg == 1 || reg == 2 || reg == 6)) {
1490           // INC|DEC|CALL|PUSH r64
1491           COPY_CODES(2);
1492         } else if (mod == kModNoRegDisp && reg == 2 &&
1493                    rm == kRmNoRegDispDisp32) {
1494           // FF 15    CALL [disp32]
1495           origBytes += 2;
1496           if (!GenerateJump(tramp, origBytes.ChasePointerFromDisp(),
1497                             JumpType::Call)) {
1498             return;
1499           }
1500         } else if (reg == 4) {
1501           // FF /4 (Opcode=ff, REG=4): JMP r/m
1502           if (mod == kModNoRegDisp && rm == kRmNoRegDispDisp32) {
1503             // FF 25    JMP [disp32]
1504             foundJmp = true;
1505 
1506             origBytes += 2;
1507 
1508             uintptr_t jmpDest = origBytes.ChasePointerFromDisp();
1509 
1510             if (!GenerateJump(tramp, jmpDest, JumpType::Jmp)) {
1511               return;
1512             }
1513           } else {
1514             // JMP r/m except JMP [disp32]
1515             int len = CountModRmSib(origBytes + 1);
1516             if (len < 0) {
1517               // RIP-relative not yet supported
1518               MOZ_ASSERT_UNREACHABLE("Unrecognized opcode sequence");
1519               return;
1520             }
1521 
1522             COPY_CODES(len + 1);
1523 
1524             foundJmp = true;
1525           }
1526         } else {
1527           MOZ_ASSERT_UNREACHABLE("Unrecognized opcode sequence");
1528           return;
1529         }
1530       } else if (*origBytes == 0x83 && (origBytes[1] & 0xf8) == 0x60) {
1531         // and [r+d], imm8
1532         COPY_CODES(5);
1533       } else if (*origBytes == 0xc6) {
1534         // mov [r+d], imm8
1535         int len = CountModRmSib(origBytes + 1);
1536         if (len < 0) {
1537           // RIP-relative not yet supported
1538           MOZ_ASSERT_UNREACHABLE("Unrecognized opcode sequence");
1539           return;
1540         }
1541         COPY_CODES(len + 2);
1542       } else {
1543         MOZ_ASSERT_UNREACHABLE("Unrecognized opcode sequence");
1544         return;
1545       }
1546     }
1547 #elif defined(_M_ARM64)
1548 
1549     // The number of bytes required to facilitate a detour depends on the
1550     // proximity of the hook function to the target function. In the best case,
1551     // we can branch within +/- 128MB of the current location, requiring only
1552     // 4 bytes. In the worst case, we need 16 bytes to load an absolute address
1553     // into a register and then branch to it.
1554     const uint32_t bytesRequiredFromDecode =
1555         (mFlags.value() & DetourFlags::eTestOnlyForceShortPatch)
1556             ? 4
1557             : kWorstCaseBytesRequired;
1558 
1559     while (origBytes.GetOffset() < bytesRequiredFromDecode) {
1560       uintptr_t curPC = origBytes.GetCurrentAbsolute();
1561       uint32_t curInst = origBytes.ReadNextInstruction();
1562 
1563       Result<arm64::LoadOrBranch, arm64::PCRelCheckError> pcRelInfo =
1564           arm64::CheckForPCRel(curPC, curInst);
1565       if (pcRelInfo.isErr()) {
1566         if (pcRelInfo.unwrapErr() ==
1567             arm64::PCRelCheckError::InstructionNotPCRel) {
1568           // Instruction is not PC-relative, we can just copy it verbatim
1569           tramp.WriteInstruction(curInst);
1570           continue;
1571         }
1572 
1573         // At this point we have determined that there is no decoder available
1574         // for the current, PC-relative, instruction.
1575 
1576         // origBytes is now pointing one instruction past the one that we
1577         // need the trampoline to jump back to.
1578         if (!origBytes.BackUpOneInstruction()) {
1579           return;
1580         }
1581 
1582         break;
1583       }
1584 
1585       // We need to load an absolute address into a particular register
1586       tramp.WriteLoadLiteral(pcRelInfo.inspect().mAbsAddress,
1587                              pcRelInfo.inspect().mDestReg);
1588     }
1589 
1590 #else
1591 #  error "Unknown processor type"
1592 #endif
1593 
1594     if (origBytes.GetOffset() > 100) {
1595       // printf ("Too big!");
1596       return;
1597     }
1598 
1599 #if defined(_M_IX86)
1600     if (pJmp32 >= 0) {
1601       // Jump directly to the original target of the jump instead of jumping to
1602       // the original function. Adjust jump target displacement to jump location
1603       // in the trampoline.
1604       tramp.AdjustDisp32AtOffset(pJmp32 + 1, origBytes.GetBaseAddress());
1605     } else {
1606       tramp.WriteByte(0xe9);  // jmp
1607       tramp.WriteDisp32(origBytes.GetAddress());
1608     }
1609 #elif defined(_M_X64)
1610     // If we found a Jmp, we don't need to add another instruction. However,
1611     // if we found a _conditional_ jump or a CALL (or no control operations
1612     // at all) then we still need to run the rest of aOriginalFunction.
1613     if (!foundJmp) {
1614       if (!GenerateJump(tramp, origBytes.GetAddress(), JumpType::Jmp)) {
1615         return;
1616       }
1617     }
1618 #elif defined(_M_ARM64)
1619     // Let's find out how many bytes we have available to us for patching
1620     uint32_t numBytesForPatching = tramp.GetCurrentExecutableCodeLen();
1621 
1622     if (!numBytesForPatching) {
1623       // There's nothing we can do
1624       return;
1625     }
1626 
1627     if (tramp.IsNull()) {
1628       // Recursive case
1629       HMODULE targetModule = nullptr;
1630 
1631       if (numBytesForPatching < kWorstCaseBytesRequired) {
1632         if (!::GetModuleHandleExW(
1633                 GET_MODULE_HANDLE_EX_FLAG_FROM_ADDRESS |
1634                     GET_MODULE_HANDLE_EX_FLAG_UNCHANGED_REFCOUNT,
1635                 reinterpret_cast<LPCWSTR>(origBytes.GetBaseAddress()),
1636                 &targetModule)) {
1637           return;
1638         }
1639       }
1640 
1641       Maybe<TrampPoolT> maybeTrampPool = DoReserve(targetModule);
1642       MOZ_ASSERT(maybeTrampPool);
1643       if (!maybeTrampPool) {
1644         return;
1645       }
1646 
1647       Maybe<Trampoline<MMPolicyT>> maybeRealTramp(
1648           maybeTrampPool.ref().GetNextTrampoline());
1649       if (!maybeRealTramp) {
1650         return;
1651       }
1652 
1653       origBytes.Rewind();
1654       CreateTrampoline(origBytes, maybeTrampPool.ptr(), maybeRealTramp.ref(),
1655                        aDest, aOutTramp);
1656       return;
1657     }
1658 
1659     // Write the branch from the trampoline back to the original code
1660 
1661     tramp.WriteLoadLiteral(origBytes.GetAddress(), 16);
1662     tramp.WriteInstruction(arm64::BuildUnconditionalBranchToRegister(16));
1663 #else
1664 #  error "Unsupported processor architecture"
1665 #endif
1666 
1667     // The trampoline is now complete.
1668     void* trampPtr = tramp.EndExecutableCode();
1669     if (!trampPtr) {
1670       return;
1671     }
1672 
1673     WritableTargetFunction<MMPolicyT> target(origBytes.Promote());
1674     if (!target) {
1675       return;
1676     }
1677 
1678     do {
1679       // Now patch the original function.
1680       // When we're instructed to apply a non-default patch, apply it and exit.
1681       // If non-default patching fails, bail out, no fallback.
1682       // Otherwise, we go straight to the default patch.
1683 
1684 #if defined(_M_X64)
1685       if (use10BytePatch) {
1686         if (!Apply10BytePatch(aTrampPool, trampPtr, target, aDest)) {
1687           return;
1688         }
1689         break;
1690       }
1691 #elif defined(_M_ARM64)
1692       if (numBytesForPatching < kWorstCaseBytesRequired) {
1693         if (!Apply4BytePatch(aTrampPool, trampPtr, target, aDest)) {
1694           return;
1695         }
1696         break;
1697       }
1698 #endif
1699 
1700       PrimitiveT::ApplyDefaultPatch(target, aDest);
1701     } while (false);
1702 
1703     if (!target.Commit()) {
1704       return;
1705     }
1706 
1707     // Output the trampoline, thus signalling that this call was a success
1708     *aOutTramp = trampPtr;
1709   }
1710 };
1711 
1712 }  // namespace interceptor
1713 }  // namespace mozilla
1714 
1715 #endif  // mozilla_interceptor_PatcherDetour_h
1716