/* -*- Mode: C++; tab-width: 8; indent-tabs-mode: nil; c-basic-offset: 4 -*-
 * vim: set ts=8 sts=4 et sw=4 tw=99:
 * This Source Code Form is subject to the terms of the Mozilla Public
 * License, v. 2.0. If a copy of the MPL was not distributed with this
 * file, You can obtain one at http://mozilla.org/MPL/2.0/. */

#ifndef js_ProfilingStack_h
#define js_ProfilingStack_h

#include "mozilla/Atomics.h"

#include <algorithm>
#include <stdint.h>

#include "jstypes.h"

#include "js/TypeDecls.h"
#include "js/Utility.h"

class JSTracer;
class PseudoStack;

// This file defines the classes PseudoStack and ProfileEntry.
// The PseudoStack manages an array of ProfileEntries.
// Usage:
//
//  PseudoStack* pseudoStack = ...;
//
//  // For C++ stack frames:
//  pseudoStack->pushCppFrame(...);
//  // Execute some code. When finished, pop the entry:
//  pseudoStack->pop();
//
//  // For JS stack frames:
//  pseudoStack->pushJsFrame(...);
//  // Execute some code. When finished, pop the entry:
//  pseudoStack->pop();
//
//
// Concurrency considerations
//
// A thread's pseudo stack (and the entries inside it) is only modified by
// that thread. However, the pseudo stack can be *read* by a different thread,
// the sampler thread: Whenever the profiler wants to sample a given thread A,
// the following happens:
//  (1) Thread A is suspended.
//  (2) The sampler thread (thread S) reads the PseudoStack of thread A,
//      including all ProfileEntries that are currently in that stack
//      (pseudoStack->entries[0..pseudoStack->stackSize()]).
//  (3) Thread A is resumed.
//
// Thread suspension is achieved using platform-specific APIs; refer to each
// platform's Sampler::SuspendAndSampleAndResumeThread implementation in
// platform-*.cpp for details.
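//
// As an illustration, the read in step (2) might look like this (a
// hypothetical sketch; the real logic lives in the platform samplers):
//
//   // Thread A is suspended at this point.
//   uint32_t count = pseudoStack->stackSize();
//   for (uint32_t i = 0; i < count; i++) {
//     const js::ProfileEntry& entry = pseudoStack->entries[i];
//     // ... copy out entry.label(), entry.kind(), etc. ...
//   }
//   // Thread A is then resumed.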
//
// When the thread is suspended, the values in pseudoStack->stackPointer and in
// the entry range pseudoStack->entries[0..pseudoStack->stackPointer] need to
// be in a consistent state, so that thread S does not read partially-
// constructed profile entries. More specifically, we have two requirements:
//  (1) When adding a new entry at the top of the stack, its ProfileEntry data
//      needs to be put in place *before* the stackPointer is incremented, and
//      the compiler + CPU need to know that this order matters.
//  (2) When popping an entry from the stack and then preparing the
//      ProfileEntry data for the next frame that is about to be pushed, the
//      decrement of the stackPointer in pop() needs to happen *before* the
//      ProfileEntry for the new frame is populated, and the compiler +
//      CPU need to know that this order matters.
//
// We can express these ordering requirements in multiple ways.
// Option A is to make stackPointer an atomic with SequentiallyConsistent
// memory ordering. This would ensure that no writes in thread A would be
// reordered across any writes to stackPointer, which satisfies requirements
// (1) and (2) at the same time. Option A is the simplest.
// Option B is to use ReleaseAcquire memory ordering both for writes to
// stackPointer *and* for writes to ProfileEntry fields. Release-stores ensure
// that all writes that happened *before this write in program order* are not
// reordered to happen after this write. ReleaseAcquire ordering places no
// requirements on the ordering of writes that happen *after* this write in
// program order.
// Using release-stores for writes to stackPointer expresses requirement (1),
// and using release-stores for writes to the ProfileEntry fields expresses
// requirement (2).
//
// Option B is more complicated than option A, but has much better performance
// on x86/64: In a microbenchmark run on a 2017 MacBook Pro, switching
// from option A to option B reduced the overhead of pushing+popping a
// ProfileEntry by 10 nanoseconds.
// On x86/64, release-stores require no explicit hardware barriers or lock
// instructions.
// On ARM/64, option B may be slower than option A, because the compiler will
// generate hardware barriers for every single release-store instead of just
// for the writes to stackPointer. However, the actual performance impact of
// this has not yet been measured on ARM, so we're currently using option B
// everywhere. This is something that we may want to change in the future once
// we've done measurements.
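//
// For reference, the two options differ only in the declared memory
// ordering. A minimal sketch of the difference (illustrative only):
//
//   // Option A: a single sequentially-consistent stack pointer.
//   mozilla::Atomic<uint32_t, mozilla::SequentiallyConsistent> stackPointer;
//
//   // Option B (used below): release-acquire ordering on the stack
//   // pointer *and* on every ProfileEntry field.
//   mozilla::Atomic<uint32_t, mozilla::ReleaseAcquire> stackPointer;
//   mozilla::Atomic<const char*, mozilla::ReleaseAcquire> label_;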

namespace js {

// A call stack can be specified to the JS engine such that every JS function
// entry/exit pushes/pops an entry to/from the specified stack.
//
// For more detailed information, see vm/GeckoProfiler.h.
//
class ProfileEntry {
  // A ProfileEntry represents either a C++ profile entry or a JS one.

  // WARNING WARNING WARNING
  //
  // All the fields below are Atomic<...,ReleaseAcquire>. This is needed so
  // that writes to these fields are release-writes, which ensures that
  // earlier writes in this thread don't get reordered after the writes to
  // these fields. In particular, the decrement of the stack pointer in
  // PseudoStack::pop() is a write that *must* happen before the values in
  // this ProfileEntry are changed. Otherwise, the sampler thread might see
  // an inconsistent state where the stack pointer still points to a
  // ProfileEntry which has already been popped off the stack and whose
  // fields have now been partially repopulated with new values.
  // See the "Concurrency considerations" paragraph at the top of this file
  // for more details.

  // Descriptive label for this entry. Must be a static string! Can be an
  // empty string, but not a null pointer.
  mozilla::Atomic<const char*, mozilla::ReleaseAcquire> label_;

  // An additional descriptive string of this entry which is combined with
  // |label_| in profiler output. Need not be (and usually isn't) static. Can
  // be null.
  mozilla::Atomic<const char*, mozilla::ReleaseAcquire> dynamicString_;

  // Stack pointer for non-JS entries, the script pointer otherwise.
  mozilla::Atomic<void*, mozilla::ReleaseAcquire> spOrScript;

  // Line number for non-JS entries, the bytecode offset otherwise.
  mozilla::Atomic<int32_t, mozilla::ReleaseAcquire> lineOrPcOffset;

  // Bits 0...1 hold the Kind. Bits 2...3 are unused. Bits 4...12 hold the
  // Category.
  mozilla::Atomic<uint32_t, mozilla::ReleaseAcquire> kindAndCategory_;

  static int32_t pcToOffset(JSScript* aScript, jsbytecode* aPc);

 public:
  enum class Kind : uint32_t {
    // A normal C++ frame.
    CPP_NORMAL = 0,

    // A special C++ frame indicating the start of a run of JS pseudostack
    // entries. CPP_MARKER_FOR_JS frames are ignored, except for the sp
    // field.
    CPP_MARKER_FOR_JS = 1,

    // A normal JS frame.
    JS_NORMAL = 2,

    // An interpreter JS frame that has OSR-ed into baseline. JS_NORMAL
    // frames can be converted to JS_OSR and back. JS_OSR frames are
    // ignored.
    JS_OSR = 3,

    KIND_MASK = 0x3,
  };

  // Keep these in sync with devtools/client/performance/modules/categories.js
  enum class Category : uint32_t {
    OTHER = 1u << 4,
    CSS = 1u << 5,
    JS = 1u << 6,
    GC = 1u << 7,
    CC = 1u << 8,
    NETWORK = 1u << 9,
    GRAPHICS = 1u << 10,
    STORAGE = 1u << 11,
    EVENTS = 1u << 12,

    FIRST = OTHER,
    LAST = EVENTS,

    CATEGORY_MASK = ~uint32_t(Kind::KIND_MASK),
  };

  static_assert((uint32_t(Category::FIRST) & uint32_t(Kind::KIND_MASK)) == 0,
                "Category overlaps with Kind");
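
  // As a worked example of the packing scheme: a normal JS frame stores
  // uint32_t(Kind::JS_NORMAL) | uint32_t(Category::JS), i.e. 0x2 | 0x40 ==
  // 0x42 in kindAndCategory_. Masking with KIND_MASK (0x3) recovers the Kind
  // (0x2 == JS_NORMAL), and masking with CATEGORY_MASK (~0x3) recovers the
  // Category (0x40 == JS).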

  bool isCpp() const {
    Kind k = kind();
    return k == Kind::CPP_NORMAL || k == Kind::CPP_MARKER_FOR_JS;
  }

  bool isJs() const {
    Kind k = kind();
    return k == Kind::JS_NORMAL || k == Kind::JS_OSR;
  }

  void setLabel(const char* aLabel) { label_ = aLabel; }
  const char* label() const { return label_; }

  const char* dynamicString() const { return dynamicString_; }

  void initCppFrame(const char* aLabel, const char* aDynamicString, void* sp,
                    uint32_t aLine, Kind aKind, Category aCategory) {
    label_ = aLabel;
    dynamicString_ = aDynamicString;
    spOrScript = sp;
    lineOrPcOffset = static_cast<int32_t>(aLine);
    kindAndCategory_ = uint32_t(aKind) | uint32_t(aCategory);
    MOZ_ASSERT(isCpp());
  }

  void initJsFrame(const char* aLabel, const char* aDynamicString,
                   JSScript* aScript, jsbytecode* aPc) {
    label_ = aLabel;
    dynamicString_ = aDynamicString;
    spOrScript = aScript;
    lineOrPcOffset = pcToOffset(aScript, aPc);
    kindAndCategory_ = uint32_t(Kind::JS_NORMAL) | uint32_t(Category::JS);
    MOZ_ASSERT(isJs());
  }

  void setKind(Kind aKind) {
    kindAndCategory_ = uint32_t(aKind) | uint32_t(category());
  }

  Kind kind() const {
    return Kind(kindAndCategory_ & uint32_t(Kind::KIND_MASK));
  }

  Category category() const {
    return Category(kindAndCategory_ & uint32_t(Category::CATEGORY_MASK));
  }

  void* stackAddress() const {
    MOZ_ASSERT(!isJs());
    return spOrScript;
  }

  JS_PUBLIC_API JSScript* script() const;

  uint32_t line() const {
    MOZ_ASSERT(!isJs());
    return static_cast<uint32_t>(lineOrPcOffset);
  }

  // Note that the pointer returned might be invalid.
  JSScript* rawScript() const {
    MOZ_ASSERT(isJs());
    void* script = spOrScript;
    return static_cast<JSScript*>(script);
  }

  // We can't know the layout of JSScript, so look in vm/GeckoProfiler.cpp.
  JS_FRIEND_API jsbytecode* pc() const;
  void setPC(jsbytecode* pc);

  void trace(JSTracer* trc);

  // The offset of a pc into a script's code can actually be 0, so to
  // signify a nullptr pc, use a -1 index. pc() and setPC() check for this
  // value when getting/setting the pc.
  static const int32_t NullPCOffset = -1;
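
  // For illustration, the round-trip that pc() and setPC() perform is
  // morally the following (a hypothetical sketch; the real definitions
  // live in vm/GeckoProfiler.cpp):
  //
  //   setPC(pc):  lineOrPcOffset = pc ? pcToOffset(script(), pc)
  //                                   : NullPCOffset;
  //   pc():       return lineOrPcOffset == NullPCOffset
  //                          ? nullptr
  //                          : <offset-to-pc translation via the script>;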
};

JS_FRIEND_API void SetContextProfilingStack(JSContext* cx,
                                            PseudoStack* pseudoStack);

// GetContextProfilingStack also exists, but it's defined in RootingAPI.h.

JS_FRIEND_API void EnableContextProfilingStack(JSContext* cx, bool enabled);

JS_FRIEND_API void RegisterContextProfilingEventMarker(JSContext* cx,
                                                       void (*fn)(const char*));

}  // namespace js

// Each thread has its own PseudoStack. That thread modifies the PseudoStack,
// pushing and popping elements as necessary.
//
// The PseudoStack is also read periodically by the profiler's sampler thread.
// This happens only when the thread that owns the PseudoStack is suspended. So
// there are no genuine parallel accesses.
//
// However, it is possible for pushing/popping to be interrupted by a periodic
// sample. Because of this, we need pushing/popping to be effectively atomic.
//
// - When pushing a new entry, we increment the stack pointer -- making the new
//   entry visible to the sampler thread -- only after the new entry has been
//   fully written. The stack pointer is Atomic<uint32_t,ReleaseAcquire>, so
//   the increment is a release-store, which ensures that this store is not
//   reordered before the writes of the entry.
//
// - When popping an old entry, the only operation is the decrementing of the
//   stack pointer, which is obviously atomic.
//
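// To see why the ordering matters, consider a push in which the increment
// became visible before the entry was written (an illustrative bad
// interleaving that the release-store rules out):
//
//   thread A:  stackPointer = n + 1;       // reordered too early
//   (thread A is suspended; the sampler reads entries[0..n+1), but
//    entries[n] still holds stale data from a previously popped frame)
//   thread A:  entries[n].initCppFrame(...);
//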
class PseudoStack final {
 public:
  PseudoStack() : stackPointer(0) {}

  ~PseudoStack() {
    // The label macros keep a reference to the PseudoStack to avoid a TLS
    // access. If these are somehow not all cleared, we will get a
    // use-after-free, so it is better to crash now.
    MOZ_RELEASE_ASSERT(stackPointer == 0);
  }

  void pushCppFrame(const char* label, const char* dynamicString, void* sp,
                    uint32_t line, js::ProfileEntry::Kind kind,
                    js::ProfileEntry::Category category) {
    if (stackPointer < MaxEntries) {
      entries[stackPointer].initCppFrame(label, dynamicString, sp, line, kind,
                                         category);
    }

    // This must happen at the end! The compiler will not reorder this
    // update because stackPointer is Atomic<..., ReleaseAcquire>, which
    // makes this assignment a release-store, so the writes above will not
    // be reordered to occur after the stackPointer store.
    // Do the read and the write as two separate statements, in order to
    // make it clear that we don't need an atomic increment, which would be
    // more expensive on x86 than the separate operations done here.
    // This thread is the only one that ever changes the value of
    // stackPointer.
    uint32_t oldStackPointer = stackPointer;
    stackPointer = oldStackPointer + 1;
  }

  void pushJsFrame(const char* label, const char* dynamicString,
                   JSScript* script, jsbytecode* pc) {
    if (stackPointer < MaxEntries) {
      entries[stackPointer].initJsFrame(label, dynamicString, script, pc);
    }

    // This must happen at the end! The compiler will not reorder this
    // update because stackPointer is Atomic<..., ReleaseAcquire>, which
    // makes this assignment a release-store, so the writes above will not
    // be reordered to occur after the stackPointer store.
    // Do the read and the write as two separate statements, in order to
    // make it clear that we don't need an atomic increment, which would be
    // more expensive on x86 than the separate operations done here.
    // This thread is the only one that ever changes the value of
    // stackPointer.
    uint32_t oldStackPointer = stackPointer;
    stackPointer = oldStackPointer + 1;
  }

  void pop() {
    MOZ_ASSERT(stackPointer > 0);
    // Do the read and the write as two separate statements, in order to
    // make it clear that we don't need an atomic decrement, which would be
    // more expensive on x86 than the separate operations done here.
    // This thread is the only one that ever changes the value of
    // stackPointer.
    uint32_t oldStackPointer = stackPointer;
    stackPointer = oldStackPointer - 1;
  }

  uint32_t stackSize() const {
    return std::min(uint32_t(stackPointer), uint32_t(MaxEntries));
  }

 private:
  // No copying.
  PseudoStack(const PseudoStack&) = delete;
  void operator=(const PseudoStack&) = delete;

 public:
  static const uint32_t MaxEntries = 1024;

  // The stack entries.
  js::ProfileEntry entries[MaxEntries];

  // This may exceed MaxEntries, so instead use the stackSize() method to
  // determine the number of valid samples in entries. When this is less
  // than MaxEntries, it refers to the first free entry past the top of the
  // in-use stack (i.e. entries[stackPointer - 1] is the top stack entry).
  //
  // WARNING WARNING WARNING
  //
  // This is an atomic variable that uses ReleaseAcquire memory ordering.
  // See the "Concurrency considerations" paragraph at the top of this file
  // for more details.
  mozilla::Atomic<uint32_t, mozilla::ReleaseAcquire> stackPointer;
};

namespace js {

class AutoGeckoProfilerEntry;
class GeckoProfilerEntryMarker;
class GeckoProfilerBaselineOSRMarker;

class GeckoProfilerThread {
  friend class AutoGeckoProfilerEntry;
  friend class GeckoProfilerEntryMarker;
  friend class GeckoProfilerBaselineOSRMarker;

  PseudoStack* pseudoStack_;

 public:
  GeckoProfilerThread();

  uint32_t stackPointer() {
    MOZ_ASSERT(installed());
    return pseudoStack_->stackPointer;
  }
  ProfileEntry* stack() { return pseudoStack_->entries; }
  PseudoStack* getPseudoStack() { return pseudoStack_; }

  /* management of whether instrumentation is on or off */
  bool installed() { return pseudoStack_ != nullptr; }

  void setProfilingStack(PseudoStack* pseudoStack);
  void trace(JSTracer* trc);

  /*
   * Functions which are the actual instrumentation to track run information
   *
   *   - enter: a function has started to execute
   *   - updatePC: updates the pc information about where a function
   *               is currently executing
   *   - exit: this function has ceased execution, and no further
   *           entries/exits will be made
   */
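  // A minimal sketch of how these hooks might be driven (hypothetical call
  // sites; the real callers live in the interpreter and the JITs):
  //
  //   if (!profilerThread.enter(cx, script, maybeFun))
  //     return false;
  //   // ... execute the function; whenever the pc changes:
  //   profilerThread.updatePC(cx, script, pc);
  //   // ... and once the frame finishes:
  //   profilerThread.exit(script, maybeFun);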
  bool enter(JSContext* cx, JSScript* script, JSFunction* maybeFun);
  void exit(JSScript* script, JSFunction* maybeFun);
  inline void updatePC(JSContext* cx, JSScript* script, jsbytecode* pc);
};

}  // namespace js

#endif /* js_ProfilingStack_h */