1 /* This Source Code Form is subject to the terms of the Mozilla Public
2  * License, v. 2.0. If a copy of the MPL was not distributed with this
3  * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
4 
5 /**
6  * This checker implements the "can run script" analysis.  The idea is to detect
7  * functions that can run script that are being passed reference-counted
8  * arguments (including "this") whose refcount might go to zero as a result of
9  * the script running.  We want to prevent that.
10  *
11  * The approach is to attempt to enforce the following invariants on the call
12  * graph:
13  *
14  * 1) Any caller of a MOZ_CAN_RUN_SCRIPT function is itself MOZ_CAN_RUN_SCRIPT.
15  * 2) If a virtual MOZ_CAN_RUN_SCRIPT method overrides a base class method,
16  *    that base class method is also MOZ_CAN_RUN_SCRIPT.
17  *
18  * Invariant 2 ensures that we don't accidentally call a MOZ_CAN_RUN_SCRIPT
19  * function via a base-class virtual call.  Invariant 1 ensures that
20  * the property of being able to run script propagates up the callstack.  There
21  * is an opt-out for invariant 1: A function (declaration _or_ implementation)
22  * can be decorated with MOZ_CAN_RUN_SCRIPT_BOUNDARY to indicate that we do not
23  * require it or any of its callers to be MOZ_CAN_RUN_SCRIPT even if it calls
24  * MOZ_CAN_RUN_SCRIPT functions.
25  *
26  * There are two known holes in invariant 1, apart from the
27  * MOZ_CAN_RUN_SCRIPT_BOUNDARY opt-out:
28  *
29  *  - Functions called via function pointers can be MOZ_CAN_RUN_SCRIPT even if
30  *    their caller is not, because we have no way to determine from the function
31  *    pointer what function is being called.
32  *  - MOZ_CAN_RUN_SCRIPT destructors can happen in functions that are not
33  *    MOZ_CAN_RUN_SCRIPT.
34  *    https://bugzilla.mozilla.org/show_bug.cgi?id=1535523 tracks this.
35  *
36  * Given those invariants we then require that when calling a MOZ_CAN_RUN_SCRIPT
37  * function all refcounted arguments (including "this") satisfy one of these
38  * conditions:
39  *  a) The argument is held via a strong pointer on the stack.
40  *  b) The argument is a const strong pointer member of "this".  We know "this"
41  *     is being kept alive, and a const strong pointer member can't drop its ref
42  *     until "this" dies.
43  *  c) The argument is an argument of the caller (and hence held by a strong
44  *     pointer somewhere higher up the callstack).
45  *  d) The argument is explicitly annotated with MOZ_KnownLive, which indicates
46  *     that something is guaranteed to keep it alive (e.g. it's rooted via a JS
47  *     reflector).
48  *  e) The argument is constexpr and therefore cannot disappear.
49  */
50 
51 #include "CanRunScriptChecker.h"
52 #include "CustomMatchers.h"
53 #include "clang/Lex/Lexer.h"
54 
registerMatchers(MatchFinder * AstMatcher)55 void CanRunScriptChecker::registerMatchers(MatchFinder *AstMatcher) {
56   auto Refcounted = qualType(hasDeclaration(cxxRecordDecl(isRefCounted())));
57   auto StackSmartPtr = ignoreTrivials(declRefExpr(to(varDecl(
58       hasAutomaticStorageDuration(), hasType(isSmartPtrToRefCounted())))));
59   auto ConstMemberOfThisSmartPtr =
60       memberExpr(hasType(isSmartPtrToRefCounted()), hasType(isConstQualified()),
61                  hasObjectExpression(cxxThisExpr()));
62   // A smartptr can be known-live for three reasons:
63   // 1) It's declared on the stack.
64   // 2) It's a const member of "this".  We know "this" is alive (recursively)
65   //    and const members can't change their value hence can't drop their
66   //    reference until "this" gets destroyed.
67   // 3) It's an immediate temporary being constructed at the point where the
68   //    call is happening.
69   auto KnownLiveSmartPtr = anyOf(
70       StackSmartPtr, ConstMemberOfThisSmartPtr,
71       ignoreTrivials(cxxConstructExpr(hasType(isSmartPtrToRefCounted()))));
72 
73   auto MozKnownLiveCall =
74       ignoreTrivials(callExpr(callee(functionDecl(hasName("MOZ_KnownLive")))));
75 
76   // Params of the calling function are presumed live, because it itself should
77   // be MOZ_CAN_RUN_SCRIPT.  Note that this is subject to
78   // https://bugzilla.mozilla.org/show_bug.cgi?id=1537656 at the moment.
79   auto KnownLiveParam = anyOf(
80       // "this" is OK
81       cxxThisExpr(),
82       // A parameter of the calling function is OK.
83       declRefExpr(to(parmVarDecl())));
84 
85   auto KnownLiveMemberOfParam =
86       memberExpr(hasKnownLiveAnnotation(),
87                  hasObjectExpression(anyOf(
88                      ignoreTrivials(KnownLiveParam),
89                      declRefExpr(to(varDecl(hasAutomaticStorageDuration()))))));
90 
91   // A matcher that matches various things that are known to be live directly,
92   // without making any assumptions about operators.
93   auto KnownLiveBase = anyOf(
94       // Things that are known to be a stack or immutable refptr.
95       KnownLiveSmartPtr,
96       // MOZ_KnownLive() calls.
97       MozKnownLiveCall,
98       // Params of the caller function.
99       KnownLiveParam,
100       // Members of the params that are marked as MOZ_KNOWN_LIVE
101       KnownLiveMemberOfParam,
102       // Constexpr things.
103       declRefExpr(to(varDecl(isConstexpr()))));
104 
105   // A matcher that matches various known-live things that don't involve
106   // non-unary operators.
107   auto KnownLiveSimple = anyOf(
108       // Things that are just known live.
109       KnownLiveBase,
110       // Method calls on a live things that are smart ptrs.  Note that we don't
111       // want to allow general method calls on live things, because those can
112       // return non-live objects (e.g. consider "live_pointer->foo()" as an
113       // example).  For purposes of this analysis we are assuming the method
114       // calls on smart ptrs all just return the pointer inside,
115       cxxMemberCallExpr(
116           on(anyOf(allOf(hasType(isSmartPtrToRefCounted()), KnownLiveBase),
117                    // Allow it if calling a member method which is marked as
118                    // MOZ_KNOWN_LIVE
119                    KnownLiveMemberOfParam))),
120       // operator* or operator-> on a thing that is already known to be live.
121       cxxOperatorCallExpr(
122           anyOf(hasOverloadedOperatorName("*"),
123                 hasOverloadedOperatorName("->")),
124           hasAnyArgument(
125               anyOf(KnownLiveBase, ignoreTrivials(KnownLiveMemberOfParam))),
126           argumentCountIs(1)),
127       // A dereference on a thing that is known to be live.  This is _not_
128       // caught by the "operator* or operator->" clause above, because
129       // cxxOperatorCallExpr() only catches cases when a class defines
130       // operator*.  The default (built-in) operator* matches unaryOperator()
131       // instead.),
132       unaryOperator(
133           unaryDereferenceOperator(),
134           hasUnaryOperand(
135               // If we're doing *someArg, the argument of the dereference is an
136               // ImplicitCastExpr LValueToRValue which has the DeclRefExpr as an
137               // argument.  We could try to match that explicitly with a custom
138               // matcher (none of the built-in matchers seem to match on the
139               // thing being cast for an implicitCastExpr), but it's simpler to
140               // just use ignoreTrivials to strip off the cast.
141               ignoreTrivials(KnownLiveBase))),
142       // Taking a pointer to a live reference.  We explicitly want to exclude
143       // things that are not of type reference-to-refcounted or type refcounted,
144       // because if someone takes a pointer to a pointer to refcounted or a
145       // pointer to a smart ptr and passes those in to a callee that definitely
146       // does not guarantee liveness; in fact the callee could modify those
147       // things!  In practice they would be the wrong type anyway, though, so
148       // it's hard to add a test for this.
149       unaryOperator(hasOperatorName("&"),
150                     hasUnaryOperand(allOf(anyOf(hasType(references(Refcounted)),
151                                                 hasType(Refcounted)),
152                                           ignoreTrivials(KnownLiveBase)))));
153 
154   auto KnownLive = anyOf(
155       // Anything above, of course.
156       KnownLiveSimple,
157       // Conditional operators where both arms are live.
158       conditionalOperator(hasFalseExpression(ignoreTrivials(KnownLiveSimple)),
159                           hasTrueExpression(ignoreTrivials(KnownLiveSimple)))
160       // We're not handling cases like a dereference of a conditional operator,
161       // mostly because handling a dereference in general is so ugly.  I
162       // _really_ wish I could just write a recursive matcher here easily.
163   );
164 
165   auto InvalidArg = ignoreTrivialsConditional(
166       // We want to consider things if there is anything refcounted involved,
167       // including in any of the trivials that we otherwise strip off.
168       anyOf(hasType(Refcounted), hasType(pointsTo(Refcounted)),
169             hasType(references(Refcounted)), hasType(isSmartPtrToRefCounted())),
170       // We want to find any expression,
171       expr(
172           // which is not known live,
173           unless(KnownLive),
174           // and which is not a default arg with value nullptr, since those are
175           // always safe,
176           unless(cxxDefaultArgExpr(isNullDefaultArg())),
177           // and which is not a literal nullptr,
178           unless(cxxNullPtrLiteralExpr()), expr().bind("invalidArg")));
179 
180   // A matcher which will mark the first invalid argument it finds invalid, but
181   // will always match, even if it finds no invalid arguments, so it doesn't
182   // preclude other matchers from running and maybe finding invalid args.
183   auto OptionalInvalidExplicitArg = anyOf(
184       // We want to find any argument which is invalid.
185       hasAnyArgument(InvalidArg),
186 
187       // This makes this matcher optional.
188       anything());
189 
190   // Please note that the hasCanRunScriptAnnotation() matchers are not present
191   // directly in the cxxMemberCallExpr, callExpr and constructExpr matchers
192   // because we check that the corresponding functions can run script later in
193   // the checker code.
194   AstMatcher->addMatcher(
195       expr(
196           anyOf(
197               // We want to match a method call expression,
198               cxxMemberCallExpr(
199                   // which optionally has an invalid arg,
200                   OptionalInvalidExplicitArg,
201                   // or which optionally has an invalid this argument,
202                   anyOf(on(InvalidArg), anything()), expr().bind("callExpr")),
203               // or a regular call expression,
204               callExpr(
205                   // which optionally has an invalid arg.
206                   OptionalInvalidExplicitArg, expr().bind("callExpr")),
207               // or a construct expression,
208               cxxConstructExpr(
209                   // which optionally has an invalid arg.
210                   OptionalInvalidExplicitArg, expr().bind("constructExpr"))),
211 
212           anyOf(
213               // We want to match the parent function.
214               forFunction(functionDecl().bind("nonCanRunScriptParentFunction")),
215 
216               // ... optionally.
217               anything())),
218       this);
219 }
220 
onStartOfTranslationUnit()221 void CanRunScriptChecker::onStartOfTranslationUnit() {
222   IsFuncSetBuilt = false;
223   CanRunScriptFuncs.clear();
224 }
225 
226 namespace {
227 /// This class is a callback used internally to match function declarations with
228 /// the MOZ_CAN_RUN_SCRIPT annotation, adding these functions to the
229 /// can-run-script function set and making sure the functions they override (if
230 /// any) also have the annotation.
231 class FuncSetCallback : public MatchFinder::MatchCallback {
232 public:
FuncSetCallback(CanRunScriptChecker & Checker,std::unordered_set<const FunctionDecl * > & FuncSet)233   FuncSetCallback(CanRunScriptChecker &Checker,
234                   std::unordered_set<const FunctionDecl *> &FuncSet)
235       : CanRunScriptFuncs(FuncSet), Checker(Checker) {}
236 
237   void run(const MatchFinder::MatchResult &Result) override;
238 
239 private:
240   /// This method checks the methods overriden by the given parameter.
241   void checkOverriddenMethods(const CXXMethodDecl *Method);
242 
243   std::unordered_set<const FunctionDecl *> &CanRunScriptFuncs;
244   CanRunScriptChecker &Checker;
245 };
246 
run(const MatchFinder::MatchResult & Result)247 void FuncSetCallback::run(const MatchFinder::MatchResult &Result) {
248   const FunctionDecl *Func;
249   if (auto *Lambda = Result.Nodes.getNodeAs<LambdaExpr>("lambda")) {
250     Func = Lambda->getCallOperator();
251     if (!Func || !hasCustomAttribute<moz_can_run_script>(Func))
252       return;
253   } else {
254     Func = Result.Nodes.getNodeAs<FunctionDecl>("canRunScriptFunction");
255 
256     const char *ErrorAttrInDefinition =
257         "MOZ_CAN_RUN_SCRIPT must be put in front "
258         "of the declaration, not the definition";
259     const char *NoteAttrInDefinition = "The first declaration exists here";
260     if (!Func->isFirstDecl() &&
261         !hasCustomAttribute<moz_can_run_script_for_definition>(Func)) {
262       const FunctionDecl *FirstDecl = Func->getFirstDecl();
263       if (!hasCustomAttribute<moz_can_run_script>(FirstDecl)) {
264         Checker.diag(Func->getLocation(), ErrorAttrInDefinition,
265                      DiagnosticIDs::Error);
266         Checker.diag(FirstDecl->getLocation(), NoteAttrInDefinition,
267                      DiagnosticIDs::Note);
268       }
269     }
270   }
271 
272   CanRunScriptFuncs.insert(Func);
273 
274   // If this is a method, we check the methods it overrides.
275   if (auto *Method = dyn_cast<CXXMethodDecl>(Func)) {
276     checkOverriddenMethods(Method);
277   }
278 }
279 
checkOverriddenMethods(const CXXMethodDecl * Method)280 void FuncSetCallback::checkOverriddenMethods(const CXXMethodDecl *Method) {
281   for (auto OverriddenMethod : Method->overridden_methods()) {
282     if (!hasCustomAttribute<moz_can_run_script>(OverriddenMethod)) {
283       const char *ErrorNonCanRunScriptOverridden =
284           "functions marked as MOZ_CAN_RUN_SCRIPT cannot override functions "
285           "that are not marked MOZ_CAN_RUN_SCRIPT";
286       const char *NoteNonCanRunScriptOverridden =
287           "overridden function declared here";
288 
289       Checker.diag(Method->getLocation(), ErrorNonCanRunScriptOverridden,
290                    DiagnosticIDs::Error);
291       Checker.diag(OverriddenMethod->getLocation(),
292                    NoteNonCanRunScriptOverridden, DiagnosticIDs::Note);
293     }
294   }
295 }
296 } // namespace
297 
buildFuncSet(ASTContext * Context)298 void CanRunScriptChecker::buildFuncSet(ASTContext *Context) {
299   // We create a match finder.
300   MatchFinder Finder;
301   // We create the callback which will be called when we find a function with
302   // a MOZ_CAN_RUN_SCRIPT annotation.
303   FuncSetCallback Callback(*this, CanRunScriptFuncs);
304   // We add the matcher to the finder, linking it to our callback.
305   Finder.addMatcher(
306       functionDecl(hasCanRunScriptAnnotation()).bind("canRunScriptFunction"),
307       &Callback);
308   Finder.addMatcher(lambdaExpr().bind("lambda"), &Callback);
309   // We start the analysis, given the ASTContext our main checker is in.
310   Finder.matchAST(*Context);
311 }
312 
check(const MatchFinder::MatchResult & Result)313 void CanRunScriptChecker::check(const MatchFinder::MatchResult &Result) {
314 
315   // If the set of functions which can run script is not yet built, then build
316   // it.
317   if (!IsFuncSetBuilt) {
318     buildFuncSet(Result.Context);
319     IsFuncSetBuilt = true;
320   }
321 
322   const char *ErrorInvalidArg =
323       "arguments must all be strong refs or caller's parameters when calling a "
324       "function marked as MOZ_CAN_RUN_SCRIPT (including the implicit object "
325       "argument).  '%0' is neither.";
326 
327   const char *ErrorNonCanRunScriptParent =
328       "functions marked as MOZ_CAN_RUN_SCRIPT can only be called from "
329       "functions also marked as MOZ_CAN_RUN_SCRIPT";
330   const char *NoteNonCanRunScriptParent = "caller function declared here";
331 
332   const Expr *InvalidArg;
333   if (const CXXDefaultArgExpr *defaultArg =
334           Result.Nodes.getNodeAs<CXXDefaultArgExpr>("invalidArg")) {
335     InvalidArg = defaultArg->getExpr();
336   } else {
337     InvalidArg = Result.Nodes.getNodeAs<Expr>("invalidArg");
338   }
339 
340   const CallExpr *Call = Result.Nodes.getNodeAs<CallExpr>("callExpr");
341   // If we don't find the FunctionDecl linked to this call or if it's not marked
342   // as can-run-script, consider that we didn't find a match.
343   if (Call && (!Call->getDirectCallee() ||
344                !CanRunScriptFuncs.count(Call->getDirectCallee()))) {
345     Call = nullptr;
346   }
347 
348   const CXXConstructExpr *Construct =
349       Result.Nodes.getNodeAs<CXXConstructExpr>("constructExpr");
350 
351   // If we don't find the CXXConstructorDecl linked to this construct expression
352   // or if it's not marked as can-run-script, consider that we didn't find a
353   // match.
354   if (Construct && (!Construct->getConstructor() ||
355                     !CanRunScriptFuncs.count(Construct->getConstructor()))) {
356     Construct = nullptr;
357   }
358 
359   const FunctionDecl *ParentFunction =
360       Result.Nodes.getNodeAs<FunctionDecl>("nonCanRunScriptParentFunction");
361   // If the parent function can run script, consider that we didn't find a match
362   // because we only care about parent functions which can't run script.
363   //
364   // In addition, If the parent function is annotated as a
365   // CAN_RUN_SCRIPT_BOUNDARY, we don't want to complain about it calling a
366   // CAN_RUN_SCRIPT function. This is a mechanism to opt out of the infectious
367   // nature of CAN_RUN_SCRIPT which is necessary in some tricky code like
368   // Bindings.
369   if (ParentFunction &&
370       (CanRunScriptFuncs.count(ParentFunction) ||
371        hasCustomAttribute<moz_can_run_script_boundary>(ParentFunction))) {
372     ParentFunction = nullptr;
373   }
374 
375   // Get the call range from either the CallExpr or the ConstructExpr.
376   SourceRange CallRange;
377   if (Call) {
378     CallRange = Call->getSourceRange();
379   } else if (Construct) {
380     CallRange = Construct->getSourceRange();
381   } else {
382     // If we have neither a Call nor a Construct, we have nothing do to here.
383     return;
384   }
385 
386   // If we have an invalid argument in the call, we emit the diagnostic to
387   // signal it.
388   if (InvalidArg) {
389     const StringRef invalidArgText = Lexer::getSourceText(
390         CharSourceRange::getTokenRange(InvalidArg->getSourceRange()),
391         Result.Context->getSourceManager(), Result.Context->getLangOpts());
392     diag(InvalidArg->getExprLoc(), ErrorInvalidArg, DiagnosticIDs::Error)
393         << InvalidArg->getSourceRange() << invalidArgText;
394   }
395 
396   // If the parent function is not marked as MOZ_CAN_RUN_SCRIPT, we emit an
397   // error and a not indicating it.
398   if (ParentFunction) {
399     assert(!hasCustomAttribute<moz_can_run_script>(ParentFunction) &&
400            "Matcher missed something");
401 
402     diag(CallRange.getBegin(), ErrorNonCanRunScriptParent, DiagnosticIDs::Error)
403         << CallRange;
404 
405     diag(ParentFunction->getCanonicalDecl()->getLocation(),
406          NoteNonCanRunScriptParent, DiagnosticIDs::Note);
407   }
408 }
409