1*fe6060f1SDimitry Andric //===-- dfsan_origin.h ----------------------------------*- C++ -*-===//
2*fe6060f1SDimitry Andric //
3*fe6060f1SDimitry Andric // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4*fe6060f1SDimitry Andric // See https://llvm.org/LICENSE.txt for license information.
5*fe6060f1SDimitry Andric // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6*fe6060f1SDimitry Andric //
7*fe6060f1SDimitry Andric //===----------------------------------------------------------------------===//
8*fe6060f1SDimitry Andric //
9*fe6060f1SDimitry Andric // This file is a part of DataFlowSanitizer.
10*fe6060f1SDimitry Andric //
11*fe6060f1SDimitry Andric // Origin id utils.
12*fe6060f1SDimitry Andric //===----------------------------------------------------------------------===//
13*fe6060f1SDimitry Andric 
14*fe6060f1SDimitry Andric #ifndef DFSAN_ORIGIN_H
15*fe6060f1SDimitry Andric #define DFSAN_ORIGIN_H
16*fe6060f1SDimitry Andric 
17*fe6060f1SDimitry Andric #include "dfsan_chained_origin_depot.h"
18*fe6060f1SDimitry Andric #include "dfsan_flags.h"
19*fe6060f1SDimitry Andric #include "sanitizer_common/sanitizer_stackdepot.h"
20*fe6060f1SDimitry Andric 
21*fe6060f1SDimitry Andric namespace __dfsan {
22*fe6060f1SDimitry Andric 
23*fe6060f1SDimitry Andric // Origin handling.
24*fe6060f1SDimitry Andric //
25*fe6060f1SDimitry Andric // Origin is a 32-bit identifier that is attached to any taint value in the
26*fe6060f1SDimitry Andric // program and describes how this memory came to be tainted.
27*fe6060f1SDimitry Andric //
28*fe6060f1SDimitry Andric // Chained origin id is like:
29*fe6060f1SDimitry Andric // zzzz xxxx xxxx xxxx
30*fe6060f1SDimitry Andric //
31*fe6060f1SDimitry Andric // Chained origin id describes an event of storing a taint value to
32*fe6060f1SDimitry Andric // memory. The xxx part is a value of ChainedOriginDepot, which is a mapping of
33*fe6060f1SDimitry Andric // (stack_id, prev_id) -> id, where
34*fe6060f1SDimitry Andric //  * stack_id describes the event.
35*fe6060f1SDimitry Andric //    StackDepot keeps a mapping between those and corresponding stack traces.
36*fe6060f1SDimitry Andric //  * prev_id is another origin id that describes the earlier part of the
37*fe6060f1SDimitry Andric //    taint value history. 0 prev_id indicates the start of a chain.
38*fe6060f1SDimitry Andric // Following a chain of prev_id provides the full recorded history of a taint
39*fe6060f1SDimitry Andric // value.
40*fe6060f1SDimitry Andric //
41*fe6060f1SDimitry Andric // This, effectively, defines a forest where nodes are points in value history
42*fe6060f1SDimitry Andric // marked with origin ids, and edges are events that are marked with stack_id.
43*fe6060f1SDimitry Andric //
44*fe6060f1SDimitry Andric // The "zzzz" bits of chained origin id are used to store the length of the
45*fe6060f1SDimitry Andric // origin chain.
46*fe6060f1SDimitry Andric 
47*fe6060f1SDimitry Andric class Origin {
48*fe6060f1SDimitry Andric  public:
isValidId(u32 id)49*fe6060f1SDimitry Andric   static bool isValidId(u32 id) { return id != 0; }
50*fe6060f1SDimitry Andric 
raw_id()51*fe6060f1SDimitry Andric   u32 raw_id() const { return raw_id_; }
52*fe6060f1SDimitry Andric 
isChainedOrigin()53*fe6060f1SDimitry Andric   bool isChainedOrigin() const { return Origin::isValidId(raw_id_); }
54*fe6060f1SDimitry Andric 
getChainedId()55*fe6060f1SDimitry Andric   u32 getChainedId() const {
56*fe6060f1SDimitry Andric     CHECK(Origin::isValidId(raw_id_));
57*fe6060f1SDimitry Andric     return raw_id_ & kChainedIdMask;
58*fe6060f1SDimitry Andric   }
59*fe6060f1SDimitry Andric 
60*fe6060f1SDimitry Andric   // Returns the next origin in the chain and the current stack trace.
61*fe6060f1SDimitry Andric   //
62*fe6060f1SDimitry Andric   // It scans a partition of StackDepot linearly, and is used only by origin
63*fe6060f1SDimitry Andric   // tracking report.
getNextChainedOrigin(StackTrace * stack)64*fe6060f1SDimitry Andric   Origin getNextChainedOrigin(StackTrace *stack) const {
65*fe6060f1SDimitry Andric     CHECK(Origin::isValidId(raw_id_));
66*fe6060f1SDimitry Andric     u32 prev_id;
67*fe6060f1SDimitry Andric     u32 stack_id = GetChainedOriginDepot()->Get(getChainedId(), &prev_id);
68*fe6060f1SDimitry Andric     if (stack)
69*fe6060f1SDimitry Andric       *stack = StackDepotGet(stack_id);
70*fe6060f1SDimitry Andric     return Origin(prev_id);
71*fe6060f1SDimitry Andric   }
72*fe6060f1SDimitry Andric 
CreateChainedOrigin(Origin prev,StackTrace * stack)73*fe6060f1SDimitry Andric   static Origin CreateChainedOrigin(Origin prev, StackTrace *stack) {
74*fe6060f1SDimitry Andric     int depth = prev.isChainedOrigin() ? prev.depth() : -1;
75*fe6060f1SDimitry Andric     // depth is the length of the chain minus 1.
76*fe6060f1SDimitry Andric     // origin_history_size of 0 means unlimited depth.
77*fe6060f1SDimitry Andric     if (flags().origin_history_size > 0) {
78*fe6060f1SDimitry Andric       ++depth;
79*fe6060f1SDimitry Andric       if (depth >= flags().origin_history_size || depth > kMaxDepth)
80*fe6060f1SDimitry Andric         return prev;
81*fe6060f1SDimitry Andric     }
82*fe6060f1SDimitry Andric 
83*fe6060f1SDimitry Andric     StackDepotHandle h = StackDepotPut_WithHandle(*stack);
84*fe6060f1SDimitry Andric     if (!h.valid())
85*fe6060f1SDimitry Andric       return prev;
86*fe6060f1SDimitry Andric 
87*fe6060f1SDimitry Andric     if (flags().origin_history_per_stack_limit > 0) {
88*fe6060f1SDimitry Andric       int use_count = h.use_count();
89*fe6060f1SDimitry Andric       if (use_count > flags().origin_history_per_stack_limit)
90*fe6060f1SDimitry Andric         return prev;
91*fe6060f1SDimitry Andric     }
92*fe6060f1SDimitry Andric 
93*fe6060f1SDimitry Andric     u32 chained_id;
94*fe6060f1SDimitry Andric     bool inserted =
95*fe6060f1SDimitry Andric         GetChainedOriginDepot()->Put(h.id(), prev.raw_id(), &chained_id);
96*fe6060f1SDimitry Andric     CHECK((chained_id & kChainedIdMask) == chained_id);
97*fe6060f1SDimitry Andric 
98*fe6060f1SDimitry Andric     if (inserted && flags().origin_history_per_stack_limit > 0)
99*fe6060f1SDimitry Andric       h.inc_use_count_unsafe();
100*fe6060f1SDimitry Andric 
101*fe6060f1SDimitry Andric     return Origin((depth << kDepthShift) | chained_id);
102*fe6060f1SDimitry Andric   }
103*fe6060f1SDimitry Andric 
FromRawId(u32 id)104*fe6060f1SDimitry Andric   static Origin FromRawId(u32 id) { return Origin(id); }
105*fe6060f1SDimitry Andric 
106*fe6060f1SDimitry Andric  private:
107*fe6060f1SDimitry Andric   static const int kDepthBits = 4;
108*fe6060f1SDimitry Andric   static const int kDepthShift = 32 - kDepthBits;
109*fe6060f1SDimitry Andric 
110*fe6060f1SDimitry Andric   static const u32 kChainedIdMask = ((u32)-1) >> kDepthBits;
111*fe6060f1SDimitry Andric 
112*fe6060f1SDimitry Andric   u32 raw_id_;
113*fe6060f1SDimitry Andric 
Origin(u32 raw_id)114*fe6060f1SDimitry Andric   explicit Origin(u32 raw_id) : raw_id_(raw_id) {}
115*fe6060f1SDimitry Andric 
depth()116*fe6060f1SDimitry Andric   int depth() const {
117*fe6060f1SDimitry Andric     CHECK(isChainedOrigin());
118*fe6060f1SDimitry Andric     return (raw_id_ >> kDepthShift) & ((1 << kDepthBits) - 1);
119*fe6060f1SDimitry Andric   }
120*fe6060f1SDimitry Andric 
121*fe6060f1SDimitry Andric  public:
122*fe6060f1SDimitry Andric   static const int kMaxDepth = (1 << kDepthBits) - 1;
123*fe6060f1SDimitry Andric };
124*fe6060f1SDimitry Andric 
125*fe6060f1SDimitry Andric }  // namespace __dfsan
126*fe6060f1SDimitry Andric 
127*fe6060f1SDimitry Andric #endif  // DFSAN_ORIGIN_H
128