1*fe6060f1SDimitry Andric //===-- dfsan_origin.h ----------------------------------*- C++ -*-===// 2*fe6060f1SDimitry Andric // 3*fe6060f1SDimitry Andric // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4*fe6060f1SDimitry Andric // See https://llvm.org/LICENSE.txt for license information. 5*fe6060f1SDimitry Andric // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6*fe6060f1SDimitry Andric // 7*fe6060f1SDimitry Andric //===----------------------------------------------------------------------===// 8*fe6060f1SDimitry Andric // 9*fe6060f1SDimitry Andric // This file is a part of DataFlowSanitizer. 10*fe6060f1SDimitry Andric // 11*fe6060f1SDimitry Andric // Origin id utils. 12*fe6060f1SDimitry Andric //===----------------------------------------------------------------------===// 13*fe6060f1SDimitry Andric 14*fe6060f1SDimitry Andric #ifndef DFSAN_ORIGIN_H 15*fe6060f1SDimitry Andric #define DFSAN_ORIGIN_H 16*fe6060f1SDimitry Andric 17*fe6060f1SDimitry Andric #include "dfsan_chained_origin_depot.h" 18*fe6060f1SDimitry Andric #include "dfsan_flags.h" 19*fe6060f1SDimitry Andric #include "sanitizer_common/sanitizer_stackdepot.h" 20*fe6060f1SDimitry Andric 21*fe6060f1SDimitry Andric namespace __dfsan { 22*fe6060f1SDimitry Andric 23*fe6060f1SDimitry Andric // Origin handling. 24*fe6060f1SDimitry Andric // 25*fe6060f1SDimitry Andric // Origin is a 32-bit identifier that is attached to any taint value in the 26*fe6060f1SDimitry Andric // program and describes how this memory came to be tainted. 27*fe6060f1SDimitry Andric // 28*fe6060f1SDimitry Andric // Chained origin id is like: 29*fe6060f1SDimitry Andric // zzzz xxxx xxxx xxxx 30*fe6060f1SDimitry Andric // 31*fe6060f1SDimitry Andric // Chained origin id describes an event of storing a taint value to 32*fe6060f1SDimitry Andric // memory. The xxx part is a value of ChainedOriginDepot, which is a mapping of 33*fe6060f1SDimitry Andric // (stack_id, prev_id) -> id, where 34*fe6060f1SDimitry Andric // * stack_id describes the event. 35*fe6060f1SDimitry Andric // StackDepot keeps a mapping between those and corresponding stack traces. 36*fe6060f1SDimitry Andric // * prev_id is another origin id that describes the earlier part of the 37*fe6060f1SDimitry Andric // taint value history. 0 prev_id indicates the start of a chain. 38*fe6060f1SDimitry Andric // Following a chain of prev_id provides the full recorded history of a taint 39*fe6060f1SDimitry Andric // value. 40*fe6060f1SDimitry Andric // 41*fe6060f1SDimitry Andric // This, effectively, defines a forest where nodes are points in value history 42*fe6060f1SDimitry Andric // marked with origin ids, and edges are events that are marked with stack_id. 43*fe6060f1SDimitry Andric // 44*fe6060f1SDimitry Andric // The "zzzz" bits of chained origin id are used to store the length of the 45*fe6060f1SDimitry Andric // origin chain. 46*fe6060f1SDimitry Andric 47*fe6060f1SDimitry Andric class Origin { 48*fe6060f1SDimitry Andric public: isValidId(u32 id)49*fe6060f1SDimitry Andric static bool isValidId(u32 id) { return id != 0; } 50*fe6060f1SDimitry Andric raw_id()51*fe6060f1SDimitry Andric u32 raw_id() const { return raw_id_; } 52*fe6060f1SDimitry Andric isChainedOrigin()53*fe6060f1SDimitry Andric bool isChainedOrigin() const { return Origin::isValidId(raw_id_); } 54*fe6060f1SDimitry Andric getChainedId()55*fe6060f1SDimitry Andric u32 getChainedId() const { 56*fe6060f1SDimitry Andric CHECK(Origin::isValidId(raw_id_)); 57*fe6060f1SDimitry Andric return raw_id_ & kChainedIdMask; 58*fe6060f1SDimitry Andric } 59*fe6060f1SDimitry Andric 60*fe6060f1SDimitry Andric // Returns the next origin in the chain and the current stack trace. 61*fe6060f1SDimitry Andric // 62*fe6060f1SDimitry Andric // It scans a partition of StackDepot linearly, and is used only by origin 63*fe6060f1SDimitry Andric // tracking report. getNextChainedOrigin(StackTrace * stack)64*fe6060f1SDimitry Andric Origin getNextChainedOrigin(StackTrace *stack) const { 65*fe6060f1SDimitry Andric CHECK(Origin::isValidId(raw_id_)); 66*fe6060f1SDimitry Andric u32 prev_id; 67*fe6060f1SDimitry Andric u32 stack_id = GetChainedOriginDepot()->Get(getChainedId(), &prev_id); 68*fe6060f1SDimitry Andric if (stack) 69*fe6060f1SDimitry Andric *stack = StackDepotGet(stack_id); 70*fe6060f1SDimitry Andric return Origin(prev_id); 71*fe6060f1SDimitry Andric } 72*fe6060f1SDimitry Andric CreateChainedOrigin(Origin prev,StackTrace * stack)73*fe6060f1SDimitry Andric static Origin CreateChainedOrigin(Origin prev, StackTrace *stack) { 74*fe6060f1SDimitry Andric int depth = prev.isChainedOrigin() ? prev.depth() : -1; 75*fe6060f1SDimitry Andric // depth is the length of the chain minus 1. 76*fe6060f1SDimitry Andric // origin_history_size of 0 means unlimited depth. 77*fe6060f1SDimitry Andric if (flags().origin_history_size > 0) { 78*fe6060f1SDimitry Andric ++depth; 79*fe6060f1SDimitry Andric if (depth >= flags().origin_history_size || depth > kMaxDepth) 80*fe6060f1SDimitry Andric return prev; 81*fe6060f1SDimitry Andric } 82*fe6060f1SDimitry Andric 83*fe6060f1SDimitry Andric StackDepotHandle h = StackDepotPut_WithHandle(*stack); 84*fe6060f1SDimitry Andric if (!h.valid()) 85*fe6060f1SDimitry Andric return prev; 86*fe6060f1SDimitry Andric 87*fe6060f1SDimitry Andric if (flags().origin_history_per_stack_limit > 0) { 88*fe6060f1SDimitry Andric int use_count = h.use_count(); 89*fe6060f1SDimitry Andric if (use_count > flags().origin_history_per_stack_limit) 90*fe6060f1SDimitry Andric return prev; 91*fe6060f1SDimitry Andric } 92*fe6060f1SDimitry Andric 93*fe6060f1SDimitry Andric u32 chained_id; 94*fe6060f1SDimitry Andric bool inserted = 95*fe6060f1SDimitry Andric GetChainedOriginDepot()->Put(h.id(), prev.raw_id(), &chained_id); 96*fe6060f1SDimitry Andric CHECK((chained_id & kChainedIdMask) == chained_id); 97*fe6060f1SDimitry Andric 98*fe6060f1SDimitry Andric if (inserted && flags().origin_history_per_stack_limit > 0) 99*fe6060f1SDimitry Andric h.inc_use_count_unsafe(); 100*fe6060f1SDimitry Andric 101*fe6060f1SDimitry Andric return Origin((depth << kDepthShift) | chained_id); 102*fe6060f1SDimitry Andric } 103*fe6060f1SDimitry Andric FromRawId(u32 id)104*fe6060f1SDimitry Andric static Origin FromRawId(u32 id) { return Origin(id); } 105*fe6060f1SDimitry Andric 106*fe6060f1SDimitry Andric private: 107*fe6060f1SDimitry Andric static const int kDepthBits = 4; 108*fe6060f1SDimitry Andric static const int kDepthShift = 32 - kDepthBits; 109*fe6060f1SDimitry Andric 110*fe6060f1SDimitry Andric static const u32 kChainedIdMask = ((u32)-1) >> kDepthBits; 111*fe6060f1SDimitry Andric 112*fe6060f1SDimitry Andric u32 raw_id_; 113*fe6060f1SDimitry Andric Origin(u32 raw_id)114*fe6060f1SDimitry Andric explicit Origin(u32 raw_id) : raw_id_(raw_id) {} 115*fe6060f1SDimitry Andric depth()116*fe6060f1SDimitry Andric int depth() const { 117*fe6060f1SDimitry Andric CHECK(isChainedOrigin()); 118*fe6060f1SDimitry Andric return (raw_id_ >> kDepthShift) & ((1 << kDepthBits) - 1); 119*fe6060f1SDimitry Andric } 120*fe6060f1SDimitry Andric 121*fe6060f1SDimitry Andric public: 122*fe6060f1SDimitry Andric static const int kMaxDepth = (1 << kDepthBits) - 1; 123*fe6060f1SDimitry Andric }; 124*fe6060f1SDimitry Andric 125*fe6060f1SDimitry Andric } // namespace __dfsan 126*fe6060f1SDimitry Andric 127*fe6060f1SDimitry Andric #endif // DFSAN_ORIGIN_H 128