1 // Copyright 2019 The Chromium Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file.
4 
5 #include "tools/binary_size/libsupersize/caspian/diff.h"
6 
7 #include <deque>
8 #include <functional>
9 #include <iostream>
10 #include <list>
11 #include <string>
12 #include <string_view>
13 #include <unordered_map>
14 #include <utility>
15 #include <vector>
16 
17 #include "third_party/re2/src/re2/re2.h"
18 
19 namespace {
20 struct SymbolMatchIndex {
SymbolMatchIndex__anon2511a1390111::SymbolMatchIndex21   SymbolMatchIndex() {}
SymbolMatchIndex__anon2511a1390111::SymbolMatchIndex22   SymbolMatchIndex(caspian::SectionId id,
23                    std::string_view name,
24                    std::string_view path,
25                    int size_without_padding)
26       : nonempty(true),
27         id(id),
28         name(name),
29         path(path),
30         size_without_padding(size_without_padding) {
31     this->name = name;
32   }
33 
operator bool__anon2511a1390111::SymbolMatchIndex34   operator bool() const { return nonempty; }
35 
operator ==__anon2511a1390111::SymbolMatchIndex36   bool operator==(const SymbolMatchIndex& other) const {
37     return id == other.id && name == other.name && path == other.path &&
38            size_without_padding == other.size_without_padding;
39   }
40 
41   bool nonempty = false;
42   caspian::SectionId id;
43   std::string_view name;
44   std::string_view path;
45   int size_without_padding;
46 };
47 }  // namespace
48 
49 namespace std {
50 template <>
51 struct hash<SymbolMatchIndex> {
52   static constexpr size_t kPrime1 = 105929;
53   static constexpr size_t kPrime2 = 8543;
operator ()std::hash54   size_t operator()(const SymbolMatchIndex& k) const {
55     return ((kPrime1 * static_cast<size_t>(k.id)) ^
56             hash<string_view>()(k.name) ^ hash<string_view>()(k.path) ^
57             (kPrime2 * k.size_without_padding));
58   }
59 };
60 }  // namespace std
61 
62 namespace {
63 // Copied from /base/stl_util.h
64 template <class T, class Allocator, class Value>
Erase(std::vector<T,Allocator> & container,const Value & value)65 void Erase(std::vector<T, Allocator>& container, const Value& value) {
66   container.erase(std::remove(container.begin(), container.end(), value),
67                   container.end());
68 }
69 
GetIdPath(const caspian::Symbol & sym)70 std::string_view GetIdPath(const caspian::Symbol& sym) {
71   return (sym.SourcePath() && *sym.SourcePath()) ? sym.SourcePath()
72                                                  : sym.ObjectPath();
73 }
74 
MatchSymbols(std::function<SymbolMatchIndex (const caspian::Symbol &)> key_func,std::vector<caspian::DeltaSymbol> * delta_symbols,std::vector<const caspian::Symbol * > * unmatched_before,std::vector<const caspian::Symbol * > * unmatched_after,std::unordered_map<caspian::SectionId,float> * padding_by_section_name)75 int MatchSymbols(
76     std::function<SymbolMatchIndex(const caspian::Symbol&)> key_func,
77     std::vector<caspian::DeltaSymbol>* delta_symbols,
78     std::vector<const caspian::Symbol*>* unmatched_before,
79     std::vector<const caspian::Symbol*>* unmatched_after,
80     std::unordered_map<caspian::SectionId, float>* padding_by_section_name) {
81   int n_matched_symbols = 0;
82   std::unordered_map<SymbolMatchIndex,
83                      std::list<std::reference_wrapper<const caspian::Symbol*>>>
84       before_symbols_by_key;
85   for (const caspian::Symbol*& before_sym : *unmatched_before) {
86     SymbolMatchIndex key = key_func(*before_sym);
87     if (key) {
88       before_symbols_by_key[key].emplace_back(before_sym);
89     }
90   }
91 
92   for (const caspian::Symbol*& after_sym : *unmatched_after) {
93     SymbolMatchIndex key = key_func(*after_sym);
94     if (key) {
95       const auto& found = before_symbols_by_key.find(key);
96       if (found != before_symbols_by_key.end() && found->second.size()) {
97         const caspian::Symbol*& before_sym = found->second.front().get();
98         found->second.pop_front();
99         // Padding tracked in aggregate, except for padding-only symbols.
100         if (before_sym->SizeWithoutPadding() != 0) {
101           (*padding_by_section_name)[before_sym->section_id_] +=
102               after_sym->PaddingPss() - before_sym->PaddingPss();
103         }
104         caspian::DeltaSymbol delta_sym(before_sym, after_sym);
105         delta_symbols->push_back(delta_sym);
106         // Null associated pointers in |unmatched_before|, |unmatched_after|.
107         before_sym = nullptr;
108         after_sym = nullptr;
109         n_matched_symbols++;
110       }
111     }
112   }
113 
114   // Compact out nulled entries.
115   Erase(*unmatched_before, nullptr);
116   Erase(*unmatched_after, nullptr);
117   return n_matched_symbols;
118 }
119 
120 class DiffHelper {
121  public:
122   DiffHelper() = default;
123 
StripNumbers(std::string_view in)124   std::string_view StripNumbers(std::string_view in) {
125     static const RE2 number_regex("\\d+");
126     re2::StringPiece piece(in.data(), in.size());
127     if (RE2::PartialMatch(piece, number_regex)) {
128       tmp_strings_.push_back(std::string(in));
129       RE2::GlobalReplace(&tmp_strings_.back(), number_regex, "");
130       return tmp_strings_.back();
131     }
132     return in;
133   }
134 
NormalizeStarSymbols(std::string_view in)135   std::string_view NormalizeStarSymbols(std::string_view in) {
136     // Used only for "*" symbols to strip suffixes "abc123" or "abc123 (any)".
137     static const RE2 normalize_star_symbols("\\s+\\d+(?: \\(.*\\))?$");
138     re2::StringPiece piece(in.data(), in.size());
139     if (RE2::PartialMatch(piece, normalize_star_symbols)) {
140       tmp_strings_.push_back(std::string(in));
141       RE2::Replace(&tmp_strings_.back(), normalize_star_symbols, "s");
142       return tmp_strings_.back();
143     }
144     return in;
145   }
146 
147   using MatchFunc = std::function<SymbolMatchIndex(const caspian::Symbol&)>;
148 
SectionAndFullNameAndPathAndSize()149   MatchFunc SectionAndFullNameAndPathAndSize() {
150     return [](const caspian::Symbol& sym) {
151       return SymbolMatchIndex(sym.section_id_, sym.full_name_, GetIdPath(sym),
152                               sym.Pss());
153     };
154   }
155 
SectionAndFullNameAndPath()156   MatchFunc SectionAndFullNameAndPath() {
157     return [this](const caspian::Symbol& sym) {
158       return SymbolMatchIndex(sym.section_id_, StripNumbers(sym.full_name_),
159                               GetIdPath(sym), 0.0f);
160     };
161   }
162 
163   // Allows signature changes (uses |Name()| rather than |FullName()|)
SectionAndNameAndPath()164   MatchFunc SectionAndNameAndPath() {
165     return [this](const caspian::Symbol& sym) {
166       std::string_view name = sym.Name();
167       if (!name.empty() && name[0] == '*') {
168         name = NormalizeStarSymbols(name);
169       }
170       return SymbolMatchIndex(sym.section_id_, name, GetIdPath(sym), 0.0f);
171     };
172   }
173 
174   // Match on full name, but without path (to account for file moves)
SectionAndFullName()175   MatchFunc SectionAndFullName() {
176     return [](const caspian::Symbol& sym) {
177       if (!sym.IsNameUnique()) {
178         return SymbolMatchIndex();
179       }
180       return SymbolMatchIndex(sym.section_id_, sym.full_name_, "", 0.0f);
181     };
182   }
183 
ClearStrings()184   void ClearStrings() { tmp_strings_.clear(); }
185 
186  private:
187   // Holds strings created during number stripping/star symbol normalization.
188   std::deque<std::string> tmp_strings_;
189 };
190 }  // namespace
191 
192 namespace caspian {
193 
Diff(const SizeInfo * before,const SizeInfo * after)194 DeltaSizeInfo Diff(const SizeInfo* before, const SizeInfo* after) {
195   DeltaSizeInfo ret(before, after);
196 
197   std::vector<const Symbol*> unmatched_before;
198   for (const Symbol& sym : before->raw_symbols) {
199     unmatched_before.push_back(&sym);
200   }
201 
202   std::vector<const Symbol*> unmatched_after;
203   for (const Symbol& sym : after->raw_symbols) {
204     unmatched_after.push_back(&sym);
205   }
206 
207   // Attempt several rounds to use increasingly loose matching on unmatched
208   // symbols.  Any symbols still unmatched are tried in the next round.
209   int step = 0;
210   DiffHelper helper;
211   std::vector<DiffHelper::MatchFunc>
212       key_funcs = {helper.SectionAndFullNameAndPathAndSize(),
213                    helper.SectionAndFullNameAndPath(),
214                    helper.SectionAndNameAndPath(), helper.SectionAndFullName()};
215   std::unordered_map<SectionId, float> padding_by_section_name;
216   for (const auto& key_function : key_funcs) {
217     int n_matched_symbols =
218         MatchSymbols(key_function, &ret.delta_symbols, &unmatched_before,
219                      &unmatched_after, &padding_by_section_name);
220     std::cout << "Matched " << n_matched_symbols << " symbols in matching pass "
221               << ++step << std::endl;
222     helper.ClearStrings();
223   }
224 
225   // Add removals or deletions for any unmatched symbols.
226   for (const Symbol* after_sym : unmatched_after) {
227     ret.delta_symbols.push_back(DeltaSymbol(nullptr, after_sym));
228   }
229   for (const Symbol* before_sym : unmatched_before) {
230     ret.delta_symbols.push_back(DeltaSymbol(before_sym, nullptr));
231   }
232 
233   // Create a DeltaSymbol to represent the zeroed out padding of matched
234   // symbols.
235   for (const auto& pair : padding_by_section_name) {
236     SectionId section_id = pair.first;
237     float padding = pair.second;
238     if (padding != 0.0f) {
239       ret.owned_symbols.emplace_back();
240       Symbol& after_sym = ret.owned_symbols.back();
241       after_sym.section_id_ = section_id;
242       after_sym.size_ = padding;
243       after_sym.padding_ = padding;
244       after_sym.full_name_ = "Overhead: aggregate padding of diff'ed symbols";
245       after_sym.template_name_ = after_sym.full_name_;
246       after_sym.name_ = after_sym.full_name_;
247       ret.delta_symbols.push_back(DeltaSymbol(nullptr, &after_sym));
248     }
249   }
250   return ret;
251 }
252 }  // namespace caspian
253