1 // Copyright 2019 The Chromium Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file.
4
5 #include "tools/binary_size/libsupersize/caspian/diff.h"
6
7 #include <deque>
8 #include <functional>
9 #include <iostream>
10 #include <list>
11 #include <string>
12 #include <string_view>
13 #include <unordered_map>
14 #include <utility>
15 #include <vector>
16
17 #include "third_party/re2/src/re2/re2.h"
18
19 namespace {
20 struct SymbolMatchIndex {
SymbolMatchIndex__anon2511a1390111::SymbolMatchIndex21 SymbolMatchIndex() {}
SymbolMatchIndex__anon2511a1390111::SymbolMatchIndex22 SymbolMatchIndex(caspian::SectionId id,
23 std::string_view name,
24 std::string_view path,
25 int size_without_padding)
26 : nonempty(true),
27 id(id),
28 name(name),
29 path(path),
30 size_without_padding(size_without_padding) {
31 this->name = name;
32 }
33
operator bool__anon2511a1390111::SymbolMatchIndex34 operator bool() const { return nonempty; }
35
operator ==__anon2511a1390111::SymbolMatchIndex36 bool operator==(const SymbolMatchIndex& other) const {
37 return id == other.id && name == other.name && path == other.path &&
38 size_without_padding == other.size_without_padding;
39 }
40
41 bool nonempty = false;
42 caspian::SectionId id;
43 std::string_view name;
44 std::string_view path;
45 int size_without_padding;
46 };
47 } // namespace
48
49 namespace std {
50 template <>
51 struct hash<SymbolMatchIndex> {
52 static constexpr size_t kPrime1 = 105929;
53 static constexpr size_t kPrime2 = 8543;
operator ()std::hash54 size_t operator()(const SymbolMatchIndex& k) const {
55 return ((kPrime1 * static_cast<size_t>(k.id)) ^
56 hash<string_view>()(k.name) ^ hash<string_view>()(k.path) ^
57 (kPrime2 * k.size_without_padding));
58 }
59 };
60 } // namespace std
61
62 namespace {
63 // Copied from /base/stl_util.h
64 template <class T, class Allocator, class Value>
Erase(std::vector<T,Allocator> & container,const Value & value)65 void Erase(std::vector<T, Allocator>& container, const Value& value) {
66 container.erase(std::remove(container.begin(), container.end(), value),
67 container.end());
68 }
69
GetIdPath(const caspian::Symbol & sym)70 std::string_view GetIdPath(const caspian::Symbol& sym) {
71 return (sym.SourcePath() && *sym.SourcePath()) ? sym.SourcePath()
72 : sym.ObjectPath();
73 }
74
MatchSymbols(std::function<SymbolMatchIndex (const caspian::Symbol &)> key_func,std::vector<caspian::DeltaSymbol> * delta_symbols,std::vector<const caspian::Symbol * > * unmatched_before,std::vector<const caspian::Symbol * > * unmatched_after,std::unordered_map<caspian::SectionId,float> * padding_by_section_name)75 int MatchSymbols(
76 std::function<SymbolMatchIndex(const caspian::Symbol&)> key_func,
77 std::vector<caspian::DeltaSymbol>* delta_symbols,
78 std::vector<const caspian::Symbol*>* unmatched_before,
79 std::vector<const caspian::Symbol*>* unmatched_after,
80 std::unordered_map<caspian::SectionId, float>* padding_by_section_name) {
81 int n_matched_symbols = 0;
82 std::unordered_map<SymbolMatchIndex,
83 std::list<std::reference_wrapper<const caspian::Symbol*>>>
84 before_symbols_by_key;
85 for (const caspian::Symbol*& before_sym : *unmatched_before) {
86 SymbolMatchIndex key = key_func(*before_sym);
87 if (key) {
88 before_symbols_by_key[key].emplace_back(before_sym);
89 }
90 }
91
92 for (const caspian::Symbol*& after_sym : *unmatched_after) {
93 SymbolMatchIndex key = key_func(*after_sym);
94 if (key) {
95 const auto& found = before_symbols_by_key.find(key);
96 if (found != before_symbols_by_key.end() && found->second.size()) {
97 const caspian::Symbol*& before_sym = found->second.front().get();
98 found->second.pop_front();
99 // Padding tracked in aggregate, except for padding-only symbols.
100 if (before_sym->SizeWithoutPadding() != 0) {
101 (*padding_by_section_name)[before_sym->section_id_] +=
102 after_sym->PaddingPss() - before_sym->PaddingPss();
103 }
104 caspian::DeltaSymbol delta_sym(before_sym, after_sym);
105 delta_symbols->push_back(delta_sym);
106 // Null associated pointers in |unmatched_before|, |unmatched_after|.
107 before_sym = nullptr;
108 after_sym = nullptr;
109 n_matched_symbols++;
110 }
111 }
112 }
113
114 // Compact out nulled entries.
115 Erase(*unmatched_before, nullptr);
116 Erase(*unmatched_after, nullptr);
117 return n_matched_symbols;
118 }
119
120 class DiffHelper {
121 public:
122 DiffHelper() = default;
123
StripNumbers(std::string_view in)124 std::string_view StripNumbers(std::string_view in) {
125 static const RE2 number_regex("\\d+");
126 re2::StringPiece piece(in.data(), in.size());
127 if (RE2::PartialMatch(piece, number_regex)) {
128 tmp_strings_.push_back(std::string(in));
129 RE2::GlobalReplace(&tmp_strings_.back(), number_regex, "");
130 return tmp_strings_.back();
131 }
132 return in;
133 }
134
NormalizeStarSymbols(std::string_view in)135 std::string_view NormalizeStarSymbols(std::string_view in) {
136 // Used only for "*" symbols to strip suffixes "abc123" or "abc123 (any)".
137 static const RE2 normalize_star_symbols("\\s+\\d+(?: \\(.*\\))?$");
138 re2::StringPiece piece(in.data(), in.size());
139 if (RE2::PartialMatch(piece, normalize_star_symbols)) {
140 tmp_strings_.push_back(std::string(in));
141 RE2::Replace(&tmp_strings_.back(), normalize_star_symbols, "s");
142 return tmp_strings_.back();
143 }
144 return in;
145 }
146
147 using MatchFunc = std::function<SymbolMatchIndex(const caspian::Symbol&)>;
148
SectionAndFullNameAndPathAndSize()149 MatchFunc SectionAndFullNameAndPathAndSize() {
150 return [](const caspian::Symbol& sym) {
151 return SymbolMatchIndex(sym.section_id_, sym.full_name_, GetIdPath(sym),
152 sym.Pss());
153 };
154 }
155
SectionAndFullNameAndPath()156 MatchFunc SectionAndFullNameAndPath() {
157 return [this](const caspian::Symbol& sym) {
158 return SymbolMatchIndex(sym.section_id_, StripNumbers(sym.full_name_),
159 GetIdPath(sym), 0.0f);
160 };
161 }
162
163 // Allows signature changes (uses |Name()| rather than |FullName()|)
SectionAndNameAndPath()164 MatchFunc SectionAndNameAndPath() {
165 return [this](const caspian::Symbol& sym) {
166 std::string_view name = sym.Name();
167 if (!name.empty() && name[0] == '*') {
168 name = NormalizeStarSymbols(name);
169 }
170 return SymbolMatchIndex(sym.section_id_, name, GetIdPath(sym), 0.0f);
171 };
172 }
173
174 // Match on full name, but without path (to account for file moves)
SectionAndFullName()175 MatchFunc SectionAndFullName() {
176 return [](const caspian::Symbol& sym) {
177 if (!sym.IsNameUnique()) {
178 return SymbolMatchIndex();
179 }
180 return SymbolMatchIndex(sym.section_id_, sym.full_name_, "", 0.0f);
181 };
182 }
183
ClearStrings()184 void ClearStrings() { tmp_strings_.clear(); }
185
186 private:
187 // Holds strings created during number stripping/star symbol normalization.
188 std::deque<std::string> tmp_strings_;
189 };
190 } // namespace
191
192 namespace caspian {
193
Diff(const SizeInfo * before,const SizeInfo * after)194 DeltaSizeInfo Diff(const SizeInfo* before, const SizeInfo* after) {
195 DeltaSizeInfo ret(before, after);
196
197 std::vector<const Symbol*> unmatched_before;
198 for (const Symbol& sym : before->raw_symbols) {
199 unmatched_before.push_back(&sym);
200 }
201
202 std::vector<const Symbol*> unmatched_after;
203 for (const Symbol& sym : after->raw_symbols) {
204 unmatched_after.push_back(&sym);
205 }
206
207 // Attempt several rounds to use increasingly loose matching on unmatched
208 // symbols. Any symbols still unmatched are tried in the next round.
209 int step = 0;
210 DiffHelper helper;
211 std::vector<DiffHelper::MatchFunc>
212 key_funcs = {helper.SectionAndFullNameAndPathAndSize(),
213 helper.SectionAndFullNameAndPath(),
214 helper.SectionAndNameAndPath(), helper.SectionAndFullName()};
215 std::unordered_map<SectionId, float> padding_by_section_name;
216 for (const auto& key_function : key_funcs) {
217 int n_matched_symbols =
218 MatchSymbols(key_function, &ret.delta_symbols, &unmatched_before,
219 &unmatched_after, &padding_by_section_name);
220 std::cout << "Matched " << n_matched_symbols << " symbols in matching pass "
221 << ++step << std::endl;
222 helper.ClearStrings();
223 }
224
225 // Add removals or deletions for any unmatched symbols.
226 for (const Symbol* after_sym : unmatched_after) {
227 ret.delta_symbols.push_back(DeltaSymbol(nullptr, after_sym));
228 }
229 for (const Symbol* before_sym : unmatched_before) {
230 ret.delta_symbols.push_back(DeltaSymbol(before_sym, nullptr));
231 }
232
233 // Create a DeltaSymbol to represent the zeroed out padding of matched
234 // symbols.
235 for (const auto& pair : padding_by_section_name) {
236 SectionId section_id = pair.first;
237 float padding = pair.second;
238 if (padding != 0.0f) {
239 ret.owned_symbols.emplace_back();
240 Symbol& after_sym = ret.owned_symbols.back();
241 after_sym.section_id_ = section_id;
242 after_sym.size_ = padding;
243 after_sym.padding_ = padding;
244 after_sym.full_name_ = "Overhead: aggregate padding of diff'ed symbols";
245 after_sym.template_name_ = after_sym.full_name_;
246 after_sym.name_ = after_sym.full_name_;
247 ret.delta_symbols.push_back(DeltaSymbol(nullptr, &after_sym));
248 }
249 }
250 return ret;
251 }
252 } // namespace caspian
253