1 // Copyright 2019 The Chromium Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file.
4 
5 // Much of this logic is duplicated at
6 // tools/binary_size/libsupersize/function_signature.py.
7 
8 #include <stddef.h>
9 
10 #include <algorithm>
11 #include <deque>
12 #include <iostream>
13 #include <string>
14 #include <string_view>
15 #include <tuple>
16 #include <vector>
17 
18 #include "tools/binary_size/libsupersize/caspian/function_signature.h"
19 
20 namespace {
EndsWith(std::string_view str,std::string_view suffix,size_t pos=std::string_view::npos)21 bool EndsWith(std::string_view str,
22               std::string_view suffix,
23               size_t pos = std::string_view::npos) {
24   pos = std::min(pos, str.size());
25   size_t span = suffix.size();
26   return pos >= span && str.substr(pos - span, span) == suffix;
27 }
28 
Slice(std::string_view sv,size_t lo,size_t hi)29 std::string_view Slice(std::string_view sv, size_t lo, size_t hi) {
30   return sv.substr(lo, hi - lo);
31 }
32 }  // namespace
33 
34 namespace caspian {
SplitBy(std::string_view str,char delim)35 std::vector<std::string_view> SplitBy(std::string_view str, char delim) {
36   std::vector<std::string_view> ret;
37   while (true) {
38     size_t pos = str.find(delim);
39     ret.push_back(str.substr(0, pos));
40     if (pos == std::string_view::npos) {
41       break;
42     }
43     str = str.substr(pos + 1);
44   }
45   return ret;
46 }
47 
ParseJava(std::string_view full_name,std::deque<std::string> * owned_strings)48 std::tuple<std::string_view, std::string_view, std::string_view> ParseJava(
49     std::string_view full_name,
50     std::deque<std::string>* owned_strings) {
51   // |owned_strings| is used as an allocator, the relative order of its
52   // elements can be arbitrary.
53   std::string maybe_member_type;
54   size_t hash_idx = full_name.find('#');
55   std::string_view full_new_class_name;
56   std::string_view member;
57   std::string_view member_type;
58   if (hash_idx != std::string_view::npos) {
59     // Parse an already parsed full_name.
60     // Format: Class#symbol: type
61     full_new_class_name = full_name.substr(0, hash_idx);
62     size_t colon_idx = full_name.find(':');
63     member = Slice(full_name, hash_idx + 1, colon_idx);
64     if (colon_idx != std::string_view::npos) {
65       member_type = full_name.substr(colon_idx);
66     }
67   } else {
68     // Format: Class [returntype] functionName()
69     std::vector<std::string_view> parts = SplitBy(full_name, ' ');
70     full_new_class_name = parts[0];
71     if (parts.size() >= 2) {
72       member = parts.back();
73     }
74     if (parts.size() >= 3) {
75       maybe_member_type = ": " + std::string(parts[1]);
76       member_type = maybe_member_type;
77     }
78   }
79 
80   if (member.empty()) {
81     std::vector<std::string_view> split = SplitBy(full_new_class_name, '.');
82     std::string_view short_class_name = split.back();
83     return std::make_tuple(full_name, full_name, short_class_name);
84   }
85   owned_strings->push_back(std::string(full_new_class_name) + std::string("#") +
86                            std::string(member) + std::string(member_type));
87   full_name = owned_strings->back();
88 
89   member = member.substr(0, member.find('('));
90 
91   // Class merging.
92   std::string_view full_old_class_name = full_new_class_name;
93   size_t dot_idx = member.rfind('.');
94   if (dot_idx != std::string_view::npos) {
95     full_old_class_name = Slice(member, 0, dot_idx);
96     member = member.substr(dot_idx + 1);
97   }
98 
99   std::string_view short_class_name = full_old_class_name;
100   dot_idx = full_old_class_name.rfind('.');
101   if (dot_idx != std::string_view::npos)
102     short_class_name = short_class_name.substr(dot_idx + 1);
103 
104   owned_strings->push_back(std::string(short_class_name) + std::string("#") +
105                            std::string(member));
106   std::string_view name = owned_strings->back();
107 
108   owned_strings->push_back(std::string(full_old_class_name) + std::string("#") +
109                            std::string(member));
110   std::string_view template_name = owned_strings->back();
111 
112   return std::make_tuple(full_name, template_name, name);
113 }
114 
FindLastCharOutsideOfBrackets(std::string_view name,char target_char,size_t prev_idx)115 size_t FindLastCharOutsideOfBrackets(std::string_view name,
116                                      char target_char,
117                                      size_t prev_idx) {
118   int paren_balance_count = 0;
119   int angle_balance_count = 0;
120   std::string_view prefix = name.substr(0, prev_idx);
121   while (true) {
122     size_t idx = prefix.rfind(target_char);
123     if (idx == std::string_view::npos) {
124       return std::string_view::npos;
125     }
126     for (char c : prefix.substr(idx)) {
127       switch (c) {
128         case '<':
129           angle_balance_count++;
130           break;
131         case '>':
132           angle_balance_count--;
133           break;
134         case '(':
135           paren_balance_count++;
136           break;
137         case ')':
138           paren_balance_count--;
139           break;
140       }
141     }
142     if (angle_balance_count == 0 && paren_balance_count == 0) {
143       return idx;
144     }
145     prefix = prefix.substr(0, idx);
146   }
147 }
148 
FindReturnValueSpace(std::string_view name,size_t paren_idx)149 size_t FindReturnValueSpace(std::string_view name, size_t paren_idx) {
150   size_t space_idx = paren_idx;
151   // Special case: const cast operators (see tests).
152   if (EndsWith(name, " const", paren_idx)) {
153     space_idx = paren_idx - 6;
154   }
155   while (true) {
156     space_idx = FindLastCharOutsideOfBrackets(name, ' ', space_idx);
157     // Special cases: "operator new", "operator< <templ>", "operator<< <tmpl>".
158     // No space is added for operator>><tmpl>.
159     // Currently does not handle operator->, operator->*
160     if (std::string_view::npos == space_idx) {
161       break;
162     }
163     if (EndsWith(name, "operator", space_idx)) {
164       space_idx -= 8;
165     } else if (EndsWith(name, "operator<", space_idx)) {
166       space_idx -= 9;
167     } else if (EndsWith(name, "operator<<", space_idx)) {
168       space_idx -= 10;
169     } else {
170       break;
171     }
172   }
173   return space_idx;
174 }
175 
StripTemplateArgs(std::string_view name_view)176 std::string StripTemplateArgs(std::string_view name_view) {
177   // TODO(jaspercb): Could pass in |owned_strings| to avoid this allocation.
178   std::string name(name_view);
179   size_t last_right_idx = std::string::npos;
180   while (true) {
181     last_right_idx = name.substr(0, last_right_idx).rfind('>');
182     if (last_right_idx == std::string_view::npos) {
183       return name;
184     }
185     size_t left_idx =
186         FindLastCharOutsideOfBrackets(name, '<', last_right_idx + 1);
187     if (left_idx != std::string_view::npos) {
188       // Leave in empty <>s to denote that it's a template.
189       name = std::string(name.substr(0, left_idx + 1)) +
190              std::string(name.substr(last_right_idx));
191       last_right_idx = left_idx;
192     }
193   }
194 }
195 
NormalizeTopLevelGccLambda(std::string_view name,size_t left_paren_idx)196 std::string NormalizeTopLevelGccLambda(std::string_view name,
197                                        size_t left_paren_idx) {
198   // cc::{lambda(PaintOp*)#63}::_FUN(cc:PaintOp*)
199   // -> cc::$lambda#63(cc:PaintOp*)
200 
201   size_t left_brace_idx = name.find('{');
202   if (left_brace_idx == std::string_view::npos) {
203     exit(1);
204   }
205   size_t hash_idx = name.find('#', left_brace_idx + 1);
206   if (hash_idx == std::string_view::npos) {
207     exit(1);
208   }
209   size_t right_brace_idx = name.find('}', hash_idx + 1);
210   if (right_brace_idx == std::string_view::npos) {
211     exit(1);
212   }
213   std::string_view number = Slice(name, hash_idx + 1, right_brace_idx);
214 
215   std::string ret;
216   ret += name.substr(0, left_brace_idx);
217   ret += "$lambda#";
218   ret += number;
219   ret += name.substr(left_paren_idx);
220   return ret;
221 }
222 
NormalizeTopLevelClangLambda(std::string_view name,size_t left_paren_idx)223 std::string NormalizeTopLevelClangLambda(std::string_view name,
224                                          size_t left_paren_idx) {
225   // cc::$_21::__invoke(int) -> cc::$lambda#21(int)
226   size_t dollar_idx = name.find('$');
227   if (dollar_idx == std::string_view::npos) {
228     exit(1);
229   }
230   size_t colon_idx = name.find(':', dollar_idx + 1);
231   if (colon_idx == std::string_view::npos) {
232     exit(1);
233   }
234   std::string_view number = Slice(name, dollar_idx + 2, colon_idx);
235 
236   std::string ret;
237   ret += name.substr(0, dollar_idx);
238   ret += "$lambda#";
239   ret += number;
240   ret += name.substr(left_paren_idx);
241   return ret;
242 }
243 
FindParameterListParen(std::string_view name)244 size_t FindParameterListParen(std::string_view name) {
245   size_t start_idx = 0;
246   int angle_balance_count = 0;
247   int paren_balance_count = 0;
248   while (true) {
249     size_t idx = name.find('(', start_idx);
250     if (idx == std::string_view::npos) {
251       return std::string_view::npos;
252     }
253     for (char c : Slice(name, start_idx, idx)) {
254       switch (c) {
255         case '<':
256           angle_balance_count++;
257           break;
258         case '>':
259           angle_balance_count--;
260           break;
261         case '(':
262           paren_balance_count++;
263           break;
264         case ')':
265           paren_balance_count--;
266           break;
267       }
268     }
269     size_t operator_offset = Slice(name, start_idx, idx).find("operator<");
270     if (operator_offset != std::string_view::npos) {
271       if (name[start_idx + operator_offset + 9] == '<') {
272         // Handle operator<<, <<=
273         angle_balance_count -= 2;
274       } else {
275         // Handle operator<=
276         angle_balance_count -= 1;
277       }
278     } else {
279       operator_offset = Slice(name, start_idx, idx).find("operator>");
280       if (operator_offset != std::string_view::npos) {
281         if (name[start_idx + operator_offset + 9] == '>') {
282           // Handle operator>>,>>=
283           angle_balance_count += 2;
284         } else {
285           // Handle operator>=
286           angle_balance_count += 1;
287         }
288       }
289     }
290 
291     // Adjust paren
292     if (angle_balance_count == 0 && paren_balance_count == 0) {
293       // Special case: skip "(anonymous namespace)".
294       if (name.substr(idx, 21) == "(anonymous namespace)") {
295         start_idx = idx + 21;
296         continue;
297       }
298       // Special case: skip "decltype (...)"
299       // Special case: skip "{lambda(PaintOp*)#63}"
300       if (idx && name[idx - 1] != ' ' && !EndsWith(name, "{lambda", idx)) {
301         return idx;
302       }
303     }
304 
305     start_idx = idx + 1;
306     paren_balance_count++;
307   }
308 }
309 
ParseCpp(std::string_view full_name,std::deque<std::string> * owned_strings)310 std::tuple<std::string_view, std::string_view, std::string_view> ParseCpp(
311     std::string_view full_name,
312     std::deque<std::string>* owned_strings) {
313   std::string name;
314   std::string_view name_view;
315   size_t left_paren_idx = FindParameterListParen(full_name);
316   if (left_paren_idx != std::string::npos && left_paren_idx > 0) {
317     size_t right_paren_idx = full_name.rfind(')');
318     if (right_paren_idx <= left_paren_idx) {
319       std::cerr << "ParseCpp() received bad symbol: " << full_name << std::endl;
320       exit(1);
321     }
322     size_t space_idx = FindReturnValueSpace(full_name, left_paren_idx);
323     std::string name_no_params =
324         std::string(Slice(full_name, space_idx + 1, left_paren_idx));
325     // Special case for top-level lambdas.
326     if (EndsWith(name_no_params, "}::_FUN")) {
327       // Don't use |name_no_params| in here since prior _idx will be off if
328       // there was a return value.
329       owned_strings->push_back(
330           NormalizeTopLevelGccLambda(full_name, left_paren_idx));
331       return ParseCpp(owned_strings->back(), owned_strings);
332     } else if (EndsWith(name_no_params, "::__invoke") &&
333                name_no_params.find('$') != std::string::npos) {
334       owned_strings->push_back(
335           NormalizeTopLevelClangLambda(full_name, left_paren_idx));
336       return ParseCpp(owned_strings->back(), owned_strings);
337     }
338 
339     name = name_no_params + std::string(full_name.substr(right_paren_idx + 1));
340     name_view = name;
341     full_name = full_name.substr(space_idx + 1);
342   } else {
343     name_view = full_name;
344   }
345 
346   owned_strings->emplace_back(name_view);
347   std::string_view template_name = owned_strings->back();
348 
349   owned_strings->push_back(StripTemplateArgs(name_view));
350   std::string_view returned_name = owned_strings->back();
351 
352   return std::make_tuple(full_name, template_name, returned_name);
353 }
354 }  // namespace caspian
355