1 /*
2 * (C) Copyright 2001-2015 Diomidis Spinellis
3 *
4 * This file is part of CScout.
5 *
6 * CScout is free software: you can redistribute it and/or modify
7 * it under the terms of the GNU General Public License as published by
8 * the Free Software Foundation, either version 3 of the License, or
9 * (at your option) any later version.
10 *
11 * CScout is distributed in the hope that it will be useful,
12 * but WITHOUT ANY WARRANTY; without even the implied warranty of
13 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14 * GNU General Public License for more details.
15 *
16 * You should have received a copy of the GNU General Public License
17 * along with CScout. If not, see <http://www.gnu.org/licenses/>.
18 *
19 *
20 * For documentation read the corresponding .h file
21 *
22 */
23
24 #include <iostream>
25 #include <map>
26 #include <string>
27 #include <deque>
28 #include <set>
29 #include <vector>
30 #include <fstream>
31 #include <list>
32 #include <sstream> // ostringstream
33 #include <limits>
34 #include <cstdlib> // abs
35
36 #include "cpp.h"
37 #include "error.h"
38 #include "attr.h"
39 #include "metrics.h"
40 #include "fileid.h"
41 #include "tokid.h"
42 #include "eclass.h"
43 #include "token.h"
44 #include "parse.tab.h"
45 #include "debug.h"
46 #include "fdep.h"
47 #include "idquery.h"
48 #include "fchar.h"
49
50 bool Token::check_clashes;
51 bool Token::found_clashes;
52
53 // Display a token part
54 ostream&
operator <<(ostream & o,const Tpart & t)55 operator<<(ostream& o,const Tpart &t)
56 {
57 cout << t.ti << ",l=" << t.len;
58 ifstream in(t.ti.get_path().c_str());
59 if (in.fail())
60 return o;
61 in.seekg(t.ti.get_streampos());
62 o << '[';
63 for (int i = 0; i < t.len; i++)
64 o << (char)in.get();
65 o << ']';
66 return o;
67 }
68
69 ostream&
operator <<(ostream & o,const Token & t)70 operator<<(ostream& o,const Token &t)
71 {
72 cout << "Token code:" << t.name() << "(" << t.code << "):[" << t.val << "]\n";
73 cout << "Parts:" << t.parts << "\n";
74 return o;
75 }
76
77 const string
get_c_val() const78 Token::get_c_val() const
79 {
80 ostringstream rval;
81
82 if (get_code() == STRING_LITERAL)
83 rval << '\"';
84 else if (get_code() == CHAR_LITERAL)
85 rval << '\'';
86 rval << get_name();
87 if (get_code() == STRING_LITERAL)
88 rval << '\"';
89 else if (get_code() == CHAR_LITERAL)
90 rval << '\'';
91 return rval.str();
92 }
93
94 Token
unique() const95 Token::unique() const
96 {
97 Token r(code);
98 r.val = val;
99 dequeTpart::const_iterator i;
100 for (i = parts.begin(); i != parts.end(); i++)
101 r.parts.push_back(Tpart(i->get_tokid().unique(), i->get_len()));
102 return (r);
103 }
104
105 dequeTpart
constituents() const106 Token::constituents() const
107 {
108 dequeTpart r;
109 dequeTpart::const_iterator i;
110 for (i = parts.begin(); i != parts.end(); i++) {
111 if (DP()) cout << "Constituents of " << *i << "\n";
112 dequeTpart c = (*i).get_tokid().constituents((*i).get_len());
113 copy(c.begin(), c.end(), back_inserter(r));
114 }
115 return (r);
116 }
117
118 const string
get_refactored_name() const119 Token::get_refactored_name() const
120 {
121 if (parts.begin() == parts.end())
122 return val;
123 string result;
124 for (dequeTpart::const_iterator i = parts.begin(); i != parts.end(); i++) {
125 Eclass *ec = i->get_tokid().check_ec();
126 if (ec == NULL)
127 return val;
128 IdProp::const_iterator idi;
129 idi = Identifier::ids.find(ec);
130 if (idi == Identifier::ids.end())
131 return val;
132 if (idi->second.get_replaced())
133 result += idi->second.get_newid();
134 else
135 result += idi->second.get_id();
136 }
137 if (DP())
138 cout << "refactored name for " << val << " is " << result << endl;
139 return result;
140 }
141
142 void
set_ec_attribute(enum e_attribute a) const143 Token::set_ec_attribute(enum e_attribute a) const
144 {
145 dequeTpart::const_iterator i;
146 for (i = parts.begin(); i != parts.end(); i++)
147 i->get_tokid().set_ec_attribute(a, i->get_len());
148 }
149
150 bool
has_ec_attribute(enum e_attribute a) const151 Token::has_ec_attribute(enum e_attribute a) const
152 {
153 dequeTpart::const_iterator i;
154 for (i = parts.begin(); i != parts.end(); i++)
155 if (i->get_tokid().has_ec_attribute(a, i->get_len()))
156 return true;
157 return false;
158 }
159
160 bool
contains(Eclass * ec) const161 Token::contains(Eclass *ec) const
162 {
163 dequeTpart::const_iterator i;
164 for (i = parts.begin(); i != parts.end(); i++)
165 if ((*i).get_tokid().get_ec() == ec)
166 return (true);
167 return (false);
168 }
169
170 /* Given two Tokid sequences corresponding to two tokens
171 * make these correspond to equivalence classes of same lengths.
172 * Getting the Token constituents again will return Tokids that
173 * satisfy the above postcondition.
174 * The operation only modifies the underlying equivalence classes
175 */
176 void
homogenize(const dequeTpart & a,const dequeTpart & b)177 Tpart::homogenize(const dequeTpart &a, const dequeTpart &b)
178 {
179 dequeTpart::const_iterator ai = a.begin();
180 dequeTpart::const_iterator bi = b.begin();
181 Eclass *ae = (*ai).get_tokid().get_ec();
182 Eclass *be = (*bi).get_tokid().get_ec();
183 int alen, blen;
184
185 if (DP()) cout << "Homogenize a:" << a << " b: " << b << "\n";
186 while (ai != a.end() && bi != b.end()) {
187 alen = ae->get_len();
188 blen = be->get_len();
189 if (DP()) cout << "alen=" << alen << " blen=" << blen << "\n";
190 if (blen < alen) {
191 ae = ae->split(blen - 1);
192 bi++;
193 if (bi != b.end())
194 be = (*bi).get_tokid().get_ec();
195 } else if (alen < blen) {
196 be = be->split(alen - 1);
197 ai++;
198 if (ai != a.end())
199 ae = (*ai).get_tokid().get_ec();
200 } else if (alen == blen) {
201 ai++;
202 if (ai != a.end())
203 ae = (*ai).get_tokid().get_ec();
204 bi++;
205 if (bi != b.end())
206 be = (*bi).get_tokid().get_ec();
207 }
208 }
209 }
210
211 // Unify the constituent equivalence classes for a and b
212 // The definition/reference order is only required when maintaining
213 // dependency relationships across files
214 void
unify(const Token & a,const Token & b)215 Token::unify(const Token &a /* definition */, const Token &b /* reference */)
216 {
217 if (DP()) cout << "Unify " << a << " and " << b << "\n";
218 // Get the constituent Tokids; they may have grown more than the parts
219 dequeTpart ac = a.constituents();
220 dequeTpart bc = b.constituents();
221 // Make the constituents of same length
222 if (DP()) cout << "Before homogenization: " << "\n" << "a=" << a << "\n" << "b=" << b << "\n";
223 Tpart::homogenize(ac, bc);
224 // Get the constituents again; homogenizing may have changed them
225 ac = a.constituents();
226 bc = b.constituents();
227 if (DP()) cout << "After homogenization: " << "\n" << "a=" << ac << "\n" << "b=" << bc << "\n";
228 // Now merge the corresponding ECs
229 dequeTpart::const_iterator ai, bi;
230 for (ai = ac.begin(), bi = bc.begin(); ai != ac.end(); ai++, bi++) {
231 if (check_clashes) {
232 if (ai->get_tokid().get_ec() != bi->get_tokid().get_ec()) {
233 Error::error(E_ERR, "Refactored identifier name clash", true);
234 found_clashes = true;
235 return;
236 }
237 } else {
238 merge(ai->get_tokid().get_ec(), bi->get_tokid().get_ec());
239 Fdep::add_def_ref((*ai).get_tokid(), (*bi).get_tokid(), (*ai).get_tokid().get_ec()->get_len());
240 }
241 }
242 if (!check_clashes)
243 csassert(bi == bc.end());
244 }
245
246 ostream&
operator <<(ostream & o,const dequeTpart & dt)247 operator<<(ostream& o,const dequeTpart& dt)
248 {
249 dequeTpart::const_iterator i;
250
251 for (i = dt.begin(); i != dt.end(); i++) {
252 o << *i;
253 if (i + 1 != dt.end())
254 o << ", ";
255 }
256 return (o);
257 }
258
259 /*
260 * Return true if this token is equal on a tokid by tokid
261 * basis with the passed stale token. The passed token's
262 * parts need not correspond to equivalence classes.
263 */
264 bool
equals(const Token & stale) const265 Token::equals(const Token &stale) const
266 {
267 dequeTpart freshp(this->constituents());
268 dequeTpart stalep(stale.get_parts_begin(), stale.get_parts_end());
269 dequeTpart::const_iterator fi, si;
270 Tokid fid, sid;
271 int flen, slen;
272
273 fi = freshp.begin();
274 si = stalep.begin();
275 for (;;) {
276 if (fi == freshp.end() && si == stalep.end())
277 return true;
278 if (fi == freshp.end() || si == stalep.end())
279 return false;
280 fid = fi->get_tokid();
281 sid = si->get_tokid();
282 flen = fi->get_len();
283 slen = si->get_len();
284 idcont:
285 if (fid != sid)
286 return false;
287 if (flen == slen) {
288 fi++;
289 si++;
290 continue;
291 } else if (flen > slen) {
292 fid += slen;
293 flen -= slen;
294 si++;
295 if (si == stalep.end())
296 return false;
297 sid = si->get_tokid();
298 slen = si->get_len();
299 goto idcont;
300 } else { // flen < slen
301 sid += flen;
302 slen -= flen;
303 fi++;
304 if (fi == freshp.end())
305 return false;
306 fid = fi->get_tokid();
307 flen = fi->get_len();
308 goto idcont;
309 }
310 }
311 }
312
313 // Return the Tokid best defining this token wrt the current file position
314 Tokid
get_defining_tokid() const315 Token::get_defining_tokid() const
316 {
317 // Trivial case
318 if (parts.size() == 1)
319 return parts.begin()->get_tokid().unique();
320 // Otherwise return the tokid closest to the current file position
321 Tokid current(Fchar::get_context().get_tokid());
322 Tokid best;
323 bool have_best = false;
324 int best_distance = numeric_limits<int>::max();
325 int d;
326 for (dequeTpart::const_iterator i = parts.begin(); i != parts.end(); i++)
327 if (i->get_tokid().get_fileid() == current.get_fileid() &&
328 (d = labs(i->get_tokid().get_streampos() - current.get_streampos())) < best_distance) {
329 best_distance = d;
330 best = i->get_tokid();
331 have_best = true;
332 }
333 if (have_best)
334 return best.unique();
335 else
336 return parts.begin()->get_tokid().unique();
337 }
338
339
340 #ifdef UNIT_TEST
341 // cl -GX -DWIN32 -c eclass.cpp fileid.cpp tokid.cpp tokname.cpp
342 // cl -GX -DWIN32 -DUNIT_TEST token.cpp tokid.obj eclass.obj tokname.obj fileid.obj kernel32.lib
343
main()344 main()
345 {
346 Token t(IDENTIFIER);
347
348 cout << t;
349
350 return (0);
351 }
352 #endif /* UNIT_TEST */
353