1 /*
2  * (C) Copyright 2001-2015 Diomidis Spinellis
3  *
4  * This file is part of CScout.
5  *
6  * CScout is free software: you can redistribute it and/or modify
7  * it under the terms of the GNU General Public License as published by
8  * the Free Software Foundation, either version 3 of the License, or
9  * (at your option) any later version.
10  *
11  * CScout is distributed in the hope that it will be useful,
12  * but WITHOUT ANY WARRANTY; without even the implied warranty of
13  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
14  * GNU General Public License for more details.
15  *
16  * You should have received a copy of the GNU General Public License
17  * along with CScout.  If not, see <http://www.gnu.org/licenses/>.
18  *
19  *
20  * For documentation read the corresponding .h file
21  *
22  */
23 
24 #include <iostream>
25 #include <map>
26 #include <string>
27 #include <deque>
28 #include <set>
29 #include <vector>
30 #include <fstream>
31 #include <list>
32 #include <sstream>		// ostringstream
33 #include <limits>
34 #include <cstdlib>		// abs
35 
36 #include "cpp.h"
37 #include "error.h"
38 #include "attr.h"
39 #include "metrics.h"
40 #include "fileid.h"
41 #include "tokid.h"
42 #include "eclass.h"
43 #include "token.h"
44 #include "parse.tab.h"
45 #include "debug.h"
46 #include "fdep.h"
47 #include "idquery.h"
48 #include "fchar.h"
49 
50 bool Token::check_clashes;
51 bool Token::found_clashes;
52 
53 // Display a token part
54 ostream&
operator <<(ostream & o,const Tpart & t)55 operator<<(ostream& o,const Tpart &t)
56 {
57 	cout << t.ti << ",l=" << t.len;
58 	ifstream in(t.ti.get_path().c_str());
59 	if (in.fail())
60 		return o;
61 	in.seekg(t.ti.get_streampos());
62 	o << '[';
63 	for (int i = 0; i < t.len; i++)
64 		o << (char)in.get();
65 	o << ']';
66 	return o;
67 }
68 
69 ostream&
operator <<(ostream & o,const Token & t)70 operator<<(ostream& o,const Token &t)
71 {
72 	cout << "Token code:" << t.name() << "(" << t.code << "):[" << t.val << "]\n";
73 	cout << "Parts:" << t.parts << "\n";
74 	return o;
75 }
76 
77 const string
get_c_val() const78 Token::get_c_val() const
79 {
80 	ostringstream rval;
81 
82 	if (get_code() == STRING_LITERAL)
83 		rval << '\"';
84 	else if (get_code() == CHAR_LITERAL)
85 		rval << '\'';
86 	rval << get_name();
87 	if (get_code() == STRING_LITERAL)
88 		rval << '\"';
89 	else if (get_code() == CHAR_LITERAL)
90 		rval << '\'';
91 	return rval.str();
92 }
93 
94 Token
unique() const95 Token::unique() const
96 {
97 	Token r(code);
98 	r.val = val;
99 	dequeTpart::const_iterator i;
100 	for (i = parts.begin(); i != parts.end(); i++)
101 		r.parts.push_back(Tpart(i->get_tokid().unique(), i->get_len()));
102 	return (r);
103 }
104 
105 dequeTpart
constituents() const106 Token::constituents() const
107 {
108 	dequeTpart r;
109 	dequeTpart::const_iterator i;
110 	for (i = parts.begin(); i != parts.end(); i++) {
111 		if (DP()) cout << "Constituents of " << *i << "\n";
112 		dequeTpart c = (*i).get_tokid().constituents((*i).get_len());
113 		copy(c.begin(), c.end(), back_inserter(r));
114 	}
115 	return (r);
116 }
117 
118 const string
get_refactored_name() const119 Token::get_refactored_name() const
120 {
121 	if (parts.begin() == parts.end())
122 		return val;
123 	string result;
124 	for (dequeTpart::const_iterator i = parts.begin(); i != parts.end(); i++) {
125 		Eclass *ec = i->get_tokid().check_ec();
126 		if (ec == NULL)
127 			return val;
128 		IdProp::const_iterator idi;
129 		idi = Identifier::ids.find(ec);
130 		if (idi == Identifier::ids.end())
131 			return val;
132 		if (idi->second.get_replaced())
133 			result += idi->second.get_newid();
134 		else
135 			result += idi->second.get_id();
136 	}
137 	if (DP())
138 		cout << "refactored name for " << val << " is " << result << endl;
139 	return result;
140 }
141 
142 void
set_ec_attribute(enum e_attribute a) const143 Token::set_ec_attribute(enum e_attribute a) const
144 {
145 	dequeTpart::const_iterator i;
146 	for (i = parts.begin(); i != parts.end(); i++)
147 		i->get_tokid().set_ec_attribute(a, i->get_len());
148 }
149 
150 bool
has_ec_attribute(enum e_attribute a) const151 Token::has_ec_attribute(enum e_attribute a) const
152 {
153 	dequeTpart::const_iterator i;
154 	for (i = parts.begin(); i != parts.end(); i++)
155 		if (i->get_tokid().has_ec_attribute(a, i->get_len()))
156 			return true;
157 	return false;
158 }
159 
160 bool
contains(Eclass * ec) const161 Token::contains(Eclass *ec) const
162 {
163 	dequeTpart::const_iterator i;
164 	for (i = parts.begin(); i != parts.end(); i++)
165 		if ((*i).get_tokid().get_ec() == ec)
166 			return (true);
167 	return (false);
168 }
169 
170 /* Given two Tokid sequences corresponding to two tokens
171  * make these correspond to equivalence classes of same lengths.
172  * Getting the Token constituents again will return Tokids that
173  * satisfy the above postcondition.
174  * The operation only modifies the underlying equivalence classes
175  */
176 void
homogenize(const dequeTpart & a,const dequeTpart & b)177 Tpart::homogenize(const dequeTpart &a, const dequeTpart &b)
178 {
179 	dequeTpart::const_iterator ai = a.begin();
180 	dequeTpart::const_iterator bi = b.begin();
181 	Eclass *ae = (*ai).get_tokid().get_ec();
182 	Eclass *be = (*bi).get_tokid().get_ec();
183 	int alen, blen;
184 
185 	if (DP()) cout << "Homogenize a:" << a << " b: " << b << "\n";
186 	while (ai != a.end() && bi != b.end()) {
187 		alen = ae->get_len();
188 		blen = be->get_len();
189 		if (DP()) cout << "alen=" << alen << " blen=" << blen << "\n";
190 		if (blen < alen) {
191 			ae = ae->split(blen - 1);
192 			bi++;
193 			if (bi != b.end())
194 				be = (*bi).get_tokid().get_ec();
195 		} else if (alen < blen) {
196 			be = be->split(alen - 1);
197 			ai++;
198 			if (ai != a.end())
199 				ae = (*ai).get_tokid().get_ec();
200 		} else if (alen == blen) {
201 			ai++;
202 			if (ai != a.end())
203 				ae = (*ai).get_tokid().get_ec();
204 			bi++;
205 			if (bi != b.end())
206 				be = (*bi).get_tokid().get_ec();
207 		}
208 	}
209 }
210 
211 // Unify the constituent equivalence classes for a and b
212 // The definition/reference order is only required when maintaining
213 // dependency relationships across files
214 void
unify(const Token & a,const Token & b)215 Token::unify(const Token &a /* definition */, const Token &b /* reference */)
216 {
217 	if (DP()) cout << "Unify " << a << " and " << b << "\n";
218 	// Get the constituent Tokids; they may have grown more than the parts
219 	dequeTpart ac = a.constituents();
220 	dequeTpart bc = b.constituents();
221 	// Make the constituents of same length
222 	if (DP()) cout << "Before homogenization: " << "\n" << "a=" << a << "\n" << "b=" << b << "\n";
223 	Tpart::homogenize(ac, bc);
224 	// Get the constituents again; homogenizing may have changed them
225 	ac = a.constituents();
226 	bc = b.constituents();
227 	if (DP()) cout << "After homogenization: " << "\n" << "a=" << ac << "\n" << "b=" << bc << "\n";
228 	// Now merge the corresponding ECs
229 	dequeTpart::const_iterator ai, bi;
230 	for (ai = ac.begin(), bi = bc.begin(); ai != ac.end(); ai++, bi++) {
231 		if (check_clashes) {
232 			if (ai->get_tokid().get_ec() != bi->get_tokid().get_ec()) {
233 				Error::error(E_ERR, "Refactored identifier name clash", true);
234 				found_clashes = true;
235 				return;
236 			}
237 		} else {
238 			merge(ai->get_tokid().get_ec(), bi->get_tokid().get_ec());
239 			Fdep::add_def_ref((*ai).get_tokid(), (*bi).get_tokid(), (*ai).get_tokid().get_ec()->get_len());
240 		}
241 	}
242 	if (!check_clashes)
243 		csassert(bi == bc.end());
244 }
245 
246 ostream&
operator <<(ostream & o,const dequeTpart & dt)247 operator<<(ostream& o,const dequeTpart& dt)
248 {
249 	dequeTpart::const_iterator i;
250 
251 	for (i = dt.begin(); i != dt.end(); i++) {
252 		o << *i;
253 		if (i + 1 != dt.end())
254 			o << ", ";
255 	}
256 	return (o);
257 }
258 
259 /*
260  * Return true if this token is equal on a tokid by tokid
261  * basis with the passed stale token.  The passed token's
262  * parts need not correspond to equivalence classes.
263  */
264 bool
equals(const Token & stale) const265 Token::equals(const Token &stale) const
266 {
267 	dequeTpart freshp(this->constituents());
268 	dequeTpart stalep(stale.get_parts_begin(), stale.get_parts_end());
269 	dequeTpart::const_iterator fi, si;
270 	Tokid fid, sid;
271 	int flen, slen;
272 
273 	fi = freshp.begin();
274 	si = stalep.begin();
275 	for (;;) {
276 		if (fi == freshp.end() && si == stalep.end())
277 			return true;
278 		if (fi == freshp.end() || si == stalep.end())
279 			return false;
280 		fid = fi->get_tokid();
281 		sid = si->get_tokid();
282 		flen = fi->get_len();
283 		slen = si->get_len();
284 	idcont:
285 		if (fid != sid)
286 			return false;
287 		if (flen == slen) {
288 			fi++;
289 			si++;
290 			continue;
291 		} else if (flen > slen) {
292 			fid += slen;
293 			flen -= slen;
294 			si++;
295 			if (si == stalep.end())
296 				return false;
297 			sid = si->get_tokid();
298 			slen = si->get_len();
299 			goto idcont;
300 		} else { // flen < slen
301 			sid += flen;
302 			slen -= flen;
303 			fi++;
304 			if (fi == freshp.end())
305 				return false;
306 			fid = fi->get_tokid();
307 			flen = fi->get_len();
308 			goto idcont;
309 		}
310 	}
311 }
312 
313 // Return the Tokid best defining this token wrt the current file position
314 Tokid
get_defining_tokid() const315 Token::get_defining_tokid() const
316 {
317 	// Trivial case
318 	if (parts.size() == 1)
319 		return parts.begin()->get_tokid().unique();
320 	// Otherwise return the tokid closest to the current file position
321 	Tokid current(Fchar::get_context().get_tokid());
322 	Tokid best;
323 	bool have_best = false;
324 	int best_distance = numeric_limits<int>::max();
325 	int d;
326 	for (dequeTpart::const_iterator i = parts.begin(); i != parts.end(); i++)
327 		if (i->get_tokid().get_fileid() == current.get_fileid() &&
328 		    (d = labs(i->get_tokid().get_streampos() - current.get_streampos())) < best_distance) {
329 		    	best_distance = d;
330 			best = i->get_tokid();
331 			have_best = true;
332 		}
333 	if (have_best)
334 		return best.unique();
335 	else
336 		return parts.begin()->get_tokid().unique();
337 }
338 
339 
340 #ifdef UNIT_TEST
341 // cl -GX -DWIN32 -c eclass.cpp fileid.cpp tokid.cpp tokname.cpp
342 // cl -GX -DWIN32 -DUNIT_TEST token.cpp tokid.obj eclass.obj tokname.obj fileid.obj kernel32.lib
343 
main()344 main()
345 {
346 	Token t(IDENTIFIER);
347 
348 	cout << t;
349 
350 	return (0);
351 }
352 #endif /* UNIT_TEST */
353