1 /*
2  * (C) Copyright 2001-2015 Diomidis Spinellis
3  *
4  * This file is part of CScout.
5  *
6  * CScout is free software: you can redistribute it and/or modify
7  * it under the terms of the GNU General Public License as published by
8  * the Free Software Foundation, either version 3 of the License, or
9  * (at your option) any later version.
10  *
11  * CScout is distributed in the hope that it will be useful,
12  * but WITHOUT ANY WARRANTY; without even the implied warranty of
13  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
14  * GNU General Public License for more details.
15  *
16  * You should have received a copy of the GNU General Public License
17  * along with CScout.  If not, see <http://www.gnu.org/licenses/>.
18  *
19  *
20  * For documentation read the corresponding .h file
21  *
22  */
23 
24 #include <fstream>
25 #include <stack>
26 #include <map>
27 #include <iostream>
28 #include <string>
29 #include <map>
30 #include <list>
31 #include <deque>
32 #include <set>
33 #include <cctype>
34 #include <vector>
35 #include <algorithm>
36 #if defined(unix) || defined(__unix__) || defined(__MACH__)
37 #include <unistd.h>		// access(2)
38 #else
39 #include <io.h>			// access(2)
40 #endif
41 
42 #include "cpp.h"
43 #include "debug.h"
44 #include "error.h"
45 #include "attr.h"
46 #include "metrics.h"
47 #include "fileid.h"
48 #include "tokid.h"
49 #include "fchar.h"
50 #include "token.h"
51 #include "parse.tab.h"
52 #include "ptoken.h"
53 #include "pltoken.h"
54 #include "call.h"
55 #include "md5.h"
56 #include "os.h"
57 
58 int Fileid::counter;		// To generate ids
59 FI_uname_to_id Fileid::u2i;	// From unique name to id
60 FI_id_to_details Fileid::i2d;	// From id to file details
61 FI_hash_to_ids Fileid::identical_files;// Files that are exact duplicates
62 Fileid Fileid::anonymous = Fileid("ANONYMOUS", 0);
63 list <string> Fileid::ro_prefix;	// Read-only prefix
64 
65 // Clear the maps
66 void
clear()67 Fileid::clear()
68 {
69 	u2i.clear();
70 	i2d.clear();
71 	Fileid::anonymous = Fileid("ANONYMOUS", 0);
72 }
73 
74 #ifdef WIN32
75 // Return the canonical representation of a WIN32 filename character
76 char
win32canonical(char c)77 win32canonical(char c)
78 {
79 	if (c == '\\')
80 		return '/';
81 	else
82 		return (toupper(c));
83 }
84 #endif
85 
86 // Return true if file fname is read-only
87 bool
is_readonly(string fname)88 Fileid::is_readonly(string fname)
89 {
90 	if (DP())
91 		cout << "Testing: " << fname << "\n";
92 	for (list <string>::const_iterator i = ro_prefix.begin(); i != ro_prefix.end(); i++) {
93 #ifdef WIN32
94 		unsigned j;
95 
96 		for (j = 0; j < i->length(); j++)
97 			if (win32canonical((*i)[j]) != win32canonical(fname[j]))
98 				break;
99 		if (j == i->length())
100 			return true;
101 #else
102 		if (fname.compare(0, i->length(), *i) == 0)
103 			return true;
104 #endif
105 	}
106 	if (access(fname.c_str(), W_OK) != 0)
107 		return true;
108 	if (DP())
109 		cout << fname << ": is writable\n";
110 	return false;
111 }
112 
Fileid(const string & name)113 Fileid::Fileid(const string &name)
114 {
115 	// String identifier of the file
116 	string sid(get_uniq_fname_string(name.c_str()));
117 	FI_uname_to_id::const_iterator uni;
118 
119 	if ((uni = u2i.find(sid)) != u2i.end()) {
120 		// Filename exists; our id is the one from the map
121 		id = uni->second;
122 	} else {
123 		// New filename; add a new fname/id pair in the map tables
124 		string fpath(get_full_path(name.c_str()));
125 		unsigned char *h = MD5File(name.c_str());
126 		FileHash hash(h, h + 16);
127 
128 		u2i[sid] = id = counter++;
129 		i2d.push_back(Filedetails(fpath, is_readonly(name.c_str()), hash));
130 
131 		identical_files[hash].insert(*this);
132 	}
133 	if (DP())
134 		cout << "Fileid(" << name << ") = " << id << "\n";
135 }
136 
137 // User for initialization and testing; not for real files
Fileid(const string & name,int i)138 Fileid::Fileid(const string &name, int i)
139 {
140 	u2i[name] = i;
141 	i2d.resize(i + 1);
142 	i2d[i] = Filedetails(name, true, FileHash());
143 	id = i;
144 	identical_files[FileHash()].insert(*this);
145 	counter = i + 1;
146 }
147 
148 const string&
get_path() const149 Fileid::get_path() const
150 {
151 	return i2d[id].get_name();
152 }
153 
154 const string
get_fname() const155 Fileid::get_fname() const
156 {
157 	const string &path = i2d[id].get_name();
158 	string::size_type slash = path.find_last_of("/\\");
159 	if (slash == string::npos)
160 		return string(path);
161 	else
162 		return string(path, slash + 1);
163 }
164 
165 const string
get_dir() const166 Fileid::get_dir() const
167 {
168 	const string &path = i2d[id].get_name();
169 	string::size_type slash = path.find_last_of("/\\");
170 	if (slash == string::npos)
171 		return string(path);
172 	else
173 		return string(path, 0, slash);
174 }
175 
176 bool
get_readonly() const177 Fileid::get_readonly() const
178 {
179 	return i2d[id].get_readonly();
180 }
181 
182 void
set_readonly(bool r)183 Fileid::set_readonly(bool r)
184 {
185 	i2d[id].set_readonly(r);
186 }
187 
Filedetails(string n,bool r,const FileHash & h)188 Filedetails::Filedetails(string n, bool r, const FileHash &h) :
189 	name(n),
190 	m_garbage_collected(false),
191 	m_required(false),
192 	m_compilation_unit(false),
193 	hash(h),
194 	ipath_offset(0),
195 	hand_edited(false),
196 	visited(false)
197 {
198 	set_readonly(r);
199 }
200 
Filedetails()201 Filedetails::Filedetails() :
202 	m_compilation_unit(false),
203 	ipath_offset(0),
204 	hand_edited(false)
205 {
206 }
207 
208 int
line_number(streampos p) const209 Filedetails::line_number(streampos p) const
210 {
211 	return (upper_bound(line_ends.begin(), line_ends.end(), p) - line_ends.begin()) + 1;
212 }
213 
214 // Update the specified map
215 void
include_update(const Fileid f,FileIncMap Filedetails::* map,bool directly,bool required,int line)216 Filedetails::include_update(const Fileid f, FileIncMap Filedetails::*map, bool directly, bool required, int line)
217 {
218 	FileIncMap::iterator i;
219 
220 	if ((i = (this->*map).find(f)) == (this->*map).end()) {
221 		pair<FileIncMap::iterator, bool> result = (this->*map).insert(
222 			FileIncMap::value_type(f, IncDetails(directly, required)));
223 		i = result.first;
224 	} else
225 		i->second.update(directly, required);
226 	if (line != -1)
227 		i->second.add_line(line);
228 }
229 
230 /*
231  * Update maps when we include included
232  * A false value in the Boolean flags can simply mean "don't know" and
233  * can be later upgraded to true.
234  */
235 void
include_update_included(const Fileid included,bool directly,bool required,int line)236 Filedetails::include_update_included(const Fileid included, bool directly, bool required, int line)
237 {
238 	if (DP())
239 		cout << "File " << this->get_name() << " includes " << included.get_path() <<
240 			(directly ? " directly " : " indirectly ") <<
241 			(required ? " required " : " non required ") <<
242 			" line " << line << "\n";
243 	include_update(included, &Filedetails::includes, directly, required, line);
244 }
245 
246 // Update maps when we are included by includer
247 void
include_update_includer(const Fileid includer,bool directly,bool required,int line)248 Filedetails::include_update_includer(const Fileid includer, bool directly, bool required, int line)
249 {
250 	if (DP())
251 		cout << "File " << includer.get_path() << " included " <<
252 			(directly ? " directly " : " indirectly ") <<
253 			(required ? " required " : " non required ") <<
254 			" line " << line << "\n";
255 	include_update(includer, &Filedetails::includers, directly, required, line);
256 }
257 
258 // Return a sorted list of all filenames used
259 vector <Fileid>
files(bool sorted)260 Fileid::files(bool sorted)
261 {
262 	vector <Fileid> r(i2d.size() - 1);
263 
264 	for (vector <Fileid>::size_type i = 0; i < r.size(); i++)
265 		r[i] = Fileid(i + 1);
266 	if (sorted)
267 		sort(r.begin(), r.end(), fname_order());
268 	return (r);
269 }
270 
271 void
clear_all_visited()272 Fileid::clear_all_visited()
273 {
274 	for (FI_id_to_details::iterator i = i2d.begin(); i != i2d.end(); i++)
275 		i->clear_visited();
276 }
277 
278 void
process_line(bool processed)279 Filedetails::process_line(bool processed)
280 {
281 	int lnum = Fchar::get_line_num() - 1;
282 	int s = processed_lines.size();
283 	if (DP())
284 		cout << "Process line " << name << ':' << lnum << "\n";
285 	if (s == lnum)
286 		// New line processed
287 		processed_lines.push_back(processed);
288 	else if (s > lnum)
289 		processed_lines[lnum] = (processed_lines[lnum] || processed);
290 	else {
291 		// We somehow missed a line
292 		if (DP()) {
293 			cout << "Line number = " << lnum << "\n";
294 			cout << "Vector size = " << s << "\n";
295 		}
296 		csassert(0);
297 	}
298 }
299 
300 int
hand_edit()301 Filedetails::hand_edit()
302 {
303 	ifstream in;
304 
305 	if (hand_edited)
306 		return 0;
307 	in.open(get_name().c_str(), ios::binary);
308 	if (in.fail())
309 		return -1;
310 
311 	int val;
312 	while ((val = in.get()) != EOF)
313 		contents.push_back((char)val);
314 	if (DP())
315 		cout << '[' << contents << ']' << endl;
316 	hand_edited = true;
317 	return 0;
318 }
319 
320 // Read identifier tokens from file fname into tkov
321 static void
read_file(const string & fname,vector<Pltoken> & tokv)322 read_file(const string &fname, vector <Pltoken> &tokv)
323 {
324 	Fchar::set_input(fname);
325 	Pltoken t;
326 
327 	for (;;) {
328 		t.getnext<Fchar>();
329 		switch (t.get_code()) {
330 		case IDENTIFIER:
331 			tokv.push_back(t);
332 			break;
333 		case EOF:
334 			return;
335 		}
336 	}
337 }
338 
339 /*
340  * Unify all identifiers in the files of fs
341  * The corresponding files should be exact duplicates
342  */
343 static void
unify_file_identifiers(const set<Fileid> & fs)344 unify_file_identifiers(const set<Fileid> &fs)
345 {
346 	csassert(fs.size() > 1);
347 	Fileid fi = *(fs.begin());
348 	fifstream in;
349 	vector <Pltoken> ft0, ftn;	// The tokens to unify
350 
351 	read_file(fi.get_path(), ft0);
352 
353 	set <Fileid>::const_iterator fsi = fs.begin();
354 	for (fsi++; fsi != fs.end(); fsi++) {
355 		if (DP())
356 			cout << "Merging identifiers of " << fi.get_path() << " and " << fsi->get_path() << endl;
357 		read_file(fsi->get_path(), ftn);
358 		csassert(ft0.size() == ftn.size());
359 		vector <Pltoken>::iterator ti0, tin;
360 		for (ti0 = ft0.begin(), tin = ftn.begin(); ti0 != ft0.end(); ti0++, tin++)
361 			Token::unify(*ti0, *tin);
362 		ftn.clear();
363 	}
364 }
365 
366 void
unify_identical_files(void)367 Fileid::unify_identical_files(void)
368 {
369 	for (FI_hash_to_ids::const_iterator i = identical_files.begin(); i != identical_files.end(); i++)
370 		if (i->second.size() > 1)
371 			unify_file_identifiers(i->second);
372 }
373 
374 bool
operator ()(const Call * a,const Call * b) const375 function_file_order::operator()(const Call *a, const Call *b) const
376 {
377 	return a->get_begin().get_tokid() < b->get_begin().get_tokid();
378 }
379 
380 #ifdef UNIT_TEST
381 // cl -GX -DWIN32 -DUNIT_TEST fileid.cpp kernel32.lib
382 
383 #include <iostream>
384 
main()385 main()
386 {
387 	Fileid x1;
388 	Fileid x2;
389 	Fileid a("fileid.cpp");
390 	Fileid b("./fileid.cpp");
391 	Fileid c(".");
392 	Fileid d = b;
393 	Fileid e(c);
394 
395 	cout << "a=" << a.get_path() << " (should be fileId.cpp)\n";
396 	cout << "b=" << b.get_path() << " (should be fileId.cpp)\n";
397 	cout << "c=" << c.get_path() << " (should be .)\n";
398 	cout << "d=" << d.get_path() << " (should be fileId.cpp)\n";
399 	cout << "e=" << e.get_path() << " (should be .)\n";
400 	cout << "a==b: " << (a == b) << " (should be 1)\n";
401 	cout << "size=" << sizeof(a) << " (it better be 4)\n";
402 	cout << "x2=" << x2.get_path() << " (should be ANONYMOUS)\n";
403 	cout << "x1==x2: " << (x1 == x2) << " (should be 1)\n";
404 	return (0);
405 }
406 #endif
407