1 /*
2 * (C) Copyright 2001-2015 Diomidis Spinellis
3 *
4 * This file is part of CScout.
5 *
6 * CScout is free software: you can redistribute it and/or modify
7 * it under the terms of the GNU General Public License as published by
8 * the Free Software Foundation, either version 3 of the License, or
9 * (at your option) any later version.
10 *
11 * CScout is distributed in the hope that it will be useful,
12 * but WITHOUT ANY WARRANTY; without even the implied warranty of
13 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14 * GNU General Public License for more details.
15 *
16 * You should have received a copy of the GNU General Public License
17 * along with CScout. If not, see <http://www.gnu.org/licenses/>.
18 *
19 *
20 * For documentation read the corresponding .h file
21 *
22 */
23
24 #include <fstream>
25 #include <stack>
26 #include <map>
27 #include <iostream>
28 #include <string>
29 #include <map>
30 #include <list>
31 #include <deque>
32 #include <set>
33 #include <cctype>
34 #include <vector>
35 #include <algorithm>
36 #if defined(unix) || defined(__unix__) || defined(__MACH__)
37 #include <unistd.h> // access(2)
38 #else
39 #include <io.h> // access(2)
40 #endif
41
42 #include "cpp.h"
43 #include "debug.h"
44 #include "error.h"
45 #include "attr.h"
46 #include "metrics.h"
47 #include "fileid.h"
48 #include "tokid.h"
49 #include "fchar.h"
50 #include "token.h"
51 #include "parse.tab.h"
52 #include "ptoken.h"
53 #include "pltoken.h"
54 #include "call.h"
55 #include "md5.h"
56 #include "os.h"
57
58 int Fileid::counter; // To generate ids
59 FI_uname_to_id Fileid::u2i; // From unique name to id
60 FI_id_to_details Fileid::i2d; // From id to file details
61 FI_hash_to_ids Fileid::identical_files;// Files that are exact duplicates
62 Fileid Fileid::anonymous = Fileid("ANONYMOUS", 0);
63 list <string> Fileid::ro_prefix; // Read-only prefix
64
65 // Clear the maps
66 void
clear()67 Fileid::clear()
68 {
69 u2i.clear();
70 i2d.clear();
71 Fileid::anonymous = Fileid("ANONYMOUS", 0);
72 }
73
74 #ifdef WIN32
75 // Return the canonical representation of a WIN32 filename character
76 char
win32canonical(char c)77 win32canonical(char c)
78 {
79 if (c == '\\')
80 return '/';
81 else
82 return (toupper(c));
83 }
84 #endif
85
86 // Return true if file fname is read-only
87 bool
is_readonly(string fname)88 Fileid::is_readonly(string fname)
89 {
90 if (DP())
91 cout << "Testing: " << fname << "\n";
92 for (list <string>::const_iterator i = ro_prefix.begin(); i != ro_prefix.end(); i++) {
93 #ifdef WIN32
94 unsigned j;
95
96 for (j = 0; j < i->length(); j++)
97 if (win32canonical((*i)[j]) != win32canonical(fname[j]))
98 break;
99 if (j == i->length())
100 return true;
101 #else
102 if (fname.compare(0, i->length(), *i) == 0)
103 return true;
104 #endif
105 }
106 if (access(fname.c_str(), W_OK) != 0)
107 return true;
108 if (DP())
109 cout << fname << ": is writable\n";
110 return false;
111 }
112
Fileid(const string & name)113 Fileid::Fileid(const string &name)
114 {
115 // String identifier of the file
116 string sid(get_uniq_fname_string(name.c_str()));
117 FI_uname_to_id::const_iterator uni;
118
119 if ((uni = u2i.find(sid)) != u2i.end()) {
120 // Filename exists; our id is the one from the map
121 id = uni->second;
122 } else {
123 // New filename; add a new fname/id pair in the map tables
124 string fpath(get_full_path(name.c_str()));
125 unsigned char *h = MD5File(name.c_str());
126 FileHash hash(h, h + 16);
127
128 u2i[sid] = id = counter++;
129 i2d.push_back(Filedetails(fpath, is_readonly(name.c_str()), hash));
130
131 identical_files[hash].insert(*this);
132 }
133 if (DP())
134 cout << "Fileid(" << name << ") = " << id << "\n";
135 }
136
137 // User for initialization and testing; not for real files
Fileid(const string & name,int i)138 Fileid::Fileid(const string &name, int i)
139 {
140 u2i[name] = i;
141 i2d.resize(i + 1);
142 i2d[i] = Filedetails(name, true, FileHash());
143 id = i;
144 identical_files[FileHash()].insert(*this);
145 counter = i + 1;
146 }
147
148 const string&
get_path() const149 Fileid::get_path() const
150 {
151 return i2d[id].get_name();
152 }
153
154 const string
get_fname() const155 Fileid::get_fname() const
156 {
157 const string &path = i2d[id].get_name();
158 string::size_type slash = path.find_last_of("/\\");
159 if (slash == string::npos)
160 return string(path);
161 else
162 return string(path, slash + 1);
163 }
164
165 const string
get_dir() const166 Fileid::get_dir() const
167 {
168 const string &path = i2d[id].get_name();
169 string::size_type slash = path.find_last_of("/\\");
170 if (slash == string::npos)
171 return string(path);
172 else
173 return string(path, 0, slash);
174 }
175
176 bool
get_readonly() const177 Fileid::get_readonly() const
178 {
179 return i2d[id].get_readonly();
180 }
181
182 void
set_readonly(bool r)183 Fileid::set_readonly(bool r)
184 {
185 i2d[id].set_readonly(r);
186 }
187
Filedetails(string n,bool r,const FileHash & h)188 Filedetails::Filedetails(string n, bool r, const FileHash &h) :
189 name(n),
190 m_garbage_collected(false),
191 m_required(false),
192 m_compilation_unit(false),
193 hash(h),
194 ipath_offset(0),
195 hand_edited(false),
196 visited(false)
197 {
198 set_readonly(r);
199 }
200
Filedetails()201 Filedetails::Filedetails() :
202 m_compilation_unit(false),
203 ipath_offset(0),
204 hand_edited(false)
205 {
206 }
207
208 int
line_number(streampos p) const209 Filedetails::line_number(streampos p) const
210 {
211 return (upper_bound(line_ends.begin(), line_ends.end(), p) - line_ends.begin()) + 1;
212 }
213
214 // Update the specified map
215 void
include_update(const Fileid f,FileIncMap Filedetails::* map,bool directly,bool required,int line)216 Filedetails::include_update(const Fileid f, FileIncMap Filedetails::*map, bool directly, bool required, int line)
217 {
218 FileIncMap::iterator i;
219
220 if ((i = (this->*map).find(f)) == (this->*map).end()) {
221 pair<FileIncMap::iterator, bool> result = (this->*map).insert(
222 FileIncMap::value_type(f, IncDetails(directly, required)));
223 i = result.first;
224 } else
225 i->second.update(directly, required);
226 if (line != -1)
227 i->second.add_line(line);
228 }
229
230 /*
231 * Update maps when we include included
232 * A false value in the Boolean flags can simply mean "don't know" and
233 * can be later upgraded to true.
234 */
235 void
include_update_included(const Fileid included,bool directly,bool required,int line)236 Filedetails::include_update_included(const Fileid included, bool directly, bool required, int line)
237 {
238 if (DP())
239 cout << "File " << this->get_name() << " includes " << included.get_path() <<
240 (directly ? " directly " : " indirectly ") <<
241 (required ? " required " : " non required ") <<
242 " line " << line << "\n";
243 include_update(included, &Filedetails::includes, directly, required, line);
244 }
245
246 // Update maps when we are included by includer
247 void
include_update_includer(const Fileid includer,bool directly,bool required,int line)248 Filedetails::include_update_includer(const Fileid includer, bool directly, bool required, int line)
249 {
250 if (DP())
251 cout << "File " << includer.get_path() << " included " <<
252 (directly ? " directly " : " indirectly ") <<
253 (required ? " required " : " non required ") <<
254 " line " << line << "\n";
255 include_update(includer, &Filedetails::includers, directly, required, line);
256 }
257
258 // Return a sorted list of all filenames used
259 vector <Fileid>
files(bool sorted)260 Fileid::files(bool sorted)
261 {
262 vector <Fileid> r(i2d.size() - 1);
263
264 for (vector <Fileid>::size_type i = 0; i < r.size(); i++)
265 r[i] = Fileid(i + 1);
266 if (sorted)
267 sort(r.begin(), r.end(), fname_order());
268 return (r);
269 }
270
271 void
clear_all_visited()272 Fileid::clear_all_visited()
273 {
274 for (FI_id_to_details::iterator i = i2d.begin(); i != i2d.end(); i++)
275 i->clear_visited();
276 }
277
278 void
process_line(bool processed)279 Filedetails::process_line(bool processed)
280 {
281 int lnum = Fchar::get_line_num() - 1;
282 int s = processed_lines.size();
283 if (DP())
284 cout << "Process line " << name << ':' << lnum << "\n";
285 if (s == lnum)
286 // New line processed
287 processed_lines.push_back(processed);
288 else if (s > lnum)
289 processed_lines[lnum] = (processed_lines[lnum] || processed);
290 else {
291 // We somehow missed a line
292 if (DP()) {
293 cout << "Line number = " << lnum << "\n";
294 cout << "Vector size = " << s << "\n";
295 }
296 csassert(0);
297 }
298 }
299
300 int
hand_edit()301 Filedetails::hand_edit()
302 {
303 ifstream in;
304
305 if (hand_edited)
306 return 0;
307 in.open(get_name().c_str(), ios::binary);
308 if (in.fail())
309 return -1;
310
311 int val;
312 while ((val = in.get()) != EOF)
313 contents.push_back((char)val);
314 if (DP())
315 cout << '[' << contents << ']' << endl;
316 hand_edited = true;
317 return 0;
318 }
319
320 // Read identifier tokens from file fname into tkov
321 static void
read_file(const string & fname,vector<Pltoken> & tokv)322 read_file(const string &fname, vector <Pltoken> &tokv)
323 {
324 Fchar::set_input(fname);
325 Pltoken t;
326
327 for (;;) {
328 t.getnext<Fchar>();
329 switch (t.get_code()) {
330 case IDENTIFIER:
331 tokv.push_back(t);
332 break;
333 case EOF:
334 return;
335 }
336 }
337 }
338
339 /*
340 * Unify all identifiers in the files of fs
341 * The corresponding files should be exact duplicates
342 */
343 static void
unify_file_identifiers(const set<Fileid> & fs)344 unify_file_identifiers(const set<Fileid> &fs)
345 {
346 csassert(fs.size() > 1);
347 Fileid fi = *(fs.begin());
348 fifstream in;
349 vector <Pltoken> ft0, ftn; // The tokens to unify
350
351 read_file(fi.get_path(), ft0);
352
353 set <Fileid>::const_iterator fsi = fs.begin();
354 for (fsi++; fsi != fs.end(); fsi++) {
355 if (DP())
356 cout << "Merging identifiers of " << fi.get_path() << " and " << fsi->get_path() << endl;
357 read_file(fsi->get_path(), ftn);
358 csassert(ft0.size() == ftn.size());
359 vector <Pltoken>::iterator ti0, tin;
360 for (ti0 = ft0.begin(), tin = ftn.begin(); ti0 != ft0.end(); ti0++, tin++)
361 Token::unify(*ti0, *tin);
362 ftn.clear();
363 }
364 }
365
366 void
unify_identical_files(void)367 Fileid::unify_identical_files(void)
368 {
369 for (FI_hash_to_ids::const_iterator i = identical_files.begin(); i != identical_files.end(); i++)
370 if (i->second.size() > 1)
371 unify_file_identifiers(i->second);
372 }
373
374 bool
operator ()(const Call * a,const Call * b) const375 function_file_order::operator()(const Call *a, const Call *b) const
376 {
377 return a->get_begin().get_tokid() < b->get_begin().get_tokid();
378 }
379
380 #ifdef UNIT_TEST
381 // cl -GX -DWIN32 -DUNIT_TEST fileid.cpp kernel32.lib
382
383 #include <iostream>
384
main()385 main()
386 {
387 Fileid x1;
388 Fileid x2;
389 Fileid a("fileid.cpp");
390 Fileid b("./fileid.cpp");
391 Fileid c(".");
392 Fileid d = b;
393 Fileid e(c);
394
395 cout << "a=" << a.get_path() << " (should be fileId.cpp)\n";
396 cout << "b=" << b.get_path() << " (should be fileId.cpp)\n";
397 cout << "c=" << c.get_path() << " (should be .)\n";
398 cout << "d=" << d.get_path() << " (should be fileId.cpp)\n";
399 cout << "e=" << e.get_path() << " (should be .)\n";
400 cout << "a==b: " << (a == b) << " (should be 1)\n";
401 cout << "size=" << sizeof(a) << " (it better be 4)\n";
402 cout << "x2=" << x2.get_path() << " (should be ANONYMOUS)\n";
403 cout << "x1==x2: " << (x1 == x2) << " (should be 1)\n";
404 return (0);
405 }
406 #endif
407