1 /*
2  * (C) Copyright 2001-2015 Diomidis Spinellis
3  *
4  * This file is part of CScout.
5  *
6  * CScout is free software: you can redistribute it and/or modify
7  * it under the terms of the GNU General Public License as published by
8  * the Free Software Foundation, either version 3 of the License, or
9  * (at your option) any later version.
10  *
11  * CScout is distributed in the hope that it will be useful,
12  * but WITHOUT ANY WARRANTY; without even the implied warranty of
13  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
14  * GNU General Public License for more details.
15  *
16  * You should have received a copy of the GNU General Public License
17  * along with CScout.  If not, see <http://www.gnu.org/licenses/>.
18  *
19  *
20  * A unique file identifier.  This is constructed from the file's name.
 * Fileids referring to the same underlying file are guaranteed to
22  * compare equal.
23  *
24  * Include synopsis:
25  * #include <map>
26  * #include <string>
27  * #include <vector>
28  * #include <list>
29  * #include <set>
30  *
31  * #include "attr.h"
32  * #include "metrics.h"
33  *
34  */
35 
36 #ifndef FILEID_
37 #define FILEID_
38 
39 #include <set>
40 #include <list>
41 
42 using namespace std;
43 
44 #include "filemetrics.h"
45 
46 using namespace std;
47 
/*
 * Details we keep for each included file for a given includer:
 * whether the inclusion is direct, whether it is required, and
 * the line numbers that include it.
 */
class IncDetails {
private:
	bool direct;		// True if directly included
	bool required;		// True if its inclusion is required
	std::set<int> lnum;	// Line numbers that include it (for direct includes)
public:
	// Create a record with the given direct (d) and required (r) flags
	IncDetails(bool d, bool r)
		: direct(d), required(r)
	{
	}

	// Conservatively merge d and r: a flag that is already true stays true
	void update(bool d, bool r)
	{
		if (d)
			direct = true;
		if (r)
			required = true;
	}

	// Record one more including line number (duplicates are ignored by the set)
	void add_line(int line)
	{
		lnum.insert(line);
	}

	// True if the inclusion is required
	bool is_required() const
	{
		return required;
	}

	// True if the file is directly included
	bool is_directly_included() const
	{
		return direct;
	}

	// The line numbers recorded through add_line
	const std::set<int>& include_line_numbers() const
	{
		return lnum;
	}
};
72 
class Fileid;
class Fchar;
class Call;

/*
 * Comparison functor used to order Call sets by their function
 * location in a file.
 * The nested typedefs supply the adaptable binary function interface
 * that used to be inherited from std::binary_function; that base class
 * was deprecated in C++11 and removed in C++17, so it is no longer
 * used here.
 * The comparison itself is defined outside this header.
 */
struct function_file_order {
	typedef const Call *first_argument_type;
	typedef const Call *second_argument_type;
	typedef bool result_type;

	bool operator()(const Call *a, const Call *b) const;
};
81 
82 typedef map <Fileid, IncDetails> FileIncMap;
83 typedef vector<unsigned char> FileHash;
84 typedef set <Call *, function_file_order> FCallSet;
85 typedef set <Fileid> Fileidset;
86 
87 // Details we keep for each file
88 class Filedetails {
89 private:
90 	string name;	// File name (complete path)
91 	bool m_garbage_collected;	// When postprocessing files to garbage collect ECs
92 	bool m_required;		// When postprocessing files actually required (containing definitions)
93 	bool m_compilation_unit;	// This file is a compilation unit (set by gc)
94 	// Line end offsets; collected during postprocessing
95 	// when we are generating warning reports
96 	vector <streampos> line_ends;
97 	// Lines that were processed (rather than skipped)
98 	vector <bool> processed_lines;;
99 	FileIncMap includes;	// Files we include
100 	FileIncMap includers;	// Files that include us
101 	FileHash hash;			// MD5 hash for the file's contents
102 	int ipath_offset;	// Offset in the include file path where this file was found
103 	Fileidset runtime_uses;	// Files whose global objects this file uses at runtime
104 	Fileidset runtime_used_by;	// Files that use at runtime this file's global objects
105 
106 	// Update the specified map
107 	void include_update(const Fileid f, FileIncMap Filedetails::*map, bool directly, bool required, int line);
108 
109 	bool hand_edited;	// True for files that have been hand-edited
110 	string contents;	// Original contents, if hand-edited
111 	bool visited;                   // For calculating transitive closures
112 public:
113 	Attributes attr;		// The projects this file participates in
114 	FileMetrics m;			// File's metrics
115 	FCallSet df;			// Functions defined in this file
116 	Filedetails(string n, bool r, const FileHash &h);
117 	Filedetails();
get_name()118 	const string& get_name() const { return name; }
get_readonly()119 	bool get_readonly() { return attr.get_attribute(is_readonly); }
get_filehash()120 	const FileHash & get_filehash() const { return hash; }
set_readonly(bool r)121 	void set_readonly(bool r) { attr.set_attribute_val(is_readonly, r); }
garbage_collected()122 	bool garbage_collected() const { return m_garbage_collected; }
set_gc(bool r)123 	void set_gc(bool r) { m_garbage_collected = r; }
required()124 	bool required() const { return m_required; }
set_required(bool r)125 	void set_required(bool r) { m_required = r; }
compilation_unit()126 	bool compilation_unit() const { return m_compilation_unit; }
set_compilation_unit(bool r)127 	void set_compilation_unit(bool r) { m_compilation_unit = r; }
128 	void process_line(bool processed);
is_processed(unsigned line)129 	bool is_processed(unsigned line) const {
130 		return line <= processed_lines.size() &&
131 			processed_lines[line - 1];
132 	};
133 	// Add and retrieve line numbers
134 	// Should be called every time a newline is encountered
add_line_end(streampos p)135 	void add_line_end(streampos p) { line_ends.push_back(p); }
136 	// Return a line number given a file offset
137 	int line_number(streampos p) const;
138 
139 
140 	// Update maps when includer (us) includes included
141 	void include_update_included(const Fileid included, bool directly, bool required, int line);
142 	void include_update_includer(const Fileid includer, bool directly, bool required, int line);
get_includes()143 	const FileIncMap& get_includes() const { return includes; }
get_includers()144 	const FileIncMap& get_includers() const { return includers; }
145 	// Should be called before hand-editing
146 	int hand_edit();
147 	// True if file has been hand-edited
is_hand_edited()148 	bool is_hand_edited() const { return hand_edited; }
149 	// Return the file's original contents
get_original_contents()150 	const string &get_original_contents() const { return contents; }
151 	// Include file path offset
get_ipath_offset()152 	int get_ipath_offset() const { return ipath_offset; }
set_ipath_offset(int o)153 	void set_ipath_offset(int o) { ipath_offset = o; }
set_visited()154 	void set_visited() { visited = true; }
clear_visited()155 	void clear_visited() { visited = false; }
is_visited()156 	bool is_visited() const { return visited; }
157 	// Add file that this file uses at runtime
158 	void glob_uses(Fileid f);
159 	// Add file that is used by this file at runtime
160 	void glob_used_by(Fileid f);
161 	// Return the set of files that we depend on for runtime objects
glob_uses()162 	const Fileidset & glob_uses() const { return runtime_uses; }
163 	// Return the set of files that depend on us for runtime objects
glob_used_by()164 	const Fileidset & glob_used_by() const { return runtime_used_by; }
165 };
166 
167 typedef map <string, int> FI_uname_to_id;
168 typedef vector <Filedetails> FI_id_to_details;
169 /*
170  * This is used for keeping identical files
171  * The value type must be ordered by the integer Fileid
172  * in order to keep *values.begin() invariant.
173  * This property is used by tokid unique for returning unique tokids
174  */
175 typedef map <FileHash, set<Fileid> > FI_hash_to_ids;
176 
177 /*
178  * A unique file identifier
179  * Keep the instance members of this class spartan
180  * We create billions of such objects
181  * Add details in the Filedetails class
182  */
183 class Fileid {
184 private:
185 	int id;				// One global unique id per workspace file
186 
187 	static int counter;		// To generate ids
188 	static FI_uname_to_id u2i;	// From unique name to id
189 	static FI_hash_to_ids identical_files;// Files that are exact duplicates
190 	static FI_id_to_details i2d;	// From id to file details
191 
192 	// Construct a new Fileid given a name and id value
193 	// Only used internally for creating the anonymous id
194 	Fileid(const string& name, int id);
195 	// An anonymous id
196 	static Fileid anonymous;
197 	// The prefix for read-only files
198 	static list <string> ro_prefix;
199 	// And a function to check fnames against it
200 	static bool is_readonly(string fname);
201 
202 public:
203 	// Construct a new Fileid given a filename
204 	Fileid(const string& fname);
205 	// Create it without any checking from an integer
Fileid(int i)206 	Fileid(int i) : id(i) {}
207 	// Construct an anonymous Fileid
Fileid()208 	Fileid() { *this = Fileid::anonymous; };
209 	// Return the full file path of a given id
210 	const string& get_path() const;
211 	const string get_fname() const;
212 	const string get_dir() const;
213 	// Handle the read-only file detail information
214 	bool get_readonly() const;
215 	void set_readonly(bool r);
get_id()216 	int get_id() const {return id; }
217 	// Clear the maps
218 	static void clear();
219 	// Set the prefix for read-only files
add_ro_prefix(string prefix)220 	static void add_ro_prefix(string prefix) { ro_prefix.push_back(prefix); }
221 	// Unify identifiers of files that are exact copies
222 	static void unify_identical_files(void);
223 	// Return the maximum file id
max_id()224 	static int max_id() { return counter - 1; }
225 	// Clear the visited flag for all fileids
226 	static void clear_all_visited();
227 	// Return a reference to the Metrics class
metrics()228 	FileMetrics &metrics() { return i2d[id].m; }
229 	// Return a reference to the Metrics class
const_metrics()230 	const FileMetrics &const_metrics() const { return i2d[id].m; }
231 	// Return the set of the file's functions
get_functions()232 	FCallSet &get_functions() const { return i2d[id].df; }
add_function(Call * f)233 	void add_function(Call *f) { i2d[id].df.insert(f); }
234 	// Get /set attributes
set_attribute(int v)235 	void set_attribute(int v) { i2d[id].attr.set_attribute(v); }
get_attribute(int v)236 	bool get_attribute(int v) { return i2d[id].attr.get_attribute(v); }
237 	// Get/set the garbage collected property
set_gc(bool v)238 	void set_gc(bool v) { i2d[id].set_gc(v); }
garbage_collected()239 	bool garbage_collected() const { return i2d[id].garbage_collected(); }
240 	// Get/set required property (for include files)
set_required(bool v)241 	void set_required(bool v) { i2d[id].set_required(v); }
required()242 	bool required() const { return i2d[id].required(); }
243 	// Get/set compilation_unit property (for include files)
set_compilation_unit(bool v)244 	void set_compilation_unit(bool v) { i2d[id].set_compilation_unit(v); }
compilation_unit()245 	bool compilation_unit() const { return i2d[id].compilation_unit(); }
246 	// Mark a line as processed
process_line(bool processed)247 	void process_line(bool processed) {i2d[id].process_line(processed); }
248 	// Return true if a line is processed
is_processed(int line)249 	bool is_processed(int line) const { return i2d[id].is_processed(line); };
250 	// Return the set of files that are the same as this (including this)
get_identical_files()251 	const Fileidset & get_identical_files() const { return identical_files[i2d[id].get_filehash()]; }
252 	// Return the set of files that we depend on for runtime objects
glob_uses()253 	const Fileidset & glob_uses() const { return i2d[id].glob_uses(); }
254 	// Return the set of files that depend on us for runtime objects
glob_used_by()255 	const Fileidset & glob_used_by() const { return i2d[id].glob_used_by(); }
256 	// Include file path offset
set_ipath_offset(int o)257 	void set_ipath_offset(int o) { i2d[id].set_ipath_offset(o); }
get_ipath_offset()258 	int get_ipath_offset() const { return i2d[id].get_ipath_offset(); }
259 
set_visited()260 	void set_visited() { i2d[id].set_visited(); }
clear_visited()261 	void clear_visited() { i2d[id].clear_visited(); }
is_visited()262 	bool is_visited() const { return i2d[id].is_visited(); }
263 
264 	// Add file that this file uses at runtime
glob_uses(Fileid f)265 	void glob_uses(Fileid f) { i2d[id].glob_uses(f); }
266 	// Add file that is used by this file at runtime
glob_used_by(Fileid f)267 	void glob_used_by(Fileid f) { i2d[id].glob_used_by(f); }
268 
269 	// Add and retrieve line numbers
270 	// Should be called every time a newline is encountered
add_line_end(streampos p)271 	void add_line_end(streampos p) { i2d[id].add_line_end(p); }
272 	// Return a line number given a file offset
line_number(streampos p)273 	int line_number(streampos p) const { return i2d[id].line_number(p); }
274 
275 	/*
276 	 * Called when we include file f
277 	 * A false value in the Boolean flags can simply mean "don't know" and
278 	 * can be later upgraded to true.
279 	 */
280 	void includes(const Fileid f, bool directly, bool required, int line = -1) {
281 		i2d[id].include_update_included(f, directly, required, line);
282 		i2d[f.get_id()].include_update_includer(id, directly, required, line);
283 	}
284 
get_includes()285 	const FileIncMap& get_includes() const { return i2d[id].get_includes(); }
get_includers()286 	const FileIncMap& get_includers() const { return i2d[id].get_includers(); }
287 
288 	inline friend bool operator ==(const class Fileid a, const class Fileid b);
289 	inline friend bool operator !=(const class Fileid a, const class Fileid b);
290 	inline friend bool operator <(const class Fileid a, const class Fileid b);
291 	// Should be called before hand-editing.  Return 0 if OK, !0 on error.
hand_edit()292 	int hand_edit() { return i2d[id].hand_edit(); }
293 	// True if file has been hand-edited
is_hand_edited()294 	bool is_hand_edited() { return i2d[id].is_hand_edited(); }
295 	// Return the file's original contents
get_original_contents()296 	const string &get_original_contents() { return i2d[id].get_original_contents(); }
297 	// Return a (possibly sorted) list of all filenames used
298 	static vector <Fileid> files(bool sorted);
299 	// Return a reference to the underlying file's metrics
300 };
301 
302 // Add file that this file uses at runtime
glob_uses(Fileid f)303 inline void Filedetails::glob_uses(Fileid f) { runtime_uses.insert(f); }
304 // Add file that is used by this file at runtime
glob_used_by(Fileid f)305 inline void Filedetails::glob_used_by(Fileid f) { runtime_used_by.insert(f); }
306 
307 inline bool
308 operator ==(const class Fileid a, const class Fileid b)
309 {
310 	return (a.id == b.id);
311 }
312 
313 inline bool
314 operator !=(const class Fileid a, const class Fileid b)
315 {
316 	return (a.id != b.id);
317 }
318 
319 inline bool
320 operator <(const class Fileid a, const class Fileid b)
321 {
322 	return (a.id < b.id);
323 }
324 
325 // Can be used to order Fileid sets
326 struct fname_order : public binary_function <const Fileid &, const Fileid &, bool> {
operatorfname_order327       bool operator()(const Fileid &a, const Fileid &b) const {
328 	      return a.get_path() < b.get_path();
329       }
330 };
331 
332 typedef set <Fileid, fname_order> IFSet;
333 
334 #endif /* FILEID_ */
335