1 // Copyright 2012 Google Inc. All Rights Reserved.
2 //
3 // Licensed under the Apache License, Version 2.0 (the "License");
4 // you may not use this file except in compliance with the License.
5 // You may obtain a copy of the License at
6 //
7 // http://www.apache.org/licenses/LICENSE-2.0
8 //
9 // Unless required by applicable law or agreed to in writing, software
10 // distributed under the License is distributed on an "AS IS" BASIS,
11 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 // See the License for the specific language governing permissions and
13 // limitations under the License.
14
15 #include "deps_log.h"
16
17 #include <assert.h>
18 #include <stdio.h>
19 #include <errno.h>
20 #include <string.h>
21 #ifndef _WIN32
22 #include <unistd.h>
23 #endif
24
25 #include "graph.h"
26 #include "metrics.h"
27 #include "state.h"
28 #include "util.h"
29
// On-disk header: the text signature below, immediately followed by the
// version number stored as 4 raw bytes. Because the version is written in
// host byte order it also serves as a byte order mark. Signature (12 bytes)
// and version (4 bytes) combined are 16 bytes long.
const char kFileSignature[] = "# ninjadeps\n";
const int kCurrentVersion = 3;

// Record size is currently limited to less than the full 32 bit, due to
// internal buffers having to have this size. The value is 2^19 - 1 bytes;
// both the stdio write buffer and the load buffer are sized from it.
const unsigned kMaxRecordSize = (1 << 19) - 1;
38
~DepsLog()39 DepsLog::~DepsLog() {
40 Close();
41 }
42
OpenForWrite(const string & path,string * err)43 bool DepsLog::OpenForWrite(const string& path, string* err) {
44 if (needs_recompaction_) {
45 if (!Recompact(path, err))
46 return false;
47 }
48
49 file_ = fopen(path.c_str(), "ab");
50 if (!file_) {
51 *err = strerror(errno);
52 return false;
53 }
54 // Set the buffer size to this and flush the file buffer after every record
55 // to make sure records aren't written partially.
56 setvbuf(file_, NULL, _IOFBF, kMaxRecordSize + 1);
57 SetCloseOnExec(fileno(file_));
58
59 // Opening a file in append mode doesn't set the file pointer to the file's
60 // end on Windows. Do that explicitly.
61 fseek(file_, 0, SEEK_END);
62
63 if (ftell(file_) == 0) {
64 if (fwrite(kFileSignature, sizeof(kFileSignature) - 1, 1, file_) < 1) {
65 *err = strerror(errno);
66 return false;
67 }
68 if (fwrite(&kCurrentVersion, 4, 1, file_) < 1) {
69 *err = strerror(errno);
70 return false;
71 }
72 }
73 if (fflush(file_) != 0) {
74 *err = strerror(errno);
75 return false;
76 }
77 return true;
78 }
79
RecordDeps(Node * node,TimeStamp mtime,const vector<Node * > & nodes)80 bool DepsLog::RecordDeps(Node* node, TimeStamp mtime,
81 const vector<Node*>& nodes) {
82 return RecordDeps(node, mtime, nodes.size(),
83 nodes.empty() ? NULL : (Node**)&nodes.front());
84 }
85
RecordDeps(Node * node,TimeStamp mtime,int node_count,Node ** nodes)86 bool DepsLog::RecordDeps(Node* node, TimeStamp mtime,
87 int node_count, Node** nodes) {
88 // Track whether there's any new data to be recorded.
89 bool made_change = false;
90
91 // Assign ids to all nodes that are missing one.
92 if (node->id() < 0) {
93 if (!RecordId(node))
94 return false;
95 made_change = true;
96 }
97 for (int i = 0; i < node_count; ++i) {
98 if (nodes[i]->id() < 0) {
99 if (!RecordId(nodes[i]))
100 return false;
101 made_change = true;
102 }
103 }
104
105 // See if the new data is different than the existing data, if any.
106 if (!made_change) {
107 Deps* deps = GetDeps(node);
108 if (!deps ||
109 deps->mtime != mtime ||
110 deps->node_count != node_count) {
111 made_change = true;
112 } else {
113 for (int i = 0; i < node_count; ++i) {
114 if (deps->nodes[i] != nodes[i]) {
115 made_change = true;
116 break;
117 }
118 }
119 }
120 }
121
122 // Don't write anything if there's no new info.
123 if (!made_change)
124 return true;
125
126 // Update on-disk representation.
127 unsigned size = 4 * (1 + 1 + node_count);
128 if (size > kMaxRecordSize) {
129 errno = ERANGE;
130 return false;
131 }
132 size |= 0x80000000; // Deps record: set high bit.
133 if (fwrite(&size, 4, 1, file_) < 1)
134 return false;
135 int id = node->id();
136 if (fwrite(&id, 4, 1, file_) < 1)
137 return false;
138 int timestamp = mtime;
139 if (fwrite(×tamp, 4, 1, file_) < 1)
140 return false;
141 for (int i = 0; i < node_count; ++i) {
142 id = nodes[i]->id();
143 if (fwrite(&id, 4, 1, file_) < 1)
144 return false;
145 }
146 if (fflush(file_) != 0)
147 return false;
148
149 // Update in-memory representation.
150 Deps* deps = new Deps(mtime, node_count);
151 for (int i = 0; i < node_count; ++i)
152 deps->nodes[i] = nodes[i];
153 UpdateDeps(node->id(), deps);
154
155 return true;
156 }
157
Close()158 void DepsLog::Close() {
159 if (file_)
160 fclose(file_);
161 file_ = NULL;
162 }
163
Load(const string & path,State * state,string * err)164 bool DepsLog::Load(const string& path, State* state, string* err) {
165 METRIC_RECORD(".ninja_deps load");
166 char buf[kMaxRecordSize + 1];
167 FILE* f = fopen(path.c_str(), "rb");
168 if (!f) {
169 if (errno == ENOENT)
170 return true;
171 *err = strerror(errno);
172 return false;
173 }
174
175 bool valid_header = true;
176 int version = 0;
177 if (!fgets(buf, sizeof(buf), f) || fread(&version, 4, 1, f) < 1)
178 valid_header = false;
179 // Note: For version differences, this should migrate to the new format.
180 // But the v1 format could sometimes (rarely) end up with invalid data, so
181 // don't migrate v1 to v3 to force a rebuild. (v2 only existed for a few days,
182 // and there was no release with it, so pretend that it never happened.)
183 if (!valid_header || strcmp(buf, kFileSignature) != 0 ||
184 version != kCurrentVersion) {
185 if (version == 1)
186 *err = "deps log version change; rebuilding";
187 else
188 *err = "bad deps log signature or version; starting over";
189 fclose(f);
190 unlink(path.c_str());
191 // Don't report this as a failure. An empty deps log will cause
192 // us to rebuild the outputs anyway.
193 return true;
194 }
195
196 long offset;
197 bool read_failed = false;
198 int unique_dep_record_count = 0;
199 int total_dep_record_count = 0;
200 for (;;) {
201 offset = ftell(f);
202
203 unsigned size;
204 if (fread(&size, 4, 1, f) < 1) {
205 if (!feof(f))
206 read_failed = true;
207 break;
208 }
209 bool is_deps = (size >> 31) != 0;
210 size = size & 0x7FFFFFFF;
211
212 if (fread(buf, size, 1, f) < 1 || size > kMaxRecordSize) {
213 read_failed = true;
214 break;
215 }
216
217 if (is_deps) {
218 assert(size % 4 == 0);
219 int* deps_data = reinterpret_cast<int*>(buf);
220 int out_id = deps_data[0];
221 int mtime = deps_data[1];
222 deps_data += 2;
223 int deps_count = (size / 4) - 2;
224
225 Deps* deps = new Deps(mtime, deps_count);
226 for (int i = 0; i < deps_count; ++i) {
227 assert(deps_data[i] < (int)nodes_.size());
228 assert(nodes_[deps_data[i]]);
229 deps->nodes[i] = nodes_[deps_data[i]];
230 }
231
232 total_dep_record_count++;
233 if (!UpdateDeps(out_id, deps))
234 ++unique_dep_record_count;
235 } else {
236 int path_size = size - 4;
237 assert(path_size > 0); // CanonicalizePath() rejects empty paths.
238 // There can be up to 3 bytes of padding.
239 if (buf[path_size - 1] == '\0') --path_size;
240 if (buf[path_size - 1] == '\0') --path_size;
241 if (buf[path_size - 1] == '\0') --path_size;
242 StringPiece subpath(buf, path_size);
243 // It is not necessary to pass in a correct slash_bits here. It will
244 // either be a Node that's in the manifest (in which case it will already
245 // have a correct slash_bits that GetNode will look up), or it is an
246 // implicit dependency from a .d which does not affect the build command
247 // (and so need not have its slashes maintained).
248 Node* node = state->GetNode(subpath, 0);
249
250 // Check that the expected index matches the actual index. This can only
251 // happen if two ninja processes write to the same deps log concurrently.
252 // (This uses unary complement to make the checksum look less like a
253 // dependency record entry.)
254 unsigned checksum = *reinterpret_cast<unsigned*>(buf + size - 4);
255 int expected_id = ~checksum;
256 int id = nodes_.size();
257 if (id != expected_id) {
258 read_failed = true;
259 break;
260 }
261
262 assert(node->id() < 0);
263 node->set_id(id);
264 nodes_.push_back(node);
265 }
266 }
267
268 if (read_failed) {
269 // An error occurred while loading; try to recover by truncating the
270 // file to the last fully-read record.
271 if (ferror(f)) {
272 *err = strerror(ferror(f));
273 } else {
274 *err = "premature end of file";
275 }
276 fclose(f);
277
278 if (!Truncate(path, offset, err))
279 return false;
280
281 // The truncate succeeded; we'll just report the load error as a
282 // warning because the build can proceed.
283 *err += "; recovering";
284 return true;
285 }
286
287 fclose(f);
288
289 // Rebuild the log if there are too many dead records.
290 int kMinCompactionEntryCount = 1000;
291 int kCompactionRatio = 3;
292 if (total_dep_record_count > kMinCompactionEntryCount &&
293 total_dep_record_count > unique_dep_record_count * kCompactionRatio) {
294 needs_recompaction_ = true;
295 }
296
297 return true;
298 }
299
GetDeps(Node * node)300 DepsLog::Deps* DepsLog::GetDeps(Node* node) {
301 // Abort if the node has no id (never referenced in the deps) or if
302 // there's no deps recorded for the node.
303 if (node->id() < 0 || node->id() >= (int)deps_.size())
304 return NULL;
305 return deps_[node->id()];
306 }
307
Recompact(const string & path,string * err)308 bool DepsLog::Recompact(const string& path, string* err) {
309 METRIC_RECORD(".ninja_deps recompact");
310
311 Close();
312 string temp_path = path + ".recompact";
313
314 // OpenForWrite() opens for append. Make sure it's not appending to a
315 // left-over file from a previous recompaction attempt that crashed somehow.
316 unlink(temp_path.c_str());
317
318 DepsLog new_log;
319 if (!new_log.OpenForWrite(temp_path, err))
320 return false;
321
322 // Clear all known ids so that new ones can be reassigned. The new indices
323 // will refer to the ordering in new_log, not in the current log.
324 for (vector<Node*>::iterator i = nodes_.begin(); i != nodes_.end(); ++i)
325 (*i)->set_id(-1);
326
327 // Write out all deps again.
328 for (int old_id = 0; old_id < (int)deps_.size(); ++old_id) {
329 Deps* deps = deps_[old_id];
330 if (!deps) continue; // If nodes_[old_id] is a leaf, it has no deps.
331
332 if (!IsDepsEntryLiveFor(nodes_[old_id]))
333 continue;
334
335 if (!new_log.RecordDeps(nodes_[old_id], deps->mtime,
336 deps->node_count, deps->nodes)) {
337 new_log.Close();
338 return false;
339 }
340 }
341
342 new_log.Close();
343
344 // All nodes now have ids that refer to new_log, so steal its data.
345 deps_.swap(new_log.deps_);
346 nodes_.swap(new_log.nodes_);
347
348 if (unlink(path.c_str()) < 0) {
349 *err = strerror(errno);
350 return false;
351 }
352
353 if (rename(temp_path.c_str(), path.c_str()) < 0) {
354 *err = strerror(errno);
355 return false;
356 }
357
358 return true;
359 }
360
IsDepsEntryLiveFor(Node * node)361 bool DepsLog::IsDepsEntryLiveFor(Node* node) {
362 // Skip entries that don't have in-edges or whose edges don't have a
363 // "deps" attribute. They were in the deps log from previous builds, but
364 // the the files they were for were removed from the build and their deps
365 // entries are no longer needed.
366 // (Without the check for "deps", a chain of two or more nodes that each
367 // had deps wouldn't be collected in a single recompaction.)
368 return node->in_edge() && !node->in_edge()->GetBinding("deps").empty();
369 }
370
UpdateDeps(int out_id,Deps * deps)371 bool DepsLog::UpdateDeps(int out_id, Deps* deps) {
372 if (out_id >= (int)deps_.size())
373 deps_.resize(out_id + 1);
374
375 bool delete_old = deps_[out_id] != NULL;
376 if (delete_old)
377 delete deps_[out_id];
378 deps_[out_id] = deps;
379 return delete_old;
380 }
381
RecordId(Node * node)382 bool DepsLog::RecordId(Node* node) {
383 int path_size = node->path().size();
384 int padding = (4 - path_size % 4) % 4; // Pad path to 4 byte boundary.
385
386 unsigned size = path_size + padding + 4;
387 if (size > kMaxRecordSize) {
388 errno = ERANGE;
389 return false;
390 }
391 if (fwrite(&size, 4, 1, file_) < 1)
392 return false;
393 if (fwrite(node->path().data(), path_size, 1, file_) < 1) {
394 assert(node->path().size() > 0);
395 return false;
396 }
397 if (padding && fwrite("\0\0", padding, 1, file_) < 1)
398 return false;
399 int id = nodes_.size();
400 unsigned checksum = ~(unsigned)id;
401 if (fwrite(&checksum, 4, 1, file_) < 1)
402 return false;
403 if (fflush(file_) != 0)
404 return false;
405
406 node->set_id(id);
407 nodes_.push_back(node);
408
409 return true;
410 }
411