1 // Protocol Buffers - Google's data interchange format
2 // Copyright 2008 Google Inc.  All rights reserved.
3 // https://developers.google.com/protocol-buffers/
4 //
5 // Redistribution and use in source and binary forms, with or without
6 // modification, are permitted provided that the following conditions are
7 // met:
8 //
9 //     * Redistributions of source code must retain the above copyright
10 // notice, this list of conditions and the following disclaimer.
11 //     * Redistributions in binary form must reproduce the above
12 // copyright notice, this list of conditions and the following disclaimer
13 // in the documentation and/or other materials provided with the
14 // distribution.
15 //     * Neither the name of Google Inc. nor the names of its
16 // contributors may be used to endorse or promote products derived from
17 // this software without specific prior written permission.
18 //
19 // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
20 // "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
21 // LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
22 // A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
23 // OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
24 // SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
25 // LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
26 // DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
27 // THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
28 // (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
29 // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
30 
31 // Author: kenton@google.com (Kenton Varda)
32 //  Based on original Protocol Buffers design by
33 //  Sanjay Ghemawat, Jeff Dean, and others.
34 
35 #ifdef _MSC_VER
36 #include <io.h>
37 #else
38 #include <unistd.h>
39 #endif
40 #include <sys/types.h>
41 #include <sys/stat.h>
42 #include <fcntl.h>
43 #include <errno.h>
44 
45 #include <algorithm>
46 #include <memory>
47 
48 #include <google/protobuf/compiler/importer.h>
49 
50 #include <google/protobuf/compiler/parser.h>
51 #include <google/protobuf/io/tokenizer.h>
52 #include <google/protobuf/io/zero_copy_stream_impl.h>
53 #include <google/protobuf/stubs/strutil.h>
54 
55 namespace google {
56 namespace protobuf {
57 namespace compiler {
58 
59 #ifdef _WIN32
60 #ifndef F_OK
61 #define F_OK 00  // not defined by MSVC for whatever reason
62 #endif
63 #include <ctype.h>
64 #endif
65 
// Returns true if the text looks like a Windows-style absolute path, starting
// with a drive letter.  Example:  "C:\foo".  To qualify, the text must be at
// least three characters long, begin with a letter followed by ':' and then
// a forward or backward slash, and contain no other ':' character.
//
// When compiled for anything other than Windows or Cygwin this always
// returns false.
//
// TODO(kenton):  Share this with copy in command_line_interface.cc?
static bool IsWindowsAbsolutePath(const string& text) {
#if defined(_WIN32) || defined(__CYGWIN__)
  // isalpha() is only defined for EOF and values representable as unsigned
  // char.  A plain char may be negative (e.g. bytes >= 0x80), so convert
  // before classifying it.
  return text.size() >= 3 && text[1] == ':' &&
         isalpha(static_cast<unsigned char>(text[0])) &&
         (text[2] == '/' || text[2] == '\\') &&
         text.find_last_of(':') == 1;
#else
  return false;
#endif
}
79 
~MultiFileErrorCollector()80 MultiFileErrorCollector::~MultiFileErrorCollector() {}
81 
82 // This class serves two purposes:
83 // - It implements the ErrorCollector interface (used by Tokenizer and Parser)
84 //   in terms of MultiFileErrorCollector, using a particular filename.
85 // - It lets us check if any errors have occurred.
86 class SourceTreeDescriptorDatabase::SingleFileErrorCollector
87     : public io::ErrorCollector {
88  public:
SingleFileErrorCollector(const string & filename,MultiFileErrorCollector * multi_file_error_collector)89   SingleFileErrorCollector(const string& filename,
90                            MultiFileErrorCollector* multi_file_error_collector)
91     : filename_(filename),
92       multi_file_error_collector_(multi_file_error_collector),
93       had_errors_(false) {}
~SingleFileErrorCollector()94   ~SingleFileErrorCollector() {}
95 
had_errors()96   bool had_errors() { return had_errors_; }
97 
98   // implements ErrorCollector ---------------------------------------
AddError(int line,int column,const string & message)99   void AddError(int line, int column, const string& message) {
100     if (multi_file_error_collector_ != NULL) {
101       multi_file_error_collector_->AddError(filename_, line, column, message);
102     }
103     had_errors_ = true;
104   }
105 
106  private:
107   string filename_;
108   MultiFileErrorCollector* multi_file_error_collector_;
109   bool had_errors_;
110 };
111 
112 // ===================================================================
113 
SourceTreeDescriptorDatabase(SourceTree * source_tree)114 SourceTreeDescriptorDatabase::SourceTreeDescriptorDatabase(
115     SourceTree* source_tree)
116   : source_tree_(source_tree),
117     error_collector_(NULL),
118     using_validation_error_collector_(false),
119     validation_error_collector_(this) {}
120 
~SourceTreeDescriptorDatabase()121 SourceTreeDescriptorDatabase::~SourceTreeDescriptorDatabase() {}
122 
FindFileByName(const string & filename,FileDescriptorProto * output)123 bool SourceTreeDescriptorDatabase::FindFileByName(
124     const string& filename, FileDescriptorProto* output) {
125   scoped_ptr<io::ZeroCopyInputStream> input(source_tree_->Open(filename));
126   if (input == NULL) {
127     if (error_collector_ != NULL) {
128       error_collector_->AddError(filename, -1, 0,
129                                  source_tree_->GetLastErrorMessage());
130     }
131     return false;
132   }
133 
134   // Set up the tokenizer and parser.
135   SingleFileErrorCollector file_error_collector(filename, error_collector_);
136   io::Tokenizer tokenizer(input.get(), &file_error_collector);
137 
138   Parser parser;
139   if (error_collector_ != NULL) {
140     parser.RecordErrorsTo(&file_error_collector);
141   }
142   if (using_validation_error_collector_) {
143     parser.RecordSourceLocationsTo(&source_locations_);
144   }
145 
146   // Parse it.
147   output->set_name(filename);
148   return parser.Parse(&tokenizer, output) &&
149          !file_error_collector.had_errors();
150 }
151 
FindFileContainingSymbol(const string & symbol_name,FileDescriptorProto * output)152 bool SourceTreeDescriptorDatabase::FindFileContainingSymbol(
153     const string& symbol_name, FileDescriptorProto* output) {
154   return false;
155 }
156 
FindFileContainingExtension(const string & containing_type,int field_number,FileDescriptorProto * output)157 bool SourceTreeDescriptorDatabase::FindFileContainingExtension(
158     const string& containing_type, int field_number,
159     FileDescriptorProto* output) {
160   return false;
161 }
162 
163 // -------------------------------------------------------------------
164 
165 SourceTreeDescriptorDatabase::ValidationErrorCollector::
ValidationErrorCollector(SourceTreeDescriptorDatabase * owner)166 ValidationErrorCollector(SourceTreeDescriptorDatabase* owner)
167   : owner_(owner) {}
168 
169 SourceTreeDescriptorDatabase::ValidationErrorCollector::
~ValidationErrorCollector()170 ~ValidationErrorCollector() {}
171 
AddError(const string & filename,const string & element_name,const Message * descriptor,ErrorLocation location,const string & message)172 void SourceTreeDescriptorDatabase::ValidationErrorCollector::AddError(
173     const string& filename,
174     const string& element_name,
175     const Message* descriptor,
176     ErrorLocation location,
177     const string& message) {
178   if (owner_->error_collector_ == NULL) return;
179 
180   int line, column;
181   owner_->source_locations_.Find(descriptor, location, &line, &column);
182   owner_->error_collector_->AddError(filename, line, column, message);
183 }
184 
185 // ===================================================================
186 
Importer(SourceTree * source_tree,MultiFileErrorCollector * error_collector)187 Importer::Importer(SourceTree* source_tree,
188                    MultiFileErrorCollector* error_collector)
189   : database_(source_tree),
190     pool_(&database_, database_.GetValidationErrorCollector()) {
191   pool_.EnforceWeakDependencies(true);
192   database_.RecordErrorsTo(error_collector);
193 }
194 
~Importer()195 Importer::~Importer() {}
196 
Import(const string & filename)197 const FileDescriptor* Importer::Import(const string& filename) {
198   return pool_.FindFileByName(filename);
199 }
200 
AddUnusedImportTrackFile(const string & file_name)201 void Importer::AddUnusedImportTrackFile(const string& file_name) {
202   pool_.AddUnusedImportTrackFile(file_name);
203 }
204 
ClearUnusedImportTrackFiles()205 void Importer::ClearUnusedImportTrackFiles() {
206   pool_.ClearUnusedImportTrackFiles();
207 }
208 
209 // ===================================================================
210 
~SourceTree()211 SourceTree::~SourceTree() {}
212 
GetLastErrorMessage()213 string SourceTree::GetLastErrorMessage() {
214   return "File not found.";
215 }
216 
DiskSourceTree()217 DiskSourceTree::DiskSourceTree() {}
218 
~DiskSourceTree()219 DiskSourceTree::~DiskSourceTree() {}
220 
// Returns the final character of str, or '\0' if str is empty.
static inline char LastChar(const string& str) {
  // Guard the empty case: str.size() - 1 would otherwise wrap around to the
  // largest size value and index far outside the string.
  return str.empty() ? '\0' : str[str.size() - 1];
}
224 
225 // Given a path, returns an equivalent path with these changes:
226 // - On Windows, any backslashes are replaced with forward slashes.
227 // - Any instances of the directory "." are removed.
228 // - Any consecutive '/'s are collapsed into a single slash.
229 // Note that the resulting string may be empty.
230 //
231 // TODO(kenton):  It would be nice to handle "..", e.g. so that we can figure
232 //   out that "foo/bar.proto" is inside "baz/../foo".  However, if baz is a
233 //   symlink or doesn't exist, then things get complicated, and we can't
234 //   actually determine this without investigating the filesystem, probably
235 //   in non-portable ways.  So, we punt.
236 //
237 // TODO(kenton):  It would be nice to use realpath() here except that it
238 //   resolves symbolic links.  This could cause problems if people place
239 //   symbolic links in their source tree.  For example, if you executed:
240 //     protoc --proto_path=foo foo/bar/baz.proto
241 //   then if foo/bar is a symbolic link, foo/bar/baz.proto will canonicalize
242 //   to a path which does not appear to be under foo, and thus the compiler
243 //   will complain that baz.proto is not inside the --proto_path.
CanonicalizePath(string path)244 static string CanonicalizePath(string path) {
245 #ifdef _WIN32
246   // The Win32 API accepts forward slashes as a path delimiter even though
247   // backslashes are standard.  Let's avoid confusion and use only forward
248   // slashes.
249   if (HasPrefixString(path, "\\\\")) {
250     // Avoid converting two leading backslashes.
251     path = "\\\\" + StringReplace(path.substr(2), "\\", "/", true);
252   } else {
253     path = StringReplace(path, "\\", "/", true);
254   }
255 #endif
256 
257   vector<string> canonical_parts;
258   vector<string> parts = Split(
259       path, "/", true);  // Note:  Removes empty parts.
260   for (int i = 0; i < parts.size(); i++) {
261     if (parts[i] == ".") {
262       // Ignore.
263     } else {
264       canonical_parts.push_back(parts[i]);
265     }
266   }
267   string result = Join(canonical_parts, "/");
268   if (!path.empty() && path[0] == '/') {
269     // Restore leading slash.
270     result = '/' + result;
271   }
272   if (!path.empty() && LastChar(path) == '/' &&
273       !result.empty() && LastChar(result) != '/') {
274     // Restore trailing slash.
275     result += '/';
276   }
277   return result;
278 }
279 
ContainsParentReference(const string & path)280 static inline bool ContainsParentReference(const string& path) {
281   return path == ".." ||
282          HasPrefixString(path, "../") ||
283          HasSuffixString(path, "/..") ||
284          path.find("/../") != string::npos;
285 }
286 
287 // Maps a file from an old location to a new one.  Typically, old_prefix is
288 // a virtual path and new_prefix is its corresponding disk path.  Returns
289 // false if the filename did not start with old_prefix, otherwise replaces
290 // old_prefix with new_prefix and stores the result in *result.  Examples:
291 //   string result;
292 //   assert(ApplyMapping("foo/bar", "", "baz", &result));
293 //   assert(result == "baz/foo/bar");
294 //
295 //   assert(ApplyMapping("foo/bar", "foo", "baz", &result));
296 //   assert(result == "baz/bar");
297 //
298 //   assert(ApplyMapping("foo", "foo", "bar", &result));
299 //   assert(result == "bar");
300 //
301 //   assert(!ApplyMapping("foo/bar", "baz", "qux", &result));
302 //   assert(!ApplyMapping("foo/bar", "baz", "qux", &result));
303 //   assert(!ApplyMapping("foobar", "foo", "baz", &result));
ApplyMapping(const string & filename,const string & old_prefix,const string & new_prefix,string * result)304 static bool ApplyMapping(const string& filename,
305                          const string& old_prefix,
306                          const string& new_prefix,
307                          string* result) {
308   if (old_prefix.empty()) {
309     // old_prefix matches any relative path.
310     if (ContainsParentReference(filename)) {
311       // We do not allow the file name to use "..".
312       return false;
313     }
314     if (HasPrefixString(filename, "/") ||
315         IsWindowsAbsolutePath(filename)) {
316       // This is an absolute path, so it isn't matched by the empty string.
317       return false;
318     }
319     result->assign(new_prefix);
320     if (!result->empty()) result->push_back('/');
321     result->append(filename);
322     return true;
323   } else if (HasPrefixString(filename, old_prefix)) {
324     // old_prefix is a prefix of the filename.  Is it the whole filename?
325     if (filename.size() == old_prefix.size()) {
326       // Yep, it's an exact match.
327       *result = new_prefix;
328       return true;
329     } else {
330       // Not an exact match.  Is the next character a '/'?  Otherwise,
331       // this isn't actually a match at all.  E.g. the prefix "foo/bar"
332       // does not match the filename "foo/barbaz".
333       int after_prefix_start = -1;
334       if (filename[old_prefix.size()] == '/') {
335         after_prefix_start = old_prefix.size() + 1;
336       } else if (filename[old_prefix.size() - 1] == '/') {
337         // old_prefix is never empty, and canonicalized paths never have
338         // consecutive '/' characters.
339         after_prefix_start = old_prefix.size();
340       }
341       if (after_prefix_start != -1) {
342         // Yep.  So the prefixes are directories and the filename is a file
343         // inside them.
344         string after_prefix = filename.substr(after_prefix_start);
345         if (ContainsParentReference(after_prefix)) {
346           // We do not allow the file name to use "..".
347           return false;
348         }
349         result->assign(new_prefix);
350         if (!result->empty()) result->push_back('/');
351         result->append(after_prefix);
352         return true;
353       }
354     }
355   }
356 
357   return false;
358 }
359 
MapPath(const string & virtual_path,const string & disk_path)360 void DiskSourceTree::MapPath(const string& virtual_path,
361                              const string& disk_path) {
362   mappings_.push_back(Mapping(virtual_path, CanonicalizePath(disk_path)));
363 }
364 
365 DiskSourceTree::DiskFileToVirtualFileResult
DiskFileToVirtualFile(const string & disk_file,string * virtual_file,string * shadowing_disk_file)366 DiskSourceTree::DiskFileToVirtualFile(
367     const string& disk_file,
368     string* virtual_file,
369     string* shadowing_disk_file) {
370   int mapping_index = -1;
371   string canonical_disk_file = CanonicalizePath(disk_file);
372 
373   for (int i = 0; i < mappings_.size(); i++) {
374     // Apply the mapping in reverse.
375     if (ApplyMapping(canonical_disk_file, mappings_[i].disk_path,
376                      mappings_[i].virtual_path, virtual_file)) {
377       // Success.
378       mapping_index = i;
379       break;
380     }
381   }
382 
383   if (mapping_index == -1) {
384     return NO_MAPPING;
385   }
386 
387   // Iterate through all mappings with higher precedence and verify that none
388   // of them map this file to some other existing file.
389   for (int i = 0; i < mapping_index; i++) {
390     if (ApplyMapping(*virtual_file, mappings_[i].virtual_path,
391                      mappings_[i].disk_path, shadowing_disk_file)) {
392       if (access(shadowing_disk_file->c_str(), F_OK) >= 0) {
393         // File exists.
394         return SHADOWED;
395       }
396     }
397   }
398   shadowing_disk_file->clear();
399 
400   // Verify that we can open the file.  Note that this also has the side-effect
401   // of verifying that we are not canonicalizing away any non-existent
402   // directories.
403   scoped_ptr<io::ZeroCopyInputStream> stream(OpenDiskFile(disk_file));
404   if (stream == NULL) {
405     return CANNOT_OPEN;
406   }
407 
408   return SUCCESS;
409 }
410 
VirtualFileToDiskFile(const string & virtual_file,string * disk_file)411 bool DiskSourceTree::VirtualFileToDiskFile(const string& virtual_file,
412                                            string* disk_file) {
413   scoped_ptr<io::ZeroCopyInputStream> stream(
414       OpenVirtualFile(virtual_file, disk_file));
415   return stream != NULL;
416 }
417 
Open(const string & filename)418 io::ZeroCopyInputStream* DiskSourceTree::Open(const string& filename) {
419   return OpenVirtualFile(filename, NULL);
420 }
421 
GetLastErrorMessage()422 string DiskSourceTree::GetLastErrorMessage() {
423   return last_error_message_;
424 }
425 
OpenVirtualFile(const string & virtual_file,string * disk_file)426 io::ZeroCopyInputStream* DiskSourceTree::OpenVirtualFile(
427     const string& virtual_file,
428     string* disk_file) {
429   if (virtual_file != CanonicalizePath(virtual_file) ||
430       ContainsParentReference(virtual_file)) {
431     // We do not allow importing of paths containing things like ".." or
432     // consecutive slashes since the compiler expects files to be uniquely
433     // identified by file name.
434     last_error_message_ = "Backslashes, consecutive slashes, \".\", or \"..\" "
435                           "are not allowed in the virtual path";
436     return NULL;
437   }
438 
439   for (int i = 0; i < mappings_.size(); i++) {
440     string temp_disk_file;
441     if (ApplyMapping(virtual_file, mappings_[i].virtual_path,
442                      mappings_[i].disk_path, &temp_disk_file)) {
443       io::ZeroCopyInputStream* stream = OpenDiskFile(temp_disk_file);
444       if (stream != NULL) {
445         if (disk_file != NULL) {
446           *disk_file = temp_disk_file;
447         }
448         return stream;
449       }
450 
451       if (errno == EACCES) {
452         // The file exists but is not readable.
453         last_error_message_ = "Read access is denied for file: " +
454                               temp_disk_file;
455         return NULL;
456       }
457     }
458   }
459   last_error_message_ = "File not found.";
460   return NULL;
461 }
462 
OpenDiskFile(const string & filename)463 io::ZeroCopyInputStream* DiskSourceTree::OpenDiskFile(
464     const string& filename) {
465   int file_descriptor;
466   do {
467     file_descriptor = open(filename.c_str(), O_RDONLY);
468   } while (file_descriptor < 0 && errno == EINTR);
469   if (file_descriptor >= 0) {
470     io::FileInputStream* result = new io::FileInputStream(file_descriptor);
471     result->SetCloseOnDelete(true);
472     return result;
473   } else {
474     return NULL;
475   }
476 }
477 
478 }  // namespace compiler
479 }  // namespace protobuf
480 }  // namespace google
481