1 // Copyright (c) 2006, Google Inc.
2 // All rights reserved.
3 //
4 // Redistribution and use in source and binary forms, with or without
5 // modification, are permitted provided that the following conditions are
6 // met:
7 //
8 //     * Redistributions of source code must retain the above copyright
9 // notice, this list of conditions and the following disclaimer.
10 //     * Redistributions in binary form must reproduce the above
11 // copyright notice, this list of conditions and the following disclaimer
12 // in the documentation and/or other materials provided with the
13 // distribution.
14 //     * Neither the name of Google Inc. nor the names of its
15 // contributors may be used to endorse or promote products derived from
16 // this software without specific prior written permission.
17 //
18 // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
19 // "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
20 // LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
21 // A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
22 // OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
23 // SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
24 // LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
25 // DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
26 // THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
27 // (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
28 // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
29 //
30 // file_id.cc: Return a unique identifier for a file
31 //
32 // See file_id.h for documentation
33 //
34 
35 #include "common/linux/file_id.h"
36 
37 #include <arpa/inet.h>
38 #include <assert.h>
39 #include <string.h>
40 
41 #include <algorithm>
42 #include <string>
43 
44 #include "common/linux/elf_gnu_compat.h"
45 #include "common/linux/elfutils.h"
46 #include "common/linux/linux_libc_support.h"
47 #include "common/linux/memory_mapped_file.h"
48 #include "common/using_std_string.h"
49 #include "third_party/lss/linux_syscall_support.h"
50 
51 namespace google_breakpad {
52 
53 // Used in a few places for backwards-compatibility.
54 const size_t kMDGUIDSize = sizeof(MDGUID);
55 
FileID(const char * path)56 FileID::FileID(const char* path) : path_(path) {}
57 
// ELF note name and desc are padded to 32-bit words: this rounds |a| up to
// the next multiple of 4. The argument is parenthesized so that compound
// expressions (e.g. `x | y`) are rounded as a whole.
#define NOTE_PADDING(a) (((a) + 3) & ~3)
60 
61 // These functions are also used inside the crashed process, so be safe
62 // and use the syscall/libc wrappers instead of direct syscalls or libc.
63 
ElfClassBuildIDNoteIdentifier(const void * section,size_t length,wasteful_vector<uint8_t> & identifier)64 static bool ElfClassBuildIDNoteIdentifier(const void *section, size_t length,
65                                           wasteful_vector<uint8_t>& identifier) {
66   static_assert(sizeof(ElfClass32::Nhdr) == sizeof(ElfClass64::Nhdr),
67                 "Elf32_Nhdr and Elf64_Nhdr should be the same");
68   typedef typename ElfClass32::Nhdr Nhdr;
69 
70   const void* section_end = reinterpret_cast<const char*>(section) + length;
71   const Nhdr* note_header = reinterpret_cast<const Nhdr*>(section);
72   while (reinterpret_cast<const void *>(note_header) < section_end) {
73     if (note_header->n_type == NT_GNU_BUILD_ID)
74       break;
75     note_header = reinterpret_cast<const Nhdr*>(
76                   reinterpret_cast<const char*>(note_header) + sizeof(Nhdr) +
77                   NOTE_PADDING(note_header->n_namesz) +
78                   NOTE_PADDING(note_header->n_descsz));
79   }
80   if (reinterpret_cast<const void *>(note_header) >= section_end ||
81       note_header->n_descsz == 0) {
82     return false;
83   }
84 
85   const uint8_t* build_id = reinterpret_cast<const uint8_t*>(note_header) +
86     sizeof(Nhdr) + NOTE_PADDING(note_header->n_namesz);
87   identifier.insert(identifier.end(),
88                     build_id,
89                     build_id + note_header->n_descsz);
90 
91   return true;
92 }
93 
94 // Attempt to locate a .note.gnu.build-id section in an ELF binary
95 // and copy it into |identifier|.
FindElfBuildIDNote(const void * elf_mapped_base,wasteful_vector<uint8_t> & identifier)96 static bool FindElfBuildIDNote(const void* elf_mapped_base,
97                                wasteful_vector<uint8_t>& identifier) {
98   PageAllocator allocator;
99   // lld normally creates 2 PT_NOTEs, gold normally creates 1.
100   auto_wasteful_vector<ElfSegment, 2> segs(&allocator);
101   if (FindElfSegments(elf_mapped_base, PT_NOTE, &segs)) {
102     for (ElfSegment& seg : segs) {
103       if (ElfClassBuildIDNoteIdentifier(seg.start, seg.size, identifier)) {
104         return true;
105       }
106     }
107   }
108 
109   void* note_section;
110   size_t note_size;
111   if (FindElfSection(elf_mapped_base, ".note.gnu.build-id", SHT_NOTE,
112                      (const void**)&note_section, &note_size)) {
113     return ElfClassBuildIDNoteIdentifier(note_section, note_size, identifier);
114   }
115 
116   return false;
117 }
118 
119 // Attempt to locate the .text section of an ELF binary and generate
120 // a simple hash by XORing the first page worth of bytes into |identifier|.
HashElfTextSection(const void * elf_mapped_base,wasteful_vector<uint8_t> & identifier)121 static bool HashElfTextSection(const void* elf_mapped_base,
122                                wasteful_vector<uint8_t>& identifier) {
123   identifier.resize(kMDGUIDSize);
124 
125   void* text_section;
126   size_t text_size;
127   if (!FindElfSection(elf_mapped_base, ".text", SHT_PROGBITS,
128                       (const void**)&text_section, &text_size) ||
129       text_size == 0) {
130     return false;
131   }
132 
133   // Only provide |kMDGUIDSize| bytes to keep identifiers produced by this
134   // function backwards-compatible.
135   my_memset(&identifier[0], 0, kMDGUIDSize);
136   const uint8_t* ptr = reinterpret_cast<const uint8_t*>(text_section);
137   const uint8_t* ptr_end = ptr + std::min(text_size, static_cast<size_t>(4096));
138   while (ptr < ptr_end) {
139     for (unsigned i = 0; i < kMDGUIDSize; i++)
140       identifier[i] ^= ptr[i];
141     ptr += kMDGUIDSize;
142   }
143   return true;
144 }
145 
146 // static
ElfFileIdentifierFromMappedFile(const void * base,wasteful_vector<uint8_t> & identifier)147 bool FileID::ElfFileIdentifierFromMappedFile(const void* base,
148                                              wasteful_vector<uint8_t>& identifier) {
149   // Look for a build id note first.
150   if (FindElfBuildIDNote(base, identifier))
151     return true;
152 
153   // Fall back on hashing the first page of the text section.
154   return HashElfTextSection(base, identifier);
155 }
156 
ElfFileIdentifier(wasteful_vector<uint8_t> & identifier)157 bool FileID::ElfFileIdentifier(wasteful_vector<uint8_t>& identifier) {
158   MemoryMappedFile mapped_file(path_.c_str(), 0);
159   if (!mapped_file.data())  // Should probably check if size >= ElfW(Ehdr)?
160     return false;
161 
162   return ElfFileIdentifierFromMappedFile(mapped_file.data(), identifier);
163 }
164 
// These three functions are not ever called in an unsafe context, so it's OK
// to allocate memory and use libc.

// Returns the |count| bytes starting at |bytes| rendered as uppercase
// hexadecimal, two characters per byte with no separators. A |count| of zero
// yields an empty string and |bytes| is not read.
static string bytes_to_hex_string(const uint8_t* bytes, size_t count) {
  static const char kHexDigits[] = "0123456789ABCDEF";

  string result;
  result.reserve(count * 2);
  // The index has the same type as |count| so it cannot wrap before reaching
  // the bound.
  for (size_t idx = 0; idx < count; ++idx) {
    const uint8_t byte = bytes[idx];
    result.push_back(kHexDigits[byte >> 4]);
    result.push_back(kHexDigits[byte & 0x0F]);
  }
  return result;
}
176 
177 // static
ConvertIdentifierToUUIDString(const wasteful_vector<uint8_t> & identifier)178 string FileID::ConvertIdentifierToUUIDString(
179     const wasteful_vector<uint8_t>& identifier) {
180   uint8_t identifier_swapped[kMDGUIDSize] = { 0 };
181 
182   // Endian-ness swap to match dump processor expectation.
183   memcpy(identifier_swapped, &identifier[0],
184          std::min(kMDGUIDSize, identifier.size()));
185   uint32_t* data1 = reinterpret_cast<uint32_t*>(identifier_swapped);
186   *data1 = htonl(*data1);
187   uint16_t* data2 = reinterpret_cast<uint16_t*>(identifier_swapped + 4);
188   *data2 = htons(*data2);
189   uint16_t* data3 = reinterpret_cast<uint16_t*>(identifier_swapped + 6);
190   *data3 = htons(*data3);
191 
192   return bytes_to_hex_string(identifier_swapped, kMDGUIDSize);
193 }
194 
195 // static
ConvertIdentifierToString(const wasteful_vector<uint8_t> & identifier)196 string FileID::ConvertIdentifierToString(
197     const wasteful_vector<uint8_t>& identifier) {
198   return bytes_to_hex_string(&identifier[0], identifier.size());
199 }
200 
201 }  // namespace google_breakpad
202