1 // Copyright (c) 2006, Google Inc.
2 // All rights reserved.
3 //
4 // Redistribution and use in source and binary forms, with or without
5 // modification, are permitted provided that the following conditions are
6 // met:
7 //
8 //     * Redistributions of source code must retain the above copyright
9 // notice, this list of conditions and the following disclaimer.
10 //     * Redistributions in binary form must reproduce the above
11 // copyright notice, this list of conditions and the following disclaimer
12 // in the documentation and/or other materials provided with the
13 // distribution.
14 //     * Neither the name of Google Inc. nor the names of its
15 // contributors may be used to endorse or promote products derived from
16 // this software without specific prior written permission.
17 //
18 // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
19 // "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
20 // LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
21 // A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
22 // OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
23 // SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
24 // LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
25 // DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
26 // THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
27 // (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
28 // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
29 
30 // minidump.h: A minidump reader.
31 //
32 // The basic structure of this module tracks the structure of the minidump
33 // file itself.  At the top level, a minidump file is represented by a
34 // Minidump object.  Like most other classes in this module, Minidump
35 // provides a Read method that initializes the object with information from
36 // the file.  Most of the classes in this file are wrappers around the
37 // "raw" structures found in the minidump file itself, and defined in
38 // minidump_format.h.  For example, each thread is represented by a
39 // MinidumpThread object, whose parameters are specified in an MDRawThread
40 // structure.  A properly byte-swapped MDRawThread can be obtained from a
41 // MinidumpThread easily by calling its thread() method.
42 //
43 // Most of the module lazily reads only the portion of the minidump file
44 // necessary to fulfill the user's request.  Calling Minidump::Read
45 // only reads the minidump's directory.  The thread list is not read until
46 // it is needed, and even once it's read, the memory regions for each
47 // thread's stack aren't read until they're needed.  This strategy avoids
48 // unnecessary file input, and allocating memory for data in which the user
49 // has no interest.  Note that although memory allocations for a typical
50 // minidump file are not particularly large, it is possible for legitimate
51 // minidumps to be sizable.  A full-memory minidump, for example, contains
52 // a snapshot of the entire mapped memory space.  Even a normal minidump,
53 // with stack memory only, can be large if, for example, the dump was
54 // generated in response to a crash that occurred due to an infinite-
55 // recursion bug that caused the stack's limits to be exceeded.  Finally,
56 // some users of this library will unfortunately find themselves in the
57 // position of having to process potentially-hostile minidumps that might
58 // attempt to cause problems by forcing the minidump processor to over-
59 // allocate memory.
60 //
61 // Memory management in this module is based on a strict
62 // you-don't-own-anything policy.  The only object owned by the user is
63 // the top-level Minidump object, the creation and destruction of which
64 // must be the user's own responsibility.  All other objects obtained
65 // through interaction with this module are ultimately owned by the
66 // Minidump object, and will be freed upon the Minidump object's destruction.
67 // Because memory regions can potentially involve large allocations, a
68 // FreeMemory method is provided by MinidumpMemoryRegion, allowing the user
69 // to release data when it is no longer needed.  Use of this method is
70 // optional but recommended.  If freed data is later required, it will
71 // be read back in from the minidump file again.
72 //
73 // There is one exception to this memory management policy:
74 // Minidump::ReadString will return a string object to the user, and the user
75 // is responsible for its deletion.
76 //
77 // Author: Mark Mentovai
78 
79 #ifndef GOOGLE_BREAKPAD_PROCESSOR_MINIDUMP_H__
80 #define GOOGLE_BREAKPAD_PROCESSOR_MINIDUMP_H__
81 
82 #ifdef _MSC_VER
83 // for lseek
84 #include <io.h>
85 #define lseek _lseek
86 #endif
87 
88 #include <map>
89 #include <string>
90 #include <vector>
91 
92 #include "google_breakpad/common/minidump_format.h"
93 #include "google_breakpad/processor/code_module.h"
94 #include "google_breakpad/processor/code_modules.h"
95 #include "google_breakpad/processor/memory_region.h"
96 
97 
98 namespace google_breakpad {
99 
100 
101 using std::map;
102 using std::string;
103 using std::vector;
104 
105 
106 class Minidump;
107 template<typename AddressType, typename EntryType> class RangeMap;
108 
109 
110 // MinidumpObject is the base of all Minidump* objects except for Minidump
111 // itself.
112 class MinidumpObject {
113  public:
~MinidumpObject()114   virtual ~MinidumpObject() {}
115 
valid()116   bool valid() const { return valid_; }
117 
118  protected:
119   explicit MinidumpObject(Minidump* minidump);
120 
121   // Refers to the Minidump object that is the ultimate parent of this
122   // Some MinidumpObjects are owned by other MinidumpObjects, but at the
123   // root of the ownership tree is always a Minidump.  The Minidump object
124   // is kept here for access to its seeking and reading facilities, and
125   // for access to data about the minidump file itself, such as whether
126   // it should be byte-swapped.
127   Minidump* minidump_;
128 
129   // MinidumpObjects are not valid when created.  When a subclass populates
130   // its own fields, it can set valid_ to true.  Accessors and mutators may
131   // wish to consider or alter the valid_ state as they interact with
132   // objects.
133   bool      valid_;
134 };
135 
136 
137 // This class exists primarily to provide a virtual destructor in a base
138 // class common to all objects that might be stored in
139 // Minidump::mStreamObjects.  Some object types (MinidumpContext) will
140 // never be stored in Minidump::mStreamObjects, but are represented as
141 // streams and adhere to the same interface, and may be derived from
142 // this class.
143 class MinidumpStream : public MinidumpObject {
144  public:
~MinidumpStream()145   virtual ~MinidumpStream() {}
146 
147  protected:
148   explicit MinidumpStream(Minidump* minidump);
149 
150  private:
151   // Populate (and validate) the MinidumpStream.  minidump_ is expected
152   // to be positioned at the beginning of the stream, so that the next
153   // read from the minidump will be at the beginning of the stream.
154   // expected_size should be set to the stream's length as contained in
155   // the MDRawDirectory record or other identifying record.  A class
156   // that implements MinidumpStream can compare expected_size to a
157   // known size as an integrity check.
158   virtual bool Read(u_int32_t expected_size) = 0;
159 };
160 
161 
162 // MinidumpContext carries a CPU-specific MDRawContext structure, which
163 // contains CPU context such as register states.  Each thread has its
164 // own context, and the exception record, if present, also has its own
165 // context.  Note that if the exception record is present, the context it
166 // refers to is probably what the user wants to use for the exception
167 // thread, instead of that thread's own context.  The exception thread's
168 // context (as opposed to the exception record's context) will contain
169 // context for the exception handler (which performs minidump generation),
170 // and not the context that caused the exception (which is probably what the
171 // user wants).
172 class MinidumpContext : public MinidumpStream {
173  public:
174   virtual ~MinidumpContext();
175 
176   // Returns an MD_CONTEXT_* value such as MD_CONTEXT_X86 or MD_CONTEXT_PPC
177   // identifying the CPU type that the context was collected from.  The
178   // returned value will identify the CPU only, and will have any other
179   // MD_CONTEXT_* bits masked out.  Returns 0 on failure.
180   u_int32_t GetContextCPU() const;
181 
182   // Returns raw CPU-specific context data for the named CPU type.  If the
183   // context data does not match the CPU type or does not exist, returns
184   // NULL.
185   const MDRawContextX86*   GetContextX86() const;
186   const MDRawContextPPC*   GetContextPPC() const;
187   const MDRawContextAMD64* GetContextAMD64() const;
188   const MDRawContextSPARC* GetContextSPARC() const;
189 
190   // Print a human-readable representation of the object to stdout.
191   void Print();
192 
193  private:
194   friend class MinidumpThread;
195   friend class MinidumpException;
196 
197   explicit MinidumpContext(Minidump* minidump);
198 
199   bool Read(u_int32_t expected_size);
200 
201   // Free the CPU-specific context structure.
202   void FreeContext();
203 
204   // If the minidump contains a SYSTEM_INFO_STREAM, makes sure that the
205   // system info stream gives an appropriate CPU type matching the context
206   // CPU type in context_cpu_type.  Returns false if the CPU type does not
207   // match.  Returns true if the CPU type matches or if the minidump does
208   // not contain a system info stream.
209   bool CheckAgainstSystemInfo(u_int32_t context_cpu_type);
210 
211   // Store this separately because of the weirdo AMD64 context
212   u_int32_t context_flags_;
213 
214   // The CPU-specific context structure.
215   union {
216     MDRawContextBase*  base;
217     MDRawContextX86*   x86;
218     MDRawContextPPC*   ppc;
219     MDRawContextAMD64* amd64;
220     // on Solaris SPARC, sparc is defined as a numeric constant,
221     // so variables can NOT be named as sparc
222     MDRawContextSPARC*  ctx_sparc;
223   } context_;
224 };
225 
226 
227 // MinidumpMemoryRegion does not wrap any MDRaw structure, and only contains
228 // a reference to an MDMemoryDescriptor.  This object is intended to wrap
229 // portions of a minidump file that contain memory dumps.  In normal
230 // minidumps, each MinidumpThread owns a MinidumpMemoryRegion corresponding
231 // to the thread's stack memory.  MinidumpMemoryList also gives access to
232 // memory regions in its list as MinidumpMemoryRegions.  This class
233 // adheres to MemoryRegion so that it may be used as a data provider to
234 // the Stackwalker family of classes.
235 class MinidumpMemoryRegion : public MinidumpObject,
236                              public MemoryRegion {
237  public:
238   virtual ~MinidumpMemoryRegion();
239 
set_max_bytes(u_int32_t max_bytes)240   static void set_max_bytes(u_int32_t max_bytes) { max_bytes_ = max_bytes; }
max_bytes()241   static u_int32_t max_bytes() { return max_bytes_; }
242 
243   // Returns a pointer to the base of the memory region.  Returns the
244   // cached value if available, otherwise, reads the minidump file and
245   // caches the memory region.
246   const u_int8_t* GetMemory();
247 
248   // The address of the base of the memory region.
249   u_int64_t GetBase();
250 
251   // The size, in bytes, of the memory region.
252   u_int32_t GetSize();
253 
254   // Frees the cached memory region, if cached.
255   void FreeMemory();
256 
257   // Obtains the value of memory at the pointer specified by address.
258   bool GetMemoryAtAddress(u_int64_t address, u_int8_t*  value);
259   bool GetMemoryAtAddress(u_int64_t address, u_int16_t* value);
260   bool GetMemoryAtAddress(u_int64_t address, u_int32_t* value);
261   bool GetMemoryAtAddress(u_int64_t address, u_int64_t* value);
262 
263   // Print a human-readable representation of the object to stdout.
264   void Print();
265 
266  private:
267   friend class MinidumpThread;
268   friend class MinidumpMemoryList;
269 
270   explicit MinidumpMemoryRegion(Minidump* minidump);
271 
272   // Identify the base address and size of the memory region, and the
273   // location it may be found in the minidump file.
274   void SetDescriptor(MDMemoryDescriptor* descriptor);
275 
276   // Implementation for GetMemoryAtAddress
277   template<typename T> bool GetMemoryAtAddressInternal(u_int64_t address,
278                                                        T*        value);
279 
280   // The largest memory region that will be read from a minidump.  The
281   // default is 1MB.
282   static u_int32_t max_bytes_;
283 
284   // Base address and size of the memory region, and its position in the
285   // minidump file.
286   MDMemoryDescriptor* descriptor_;
287 
288   // Cached memory.
289   vector<u_int8_t>*   memory_;
290 };
291 
292 
293 // MinidumpThread contains information about a thread of execution,
294 // including a snapshot of the thread's stack and CPU context.  For
295 // the thread that caused an exception, the context carried by
296 // MinidumpException is probably desired instead of the CPU context
297 // provided here.
298 class MinidumpThread : public MinidumpObject {
299  public:
300   virtual ~MinidumpThread();
301 
thread()302   const MDRawThread* thread() const { return valid_ ? &thread_ : NULL; }
303   MinidumpMemoryRegion* GetMemory();
304   MinidumpContext* GetContext();
305 
306   // The thread ID is used to determine if a thread is the exception thread,
307   // so a special getter is provided to retrieve this data from the
308   // MDRawThread structure.  Returns false if the thread ID cannot be
309   // determined.
310   bool GetThreadID(u_int32_t *thread_id) const;
311 
312   // Print a human-readable representation of the object to stdout.
313   void Print();
314 
315  private:
316   // These objects are managed by MinidumpThreadList.
317   friend class MinidumpThreadList;
318 
319   explicit MinidumpThread(Minidump* minidump);
320 
321   // This works like MinidumpStream::Read, but is driven by
322   // MinidumpThreadList.  No size checking is done, because
323   // MinidumpThreadList handles that directly.
324   bool Read();
325 
326   MDRawThread           thread_;
327   MinidumpMemoryRegion* memory_;
328   MinidumpContext*      context_;
329 };
330 
331 
332 // MinidumpThreadList contains all of the threads (as MinidumpThreads) in
333 // a process.
334 class MinidumpThreadList : public MinidumpStream {
335  public:
336   virtual ~MinidumpThreadList();
337 
set_max_threads(u_int32_t max_threads)338   static void set_max_threads(u_int32_t max_threads) {
339     max_threads_ = max_threads;
340   }
max_threads()341   static u_int32_t max_threads() { return max_threads_; }
342 
thread_count()343   unsigned int thread_count() const {
344     return valid_ ? thread_count_ : 0;
345   }
346 
347   // Sequential access to threads.
348   MinidumpThread* GetThreadAtIndex(unsigned int index) const;
349 
350   // Random access to threads.
351   MinidumpThread* GetThreadByID(u_int32_t thread_id);
352 
353   // Print a human-readable representation of the object to stdout.
354   void Print();
355 
356  private:
357   friend class Minidump;
358 
359   typedef map<u_int32_t, MinidumpThread*> IDToThreadMap;
360   typedef vector<MinidumpThread> MinidumpThreads;
361 
362   static const u_int32_t kStreamType = MD_THREAD_LIST_STREAM;
363 
364   explicit MinidumpThreadList(Minidump* aMinidump);
365 
366   bool Read(u_int32_t aExpectedSize);
367 
368   // The largest number of threads that will be read from a minidump.  The
369   // default is 256.
370   static u_int32_t max_threads_;
371 
372   // Access to threads using the thread ID as the key.
373   IDToThreadMap    id_to_thread_map_;
374 
375   // The list of threads.
376   MinidumpThreads* threads_;
377   u_int32_t        thread_count_;
378 };
379 
380 
381 // MinidumpModule wraps MDRawModule, which contains information about loaded
382 // code modules.  Access is provided to various data referenced indirectly
383 // by MDRawModule, such as the module's name and a specification for where
384 // to locate debugging information for the module.
385 class MinidumpModule : public MinidumpObject,
386                        public CodeModule {
387  public:
388   virtual ~MinidumpModule();
389 
set_max_cv_bytes(u_int32_t max_cv_bytes)390   static void set_max_cv_bytes(u_int32_t max_cv_bytes) {
391     max_cv_bytes_ = max_cv_bytes;
392   }
max_cv_bytes()393   static u_int32_t max_cv_bytes() { return max_cv_bytes_; }
394 
set_max_misc_bytes(u_int32_t max_misc_bytes)395   static void set_max_misc_bytes(u_int32_t max_misc_bytes) {
396     max_misc_bytes_ = max_misc_bytes;
397   }
max_misc_bytes()398   static u_int32_t max_misc_bytes() { return max_misc_bytes_; }
399 
module()400   const MDRawModule* module() const { return valid_ ? &module_ : NULL; }
401 
402   // CodeModule implementation
base_address()403   virtual u_int64_t base_address() const {
404     return valid_ ? module_.base_of_image : static_cast<u_int64_t>(-1);
405   }
size()406   virtual u_int64_t size() const { return valid_ ? module_.size_of_image : 0; }
407   virtual string code_file() const;
408   virtual string code_identifier() const;
409   virtual string debug_file() const;
410   virtual string debug_identifier() const;
411   virtual string version() const;
412   virtual const CodeModule* Copy() const;
413 
414   // The CodeView record, which contains information to locate the module's
415   // debugging information (pdb).  This is returned as u_int8_t* because
416   // the data can be of types MDCVInfoPDB20* or MDCVInfoPDB70*, or it may be
417   // of a type unknown to Breakpad, in which case the raw data will still be
418   // returned but no byte-swapping will have been performed.  Check the
419   // record's signature in the first four bytes to differentiate between
420   // the various types.  Current toolchains generate modules which carry
421   // MDCVInfoPDB70 by default.  Returns a pointer to the CodeView record on
422   // success, and NULL on failure.  On success, the optional |size| argument
423   // is set to the size of the CodeView record.
424   const u_int8_t* GetCVRecord(u_int32_t* size);
425 
426   // The miscellaneous debug record, which is obsolete.  Current toolchains
427   // do not generate this type of debugging information (dbg), and this
428   // field is not expected to be present.  Returns a pointer to the debugging
429   // record on success, and NULL on failure.  On success, the optional |size|
430   // argument is set to the size of the debugging record.
431   const MDImageDebugMisc* GetMiscRecord(u_int32_t* size);
432 
433   // Print a human-readable representation of the object to stdout.
434   void Print();
435 
436  private:
437   // These objects are managed by MinidumpModuleList.
438   friend class MinidumpModuleList;
439 
440   explicit MinidumpModule(Minidump* minidump);
441 
442   // This works like MinidumpStream::Read, but is driven by
443   // MinidumpModuleList.  No size checking is done, because
444   // MinidumpModuleList handles that directly.
445   bool Read();
446 
447   // Reads indirectly-referenced data, including the module name, CodeView
448   // record, and miscellaneous debugging record.  This is necessary to allow
449   // MinidumpModuleList to fully construct MinidumpModule objects without
450   // requiring seeks to read a contiguous set of MinidumpModule objects.
451   // All auxiliary data should be available when Read is called, in order to
452   // allow the CodeModule getters to be const methods.
453   bool ReadAuxiliaryData();
454 
455   // The largest number of bytes that will be read from a minidump for a
456   // CodeView record or miscellaneous debugging record, respectively.  The
457   // default for each is 1024.
458   static u_int32_t max_cv_bytes_;
459   static u_int32_t max_misc_bytes_;
460 
461   // True after a successful Read.  This is different from valid_, which is
462   // not set true until ReadAuxiliaryData also completes successfully.
463   // module_valid_ is only used by ReadAuxiliaryData and the functions it
464   // calls to determine whether the object is ready for auxiliary data to
465   // be read.
466   bool              module_valid_;
467 
468   // True if debug info was read from the module.  Certain modules
469   // may contain debug records in formats we don't support,
470   // so we can just set this to false to ignore them.
471   bool              has_debug_info_;
472 
473   MDRawModule       module_;
474 
475   // Cached module name.
476   const string*     name_;
477 
478   // Cached CodeView record - this is MDCVInfoPDB20 or (likely)
479   // MDCVInfoPDB70, or possibly something else entirely.  Stored as a u_int8_t
480   // because the structure contains a variable-sized string and its exact
481   // size cannot be known until it is processed.
482   vector<u_int8_t>* cv_record_;
483 
484   // If cv_record_ is present, cv_record_signature_ contains a copy of the
485   // CodeView record's first four bytes, for ease of determinining the
486   // type of structure that cv_record_ contains.
487   u_int32_t cv_record_signature_;
488 
489   // Cached MDImageDebugMisc (usually not present), stored as u_int8_t
490   // because the structure contains a variable-sized string and its exact
491   // size cannot be known until it is processed.
492   vector<u_int8_t>* misc_record_;
493 };
494 
495 
496 // MinidumpModuleList contains all of the loaded code modules for a process
497 // in the form of MinidumpModules.  It maintains a map of these modules
498 // so that it may easily provide a code module corresponding to a specific
499 // address.
500 class MinidumpModuleList : public MinidumpStream,
501                            public CodeModules {
502  public:
503   virtual ~MinidumpModuleList();
504 
set_max_modules(u_int32_t max_modules)505   static void set_max_modules(u_int32_t max_modules) {
506     max_modules_ = max_modules;
507   }
max_modules()508   static u_int32_t max_modules() { return max_modules_; }
509 
510   // CodeModules implementation.
module_count()511   virtual unsigned int module_count() const {
512     return valid_ ? module_count_ : 0;
513   }
514   virtual const MinidumpModule* GetModuleForAddress(u_int64_t address) const;
515   virtual const MinidumpModule* GetMainModule() const;
516   virtual const MinidumpModule* GetModuleAtSequence(
517       unsigned int sequence) const;
518   virtual const MinidumpModule* GetModuleAtIndex(unsigned int index) const;
519   virtual const CodeModules* Copy() const;
520 
521   // Print a human-readable representation of the object to stdout.
522   void Print();
523 
524  private:
525   friend class Minidump;
526 
527   typedef vector<MinidumpModule> MinidumpModules;
528 
529   static const u_int32_t kStreamType = MD_MODULE_LIST_STREAM;
530 
531   explicit MinidumpModuleList(Minidump* minidump);
532 
533   bool Read(u_int32_t expected_size);
534 
535   // The largest number of modules that will be read from a minidump.  The
536   // default is 1024.
537   static u_int32_t max_modules_;
538 
539   // Access to modules using addresses as the key.
540   RangeMap<u_int64_t, unsigned int> *range_map_;
541 
542   MinidumpModules *modules_;
543   u_int32_t module_count_;
544 };
545 
546 
547 // MinidumpMemoryList corresponds to a minidump's MEMORY_LIST_STREAM stream,
548 // which references the snapshots of all of the memory regions contained
549 // within the minidump.  For a normal minidump, this includes stack memory
// (also referenced by each MinidumpThread; in fact, the MDMemoryDescriptors
551 // here and in MDRawThread both point to exactly the same data in a
552 // minidump file, conserving space), as well as a 256-byte snapshot of memory
553 // surrounding the instruction pointer in the case of an exception.  Other
554 // types of minidumps may contain significantly more memory regions.  Full-
555 // memory minidumps contain all of a process' mapped memory.
556 class MinidumpMemoryList : public MinidumpStream {
557  public:
558   virtual ~MinidumpMemoryList();
559 
set_max_regions(u_int32_t max_regions)560   static void set_max_regions(u_int32_t max_regions) {
561     max_regions_ = max_regions;
562   }
max_regions()563   static u_int32_t max_regions() { return max_regions_; }
564 
region_count()565   unsigned int region_count() const { return valid_ ? region_count_ : 0; }
566 
567   // Sequential access to memory regions.
568   MinidumpMemoryRegion* GetMemoryRegionAtIndex(unsigned int index);
569 
570   // Random access to memory regions.  Returns the region encompassing
571   // the address identified by address.
572   MinidumpMemoryRegion* GetMemoryRegionForAddress(u_int64_t address);
573 
574   // Print a human-readable representation of the object to stdout.
575   void Print();
576 
577  private:
578   friend class Minidump;
579 
580   typedef vector<MDMemoryDescriptor>   MemoryDescriptors;
581   typedef vector<MinidumpMemoryRegion> MemoryRegions;
582 
583   static const u_int32_t kStreamType = MD_MEMORY_LIST_STREAM;
584 
585   explicit MinidumpMemoryList(Minidump* minidump);
586 
587   bool Read(u_int32_t expected_size);
588 
589   // The largest number of memory regions that will be read from a minidump.
590   // The default is 256.
591   static u_int32_t max_regions_;
592 
593   // Access to memory regions using addresses as the key.
594   RangeMap<u_int64_t, unsigned int> *range_map_;
595 
596   // The list of descriptors.  This is maintained separately from the list
597   // of regions, because MemoryRegion doesn't own its MemoryDescriptor, it
598   // maintains a pointer to it.  descriptors_ provides the storage for this
599   // purpose.
600   MemoryDescriptors *descriptors_;
601 
602   // The list of regions.
603   MemoryRegions *regions_;
604   u_int32_t region_count_;
605 };
606 
607 
608 // MinidumpException wraps MDRawExceptionStream, which contains information
609 // about the exception that caused the minidump to be generated, if the
610 // minidump was generated in an exception handler called as a result of
611 // an exception.  It also provides access to a MinidumpContext object,
612 // which contains the CPU context for the exception thread at the time
613 // the exception occurred.
614 class MinidumpException : public MinidumpStream {
615  public:
616   virtual ~MinidumpException();
617 
exception()618   const MDRawExceptionStream* exception() const {
619     return valid_ ? &exception_ : NULL;
620   }
621 
622   // The thread ID is used to determine if a thread is the exception thread,
623   // so a special getter is provided to retrieve this data from the
624   // MDRawExceptionStream structure.  Returns false if the thread ID cannot
625   // be determined.
626   bool GetThreadID(u_int32_t *thread_id) const;
627 
628   MinidumpContext* GetContext();
629 
630   // Print a human-readable representation of the object to stdout.
631   void Print();
632 
633  private:
634   friend class Minidump;
635 
636   static const u_int32_t kStreamType = MD_EXCEPTION_STREAM;
637 
638   explicit MinidumpException(Minidump* minidump);
639 
640   bool Read(u_int32_t expected_size);
641 
642   MDRawExceptionStream exception_;
643   MinidumpContext*     context_;
644 };
645 
646 
647 // MinidumpSystemInfo wraps MDRawSystemInfo and provides information about
648 // the system on which the minidump was generated.  See also MinidumpMiscInfo.
649 class MinidumpSystemInfo : public MinidumpStream {
650  public:
651   virtual ~MinidumpSystemInfo();
652 
system_info()653   const MDRawSystemInfo* system_info() const {
654     return valid_ ? &system_info_ : NULL;
655   }
656 
657   // GetOS and GetCPU return textual representations of the operating system
658   // and CPU that produced the minidump.  Unlike most other Minidump* methods,
659   // they return string objects, not weak pointers.  Defined values for
660   // GetOS() are "mac", "windows", and "linux".  Defined values for GetCPU
661   // are "x86" and "ppc".  These methods return an empty string when their
662   // values are unknown.
663   string GetOS();
664   string GetCPU();
665 
666   // I don't know what CSD stands for, but this field is documented as
667   // returning a textual representation of the OS service pack.  On other
668   // platforms, this provides additional information about an OS version
669   // level beyond major.minor.micro.  Returns NULL if unknown.
670   const string* GetCSDVersion();
671 
672   // If a CPU vendor string can be determined, returns a pointer to it,
673   // otherwise, returns NULL.  CPU vendor strings can be determined from
674   // x86 CPUs with CPUID 0.
675   const string* GetCPUVendor();
676 
677   // Print a human-readable representation of the object to stdout.
678   void Print();
679 
680  private:
681   friend class Minidump;
682 
683   static const u_int32_t kStreamType = MD_SYSTEM_INFO_STREAM;
684 
685   explicit MinidumpSystemInfo(Minidump* minidump);
686 
687   bool Read(u_int32_t expected_size);
688 
689   MDRawSystemInfo system_info_;
690 
691   // Textual representation of the OS service pack, for minidumps produced
692   // by MiniDumpWriteDump on Windows.
693   const string* csd_version_;
694 
695   // A string identifying the CPU vendor, if known.
696   const string* cpu_vendor_;
697 };
698 
699 
700 // MinidumpMiscInfo wraps MDRawMiscInfo and provides information about
701 // the process that generated the minidump, and optionally additional system
702 // information.  See also MinidumpSystemInfo.
703 class MinidumpMiscInfo : public MinidumpStream {
704  public:
misc_info()705   const MDRawMiscInfo* misc_info() const {
706     return valid_ ? &misc_info_ : NULL;
707   }
708 
709   // Print a human-readable representation of the object to stdout.
710   void Print();
711 
712  private:
713   friend class Minidump;
714 
715   static const u_int32_t kStreamType = MD_MISC_INFO_STREAM;
716 
717   explicit MinidumpMiscInfo(Minidump* minidump_);
718 
719   bool Read(u_int32_t expected_size_);
720 
721   MDRawMiscInfo misc_info_;
722 };
723 
724 
725 // MinidumpBreakpadInfo wraps MDRawBreakpadInfo, which is an optional stream in
726 // a minidump that provides additional information about the process state
727 // at the time the minidump was generated.
728 class MinidumpBreakpadInfo : public MinidumpStream {
729  public:
breakpad_info()730   const MDRawBreakpadInfo* breakpad_info() const {
731     return valid_ ? &breakpad_info_ : NULL;
732   }
733 
734   // These thread IDs are used to determine if threads deserve special
735   // treatment, so special getters are provided to retrieve this data from
736   // the MDRawBreakpadInfo structure.  The getters return false if the thread
737   // IDs cannot be determined.
738   bool GetDumpThreadID(u_int32_t *thread_id) const;
739   bool GetRequestingThreadID(u_int32_t *thread_id) const;
740 
741   // Print a human-readable representation of the object to stdout.
742   void Print();
743 
744  private:
745   friend class Minidump;
746 
747   static const u_int32_t kStreamType = MD_BREAKPAD_INFO_STREAM;
748 
749   explicit MinidumpBreakpadInfo(Minidump* minidump_);
750 
751   bool Read(u_int32_t expected_size_);
752 
753   MDRawBreakpadInfo breakpad_info_;
754 };
755 
756 
757 // Minidump is the user's interface to a minidump file.  It wraps MDRawHeader
758 // and provides access to the minidump's top-level stream directory.
759 class Minidump {
760  public:
761   // path is the pathname of a file containing the minidump.
762   explicit Minidump(const string& path);
763 
764   virtual ~Minidump();
765 
path()766   virtual string path() const {
767     return path_;
768   }
set_max_streams(u_int32_t max_streams)769   static void set_max_streams(u_int32_t max_streams) {
770     max_streams_ = max_streams;
771   }
max_streams()772   static u_int32_t max_streams() { return max_streams_; }
773 
set_max_string_length(u_int32_t max_string_length)774   static void set_max_string_length(u_int32_t max_string_length) {
775     max_string_length_ = max_string_length;
776   }
max_string_length()777   static u_int32_t max_string_length() { return max_string_length_; }
778 
header()779   virtual const MDRawHeader* header() const { return valid_ ? &header_ : NULL; }
780 
781   // Reads the minidump file's header and top-level stream directory.
782   // The minidump is expected to be positioned at the beginning of the
783   // header.  Read() sets up the stream list and map, and validates the
784   // Minidump object.
785   virtual bool Read();
786 
787   // The next set of methods are stubs that call GetStream.  They exist to
788   // force code generation of the templatized API within the module, and
789   // to avoid exposing an ugly API (GetStream needs to accept a garbage
790   // parameter).
791   virtual MinidumpThreadList* GetThreadList();
792   MinidumpModuleList* GetModuleList();
793   MinidumpMemoryList* GetMemoryList();
794   MinidumpException* GetException();
795   MinidumpSystemInfo* GetSystemInfo();
796   MinidumpMiscInfo* GetMiscInfo();
797   MinidumpBreakpadInfo* GetBreakpadInfo();
798 
799   // The next set of methods are provided for users who wish to access
800   // data in minidump files directly, while leveraging the rest of
801   // this class and related classes to handle the basic minidump
802   // structure and known stream types.
803 
GetDirectoryEntryCount()804   unsigned int GetDirectoryEntryCount() const {
805     return valid_ ? header_.stream_count : 0;
806   }
807   const MDRawDirectory* GetDirectoryEntryAtIndex(unsigned int index) const;
808 
809   // The next 2 methods are lower-level I/O routines.  They use fd_.
810 
811   // Reads count bytes from the minidump at the current position into
812   // the storage area pointed to by bytes.  bytes must be of sufficient
813   // size.  After the read, the file position is advanced by count.
814   bool ReadBytes(void* bytes, size_t count);
815 
816   // Sets the position of the minidump file to offset.
817   bool SeekSet(off_t offset);
818 
819   // Returns the current position of the minidump file.
Tell()820   off_t Tell() { return valid_ ? lseek(fd_, 0, SEEK_CUR) : (off_t)-1; }
821 
822   // The next 2 methods are medium-level I/O routines.
823 
824   // ReadString returns a string which is owned by the caller!  offset
825   // specifies the offset that a length-encoded string is stored at in the
826   // minidump file.
827   string* ReadString(off_t offset);
828 
829   // SeekToStreamType positions the file at the beginning of a stream
830   // identified by stream_type, and informs the caller of the stream's
831   // length by setting *stream_length.  Because stream_map maps each stream
832   // type to only one stream in the file, this might mislead the user into
833   // thinking that the stream that this seeks to is the only stream with
834   // type stream_type.  That can't happen for streams that these classes
835   // deal with directly, because they're only supposed to be present in the
836   // file singly, and that's verified when stream_map_ is built.  Users who
837   // are looking for other stream types should be aware of this
838   // possibility, and consider using GetDirectoryEntryAtIndex (possibly
839   // with GetDirectoryEntryCount) if expecting multiple streams of the same
840   // type in a single minidump file.
841   bool SeekToStreamType(u_int32_t stream_type, u_int32_t* stream_length);
842 
swap()843   bool swap() const { return valid_ ? swap_ : false; }
844 
845   // Print a human-readable representation of the object to stdout.
846   void Print();
847 
848  private:
849   // MinidumpStreamInfo is used in the MinidumpStreamMap.  It lets
850   // the Minidump object locate interesting streams quickly, and
851   // provides a convenient place to stash MinidumpStream objects.
852   struct MinidumpStreamInfo {
MinidumpStreamInfoMinidumpStreamInfo853     MinidumpStreamInfo() : stream_index(0), stream(NULL) {}
~MinidumpStreamInfoMinidumpStreamInfo854     ~MinidumpStreamInfo() { delete stream; }
855 
856     // Index into the MinidumpDirectoryEntries vector
857     unsigned int    stream_index;
858 
859     // Pointer to the stream if cached, or NULL if not yet populated
860     MinidumpStream* stream;
861   };
862 
863   typedef vector<MDRawDirectory> MinidumpDirectoryEntries;
864   typedef map<u_int32_t, MinidumpStreamInfo> MinidumpStreamMap;
865 
866   template<typename T> T* GetStream(T** stream);
867 
868   // Opens the minidump file, or if already open, seeks to the beginning.
869   bool Open();
870 
871   // The largest number of top-level streams that will be read from a minidump.
872   // Note that streams are only read (and only consume memory) as needed,
873   // when directed by the caller.  The default is 128.
874   static u_int32_t max_streams_;
875 
876   // The maximum length of a UTF-16 string that will be read from a minidump
877   // in 16-bit words.  The default is 1024.  UTF-16 strings are converted
878   // to UTF-8 when stored in memory, and each UTF-16 word will be represented
879   // by as many as 3 bytes in UTF-8.
880   static unsigned int max_string_length_;
881 
882   MDRawHeader               header_;
883 
884   // The list of streams.
885   MinidumpDirectoryEntries* directory_;
886 
887   // Access to streams using the stream type as the key.
888   MinidumpStreamMap*        stream_map_;
889 
890   // The pathname of the minidump file to process, set in the constructor.
891   const string              path_;
892 
893   // The file descriptor for all file I/O.  Used by ReadBytes and SeekSet.
894   // Set based on the |path_| member by Open, which is called by Read.
895   int                       fd_;
896 
897   // swap_ is true if the minidump file should be byte-swapped.  If the
898   // minidump was produced by a CPU that is other-endian than the CPU
899   // processing the minidump, this will be true.  If the two CPUs are
900   // same-endian, this will be false.
901   bool                      swap_;
902 
903   // Validity of the Minidump structure, false immediately after
904   // construction or after a failed Read(); true following a successful
905   // Read().
906   bool                      valid_;
907 };
908 
909 
910 }  // namespace google_breakpad
911 
912 
913 #endif  // GOOGLE_BREAKPAD_PROCESSOR_MINIDUMP_H__
914