1/* This Source Code Form is subject to the terms of the Mozilla Public
2 * License, v. 2.0. If a copy of the MPL was not distributed with this
3 * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
4
5"use strict";
6
7const myScope = this;
8
9ChromeUtils.import("resource://gre/modules/KeyValueParser.jsm");
10ChromeUtils.import("resource://gre/modules/Log.jsm", this);
11ChromeUtils.import("resource://gre/modules/osfile.jsm", this);
12ChromeUtils.import("resource://gre/modules/PromiseUtils.jsm");
13ChromeUtils.import("resource://gre/modules/Services.jsm", this);
14ChromeUtils.import("resource://gre/modules/TelemetryController.jsm");
15ChromeUtils.import("resource://gre/modules/Timer.jsm", this);
16ChromeUtils.import("resource://gre/modules/XPCOMUtils.jsm", this);
17
18var EXPORTED_SYMBOLS = [
19  "CrashManager",
20];
21
22/**
23 * How long to wait after application startup before crash event files are
24 * automatically aggregated.
25 *
26 * We defer aggregation for performance reasons, as we don't want too many
27 * services competing for I/O immediately after startup.
28 */
29const AGGREGATE_STARTUP_DELAY_MS = 57000;
30
31const MILLISECONDS_IN_DAY = 24 * 60 * 60 * 1000;
32
// Number of whole days between the UNIX epoch and the given Date.
// The implementation was copied from /services/metrics.storage.jsm and
// does not account for leap seconds.
function dateToDays(date) {
  const msSinceEpoch = date.getTime();
  const daysSinceEpoch = msSinceEpoch / MILLISECONDS_IN_DAY;

  return Math.floor(daysSinceEpoch);
}
39
/**
 * Read a field from the specified object, trim it, and delete it from the
 * object.
 *
 * @param obj {Object} The object holding the field
 * @param field {String} The name of the field to be read and removed
 *
 * @returns {String} the trimmed field contents, null if the field is absent
 */
function getAndRemoveField(obj, field) {
  if (!(field in obj)) {
    return null;
  }

  // Extra files are split on LF characters, so files generated on Windows
  // may leave a trailing CR on each value; trimming drops it.
  const trimmed = obj[field].trim();

  delete obj[field];

  return trimmed;
}
61
/**
 * Decode the JSON string stored in the specified field and delete the field
 * from the object, whether or not decoding succeeded.
 *
 * @param obj {Object} The object holding the field
 * @param field {String} The name of the field to be parsed and removed
 *
 * @returns {Object} the parsed value, null if the field is absent or its
 *          contents are not valid JSON (the error is reported)
 */
function parseAndRemoveField(obj, field) {
  if (!(field in obj)) {
    return null;
  }

  let parsed = null;

  try {
    parsed = JSON.parse(obj[field]);
  } catch (parseError) {
    // Invalid JSON is reported but otherwise treated like a missing field.
    Cu.reportError(parseError);
  }

  delete obj[field];

  return parsed;
}
86
87/**
88 * A gateway to crash-related data.
89 *
90 * This type is generic and can be instantiated any number of times.
91 * However, most applications will typically only have one instance
92 * instantiated and that instance will point to profile and user appdata
93 * directories.
94 *
95 * Instances are created by passing an object with properties.
96 * Recognized properties are:
97 *
98 *   pendingDumpsDir (string) (required)
99 *     Where dump files that haven't been uploaded are located.
100 *
101 *   submittedDumpsDir (string) (required)
102 *     Where records of uploaded dumps are located.
103 *
 *   eventsDirs (array) (required)
 *     Directories (defined as strings) where events files are written. This
 *     instance will collect events from files in the directories specified.
 *
 *   storeDir (string) (required)
 *     Directory we will use for our data store. This instance will write
 *     data files into the directory specified.
111 *
112 *   telemetryStoreSizeKey (string)
113 *     Telemetry histogram to report store size under.
114 */
var CrashManager = function(options) {
  // Refuse to build an instance that lacks any of the mandatory locations.
  const requiredKeys = [
    "pendingDumpsDir",
    "submittedDumpsDir",
    "eventsDirs",
    "storeDir",
  ];
  requiredKeys.forEach((key) => {
    if (!(key in options)) {
      throw new Error("Required key not present in options: " + key);
    }
  });

  this._log = Log.repository.getLogger("Crashes.CrashManager");

  // Recognized option name -> private field that stores its value.
  const fieldForOption = new Map([
    ["pendingDumpsDir", "_pendingDumpsDir"],
    ["submittedDumpsDir", "_submittedDumpsDir"],
    ["eventsDirs", "_eventsDirs"],
    ["storeDir", "_storeDir"],
    ["telemetryStoreSizeKey", "_telemetryStoreSizeKey"],
  ]);

  for (let name in options) {
    const value = options[name];

    // Anything we do not recognize is treated as a caller error.
    if (!fieldForOption.has(name)) {
      throw new Error("Unknown property in options: " + name);
    }

    this[fieldForOption.get(name)] = value;
  }

  Object.assign(this, {
    // Promise for the in-progress aggregation operation. It is kept on the
    // object so concurrent callers can be handed the same promise.
    _aggregatePromise: null,

    // Crash ID -> deferred, used to track crashes that are being added.
    _crashPromises: new Map(),

    // Promise for the crash ping; only used by tests.
    _pingPromise: null,

    // The CrashStore currently attached to this object.
    _store: null,

    // Pending store retrieval. Avoids races when _getStore() is called
    // several times in a short interval.
    _getStoreTask: null,

    // The timer controlling the expiration of the CrashStore instance.
    _storeTimer: null,

    // Semaphore preventing the timer-based mechanism from freeing the store.
    _storeProtectedCount: 0,
  });
};
178
179this.CrashManager.prototype = Object.freeze({
  // A crash in the main process.
  PROCESS_TYPE_MAIN: "main",

  // A crash in a content process.
  PROCESS_TYPE_CONTENT: "content",

  // A crash in a plugin process.
  PROCESS_TYPE_PLUGIN: "plugin",

  // A crash in a Gecko media plugin process.
  PROCESS_TYPE_GMPLUGIN: "gmplugin",

  // A crash in the GPU process.
  PROCESS_TYPE_GPU: "gpu",

  // A real crash.
  CRASH_TYPE_CRASH: "crash",

  // A hang.
  CRASH_TYPE_HANG: "hang",

  // Submission result values.
  SUBMISSION_RESULT_OK: "ok",
  SUBMISSION_RESULT_FAILED: "failed",

  // File name patterns handed to _getDirectoryEntries(). Capture group 1
  // becomes the `id` of the returned entry.
  // Pending dump: "<UUID>.dmp" (case-insensitive); captures the UUID.
  DUMP_REGEX: /^([0-9a-f]{8}-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{12})\.dmp$/i,
  // Submission record: "bp-<UUID>.txt" or "bp-hr-<UUID>.txt"
  // (case-insensitive); captures the UUID.
  SUBMITTED_REGEX: /^bp-(?:hr-)?([0-9a-f]{8}-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{12})\.txt$/i,
  // Any file name; captures the whole name.
  ALL_REGEX: /^(.*)$/,

  // How long the store object should persist in memory after it was last
  // requested before the manager drops its reference to it (see
  // _getStore()), leaving it eligible for garbage collection.
  STORE_EXPIRATION_MS: 60 * 1000,

  // Number of days after which a crash with no activity will get purged.
  PURGE_OLDER_THAN_DAYS: 180,

  // The following are return codes for individual event file processing.
  // File processed OK.
  EVENT_FILE_SUCCESS: "ok",
  // The event appears to be malformed.
  EVENT_FILE_ERROR_MALFORMED: "malformed",
  // The type of event is unknown.
  EVENT_FILE_ERROR_UNKNOWN_EVENT: "unknown-event",

  // A whitelist of crash annotations which do not contain sensitive data
  // and are saved in the crash record and sent with Firefox Health Report.
  // _filterAnnotations() drops every annotation whose name is not listed
  // here.
  ANNOTATION_WHITELIST: [
    "AsyncShutdownTimeout",
    "BuildID",
    "ipc_channel_error",
    "ProductID",
    "ProductName",
    "ReleaseChannel",
    "RemoteType",
    "SecondsSinceLastCrash",
    "ShutdownProgress",
    "StartupCrash",
    "TelemetryEnvironment",
    "Version",
    // The following entries are not normal annotations that can be found in
    // the .extra file but are included in the crash record/FHR:
    "AvailablePageFile",
    "AvailablePhysicalMemory",
    "AvailableVirtualMemory",
    "BlockedDllList",
    "BlocklistInitFailed",
    "ContainsMemoryReport",
    "CrashTime",
    "EventLoopNestingLevel",
    "IsGarbageCollecting",
    "MozCrashReason",
    "OOMAllocationSize",
    "SystemMemoryUsePercentage",
    "TextureUsage",
    "TotalPageFile",
    "TotalPhysicalMemory",
    "TotalVirtualMemory",
    "UptimeTS",
    "User32BeforeBlocklist",
  ],
260
261  /**
262   * Obtain a list of all dumps pending upload.
263   *
264   * The returned value is a promise that resolves to an array of objects
265   * on success. Each element in the array has the following properties:
266   *
267   *   id (string)
268   *      The ID of the crash (a UUID).
269   *
270   *   path (string)
271   *      The filename of the crash (<UUID.dmp>)
272   *
273   *   date (Date)
274   *      When this dump was created
275   *
276   * The returned arry is sorted by the modified time of the file backing
277   * the entry, oldest to newest.
278   *
279   * @return Promise<Array>
280   */
281  pendingDumps() {
282    return this._getDirectoryEntries(this._pendingDumpsDir, this.DUMP_REGEX);
283  },
284
285  /**
286   * Obtain a list of all dump files corresponding to submitted crashes.
287   *
288   * The returned value is a promise that resolves to an Array of
289   * objects. Each object has the following properties:
290   *
291   *   path (string)
292   *     The path of the file this entry comes from.
293   *
294   *   id (string)
295   *     The crash UUID.
296   *
297   *   date (Date)
298   *     The (estimated) date this crash was submitted.
299   *
300   * The returned array is sorted by the modified time of the file backing
301   * the entry, oldest to newest.
302   *
303   * @return Promise<Array>
304   */
305  submittedDumps() {
306    return this._getDirectoryEntries(this._submittedDumpsDir,
307                                     this.SUBMITTED_REGEX);
308  },
309
310  /**
311   * Aggregates "loose" events files into the unified "database."
312   *
313   * This function should be called periodically to collect metadata from
314   * all events files into the central data store maintained by this manager.
315   *
316   * Once events have been stored in the backing store the corresponding
317   * source files are deleted.
318   *
319   * Only one aggregation operation is allowed to occur at a time. If this
320   * is called when an existing aggregation is in progress, the promise for
321   * the original call will be returned.
322   *
323   * @return promise<int> The number of event files that were examined.
324   */
325  aggregateEventsFiles() {
326    if (this._aggregatePromise) {
327      return this._aggregatePromise;
328    }
329
330    return this._aggregatePromise = (async () => {
331      if (this._aggregatePromise) {
332        return this._aggregatePromise;
333      }
334
335      try {
336        let unprocessedFiles = await this._getUnprocessedEventsFiles();
337
338        let deletePaths = [];
339        let needsSave = false;
340
341        this._storeProtectedCount++;
342        for (let entry of unprocessedFiles) {
343          try {
344            let result = await this._processEventFile(entry);
345
346            switch (result) {
347              case this.EVENT_FILE_SUCCESS:
348                needsSave = true;
349                // Fall through.
350
351              case this.EVENT_FILE_ERROR_MALFORMED:
352                deletePaths.push(entry.path);
353                break;
354
355              case this.EVENT_FILE_ERROR_UNKNOWN_EVENT:
356                break;
357
358              default:
359                Cu.reportError("Unhandled crash event file return code. Please " +
360                               "file a bug: " + result);
361            }
362          } catch (ex) {
363            if (ex instanceof OS.File.Error) {
364              this._log.warn("I/O error reading " + entry.path, ex);
365            } else {
366              // We should never encounter an exception. This likely represents
367              // a coding error because all errors should be detected and
368              // converted to return codes.
369              //
370              // If we get here, report the error and delete the source file
371              // so we don't see it again.
372              Cu.reportError("Exception when processing crash event file: " +
373                             Log.exceptionStr(ex));
374              deletePaths.push(entry.path);
375            }
376          }
377        }
378
379        if (needsSave) {
380          let store = await this._getStore();
381          await store.save();
382        }
383
384        for (let path of deletePaths) {
385          try {
386            await OS.File.remove(path);
387          } catch (ex) {
388            this._log.warn("Error removing event file (" + path + ")", ex);
389          }
390        }
391
392        return unprocessedFiles.length;
393
394      } finally {
395        this._aggregatePromise = false;
396        this._storeProtectedCount--;
397      }
398    })();
399  },
400
401  /**
402   * Prune old crash data.
403   *
404   * @param date
405   *        (Date) The cutoff point for pruning. Crashes without data newer
406   *        than this will be pruned.
407   */
408  pruneOldCrashes(date) {
409    return (async () => {
410      let store = await this._getStore();
411      store.pruneOldCrashes(date);
412      await store.save();
413    })();
414  },
415
416  /**
417   * Run tasks that should be periodically performed.
418   */
419  runMaintenanceTasks() {
420    return (async () => {
421      await this.aggregateEventsFiles();
422
423      let offset = this.PURGE_OLDER_THAN_DAYS * MILLISECONDS_IN_DAY;
424      await this.pruneOldCrashes(new Date(Date.now() - offset));
425    })();
426  },
427
428  /**
429   * Schedule maintenance tasks for some point in the future.
430   *
431   * @param delay
432   *        (integer) Delay in milliseconds when maintenance should occur.
433   */
434  scheduleMaintenance(delay) {
435    let deferred = PromiseUtils.defer();
436
437    setTimeout(() => {
438      this.runMaintenanceTasks().then(deferred.resolve, deferred.reject);
439    }, delay);
440
441    return deferred.promise;
442  },
443
444  /**
445   * Record the occurrence of a crash.
446   *
447   * This method skips event files altogether and writes directly and
448   * immediately to the manager's data store.
449   *
450   * @param processType (string) One of the PROCESS_TYPE constants.
451   * @param crashType (string) One of the CRASH_TYPE constants.
452   * @param id (string) Crash ID. Likely a UUID.
453   * @param date (Date) When the crash occurred.
454   * @param metadata (dictionary) Crash metadata, may be empty.
455   *
456   * @return promise<null> Resolved when the store has been saved.
457   */
458  addCrash(processType, crashType, id, date, metadata) {
459    let promise = (async () => {
460      let store = await this._getStore();
461      if (store.addCrash(processType, crashType, id, date, metadata)) {
462        await store.save();
463      }
464
465      let deferred = this._crashPromises.get(id);
466
467      if (deferred) {
468        this._crashPromises.delete(id);
469        deferred.resolve();
470      }
471
472      // Send a telemetry ping for each non-main process crash
473      if (processType === this.PROCESS_TYPE_CONTENT ||
474          processType === this.PROCESS_TYPE_GPU) {
475        this._sendCrashPing(id, processType, date, metadata);
476      }
477    })();
478
479    return promise;
480  },
481
482  /**
483   * Returns a promise that is resolved only the crash with the specified id
484   * has been fully recorded.
485   *
486   * @param id (string) Crash ID. Likely a UUID.
487   *
488   * @return promise<null> Resolved when the crash is present.
489   */
490  async ensureCrashIsPresent(id) {
491    let store = await this._getStore();
492    let crash = store.getCrash(id);
493
494    if (crash) {
495      return Promise.resolve();
496    }
497
498    let deferred = PromiseUtils.defer();
499
500    this._crashPromises.set(id, deferred);
501    return deferred.promise;
502  },
503
504  /**
505   * Record the remote ID for a crash.
506   *
507   * @param crashID (string) Crash ID. Likely a UUID.
508   * @param remoteID (Date) Server/Breakpad ID.
509   *
510   * @return boolean True if the remote ID was recorded.
511   */
512  async setRemoteCrashID(crashID, remoteID) {
513    let store = await this._getStore();
514    if (store.setRemoteCrashID(crashID, remoteID)) {
515      await store.save();
516    }
517  },
518
519  /**
520   * Generate a submission ID for use with addSubmission{Attempt,Result}.
521   */
522  generateSubmissionID() {
523    return "sub-" + Cc["@mozilla.org/uuid-generator;1"]
524                      .getService(Ci.nsIUUIDGenerator)
525                      .generateUUID().toString().slice(1, -1);
526  },
527
528  /**
529   * Record the occurrence of a submission attempt for a crash.
530   *
531   * @param crashID (string) Crash ID. Likely a UUID.
532   * @param submissionID (string) Submission ID. Likely a UUID.
533   * @param date (Date) When the attempt occurred.
534   *
535   * @return boolean True if the attempt was recorded and false if not.
536   */
537  async addSubmissionAttempt(crashID, submissionID, date) {
538    let store = await this._getStore();
539    if (store.addSubmissionAttempt(crashID, submissionID, date)) {
540      await store.save();
541    }
542  },
543
544  /**
545   * Record the occurrence of a submission result for a crash.
546   *
547   * @param crashID (string) Crash ID. Likely a UUID.
548   * @param submissionID (string) Submission ID. Likely a UUID.
549   * @param date (Date) When the submission result was obtained.
550   * @param result (string) One of the SUBMISSION_RESULT constants.
551   *
552   * @return boolean True if the result was recorded and false if not.
553   */
554  async addSubmissionResult(crashID, submissionID, date, result) {
555    let store = await this._getStore();
556    if (store.addSubmissionResult(crashID, submissionID, date, result)) {
557      await store.save();
558    }
559  },
560
561  /**
562   * Set the classification of a crash.
563   *
564   * @param crashID (string) Crash ID. Likely a UUID.
565   * @param classifications (array) Crash classifications.
566   *
567   * @return boolean True if the data was recorded and false if not.
568   */
569  async setCrashClassifications(crashID, classifications) {
570    let store = await this._getStore();
571    if (store.setCrashClassifications(crashID, classifications)) {
572      await store.save();
573    }
574  },
575
576  /**
577   * Obtain the paths of all unprocessed events files.
578   *
579   * The promise-resolved array is sorted by file mtime, oldest to newest.
580   */
581  _getUnprocessedEventsFiles() {
582    return (async () => {
583      let entries = [];
584
585      for (let dir of this._eventsDirs) {
586        for (let e of await this._getDirectoryEntries(dir, this.ALL_REGEX)) {
587          entries.push(e);
588        }
589      }
590
591      entries.sort((a, b) => { return a.date - b.date; });
592
593      return entries;
594    })();
595  },
596
597  // See docs/crash-events.rst for the file format specification.
598  _processEventFile(entry) {
599    return (async () => {
600      let data = await OS.File.read(entry.path);
601      let store = await this._getStore();
602
603      let decoder = new TextDecoder();
604      data = decoder.decode(data);
605
606      let type, time;
607      let start = 0;
608      for (let i = 0; i < 2; i++) {
609        let index = data.indexOf("\n", start);
610        if (index == -1) {
611          return this.EVENT_FILE_ERROR_MALFORMED;
612        }
613
614        let sub = data.substring(start, index);
615        switch (i) {
616          case 0:
617            type = sub;
618            break;
619          case 1:
620            time = sub;
621            try {
622              time = parseInt(time, 10);
623            } catch (ex) {
624              return this.EVENT_FILE_ERROR_MALFORMED;
625            }
626        }
627
628        start = index + 1;
629      }
630      let date = new Date(time * 1000);
631      let payload = data.substring(start);
632
633      return this._handleEventFilePayload(store, entry, type, date, payload);
634    })();
635  },
636
637  _filterAnnotations(annotations) {
638    let filteredAnnotations = {};
639
640    for (let line in annotations) {
641      if (this.ANNOTATION_WHITELIST.includes(line)) {
642        filteredAnnotations[line] = annotations[line];
643      }
644    }
645
646    return filteredAnnotations;
647  },
648
649  _sendCrashPing(crashId, type, date, metadata = {}) {
650    // If we have a saved environment, use it. Otherwise report
651    // the current environment.
652    let reportMeta = Cu.cloneInto(metadata, myScope);
653    let crashEnvironment = parseAndRemoveField(reportMeta,
654                                               "TelemetryEnvironment");
655    let sessionId = getAndRemoveField(reportMeta, "TelemetrySessionId");
656    let stackTraces = parseAndRemoveField(reportMeta, "StackTraces");
657    let minidumpSha256Hash = getAndRemoveField(reportMeta,
658                                               "MinidumpSha256Hash");
659
660    // Filter the remaining annotations to remove privacy-sensitive ones
661    reportMeta = this._filterAnnotations(reportMeta);
662
663    this._pingPromise = TelemetryController.submitExternalPing("crash",
664      {
665        version: 1,
666        crashDate: date.toISOString().slice(0, 10), // YYYY-MM-DD
667        crashTime: date.toISOString().slice(0, 13) + ":00:00.000Z", // per-hour resolution
668        sessionId,
669        crashId,
670        minidumpSha256Hash,
671        processType: type,
672        stackTraces,
673        metadata: reportMeta,
674        hasCrashEnvironment: (crashEnvironment !== null),
675      },
676      {
677        addClientId: true,
678        addEnvironment: true,
679        overrideEnvironment: crashEnvironment,
680      }
681    );
682  },
683
684  _handleEventFilePayload(store, entry, type, date, payload) {
685      // The payload types and formats are documented in docs/crash-events.rst.
686      // Do not change the format of an existing type. Instead, invent a new
687      // type.
688      // DO NOT ADD NEW TYPES WITHOUT DOCUMENTING!
689      let lines = payload.split("\n");
690
691      switch (type) {
692        case "crash.main.1":
693          if (lines.length > 1) {
694            this._log.warn("Multiple lines unexpected in payload for " +
695                           entry.path);
696            return this.EVENT_FILE_ERROR_MALFORMED;
697          }
698          // fall-through
699        case "crash.main.2":
700          let crashID = lines[0];
701          let metadata = parseKeyValuePairsFromLines(lines.slice(1));
702          store.addCrash(this.PROCESS_TYPE_MAIN, this.CRASH_TYPE_CRASH,
703                         crashID, date, metadata);
704
705          if (!("CrashPingUUID" in metadata)) {
706            // If CrashPingUUID is not present then a ping was not generated
707            // by the crashreporter for this crash so we need to send one from
708            // here.
709            this._sendCrashPing(crashID, this.PROCESS_TYPE_MAIN, date,
710                                metadata);
711          }
712
713          break;
714
715        case "crash.submission.1":
716          if (lines.length == 3) {
717            let [crashID, result, remoteID] = lines;
718            store.addCrash(this.PROCESS_TYPE_MAIN, this.CRASH_TYPE_CRASH,
719                           crashID, date);
720
721            let submissionID = this.generateSubmissionID();
722            let succeeded = result === "true";
723            store.addSubmissionAttempt(crashID, submissionID, date);
724            store.addSubmissionResult(crashID, submissionID, date,
725                                      succeeded ? this.SUBMISSION_RESULT_OK :
726                                                  this.SUBMISSION_RESULT_FAILED);
727            if (succeeded) {
728              store.setRemoteCrashID(crashID, remoteID);
729            }
730          } else {
731            return this.EVENT_FILE_ERROR_MALFORMED;
732          }
733          break;
734
735        default:
736          return this.EVENT_FILE_ERROR_UNKNOWN_EVENT;
737      }
738
739      return this.EVENT_FILE_SUCCESS;
740  },
741
742  /**
743   * The resolved promise is an array of objects with the properties:
744   *
745   *   path -- String filename
746   *   id -- regexp.match()[1] (likely the crash ID)
747   *   date -- Date mtime of the file
748   */
749  _getDirectoryEntries(path, re) {
750    return (async function() {
751      try {
752        await OS.File.stat(path);
753      } catch (ex) {
754        if (!(ex instanceof OS.File.Error) || !ex.becauseNoSuchFile) {
755          throw ex;
756        }
757        return [];
758      }
759
760      let it = new OS.File.DirectoryIterator(path);
761      let entries = [];
762
763      try {
764        await it.forEach((entry, index, it) => {
765          if (entry.isDir) {
766            return undefined;
767          }
768
769          let match = re.exec(entry.name);
770          if (!match) {
771            return undefined;
772          }
773
774          return OS.File.stat(entry.path).then((info) => {
775            entries.push({
776              path: entry.path,
777              id: match[1],
778              date: info.lastModificationDate,
779            });
780          });
781        });
782      } finally {
783        it.close();
784      }
785
786      entries.sort((a, b) => { return a.date - b.date; });
787
788      return entries;
789    })();
790  },
791
792  _getStore() {
793    if (this._getStoreTask) {
794      return this._getStoreTask;
795    }
796
797    return this._getStoreTask = (async () => {
798      try {
799        if (!this._store) {
800          await OS.File.makeDir(this._storeDir, {
801            ignoreExisting: true,
802            unixMode: OS.Constants.libc.S_IRWXU,
803          });
804
805          let store = new CrashStore(this._storeDir,
806                                     this._telemetryStoreSizeKey);
807          await store.load();
808
809          this._store = store;
810          this._storeTimer = Cc["@mozilla.org/timer;1"]
811                               .createInstance(Ci.nsITimer);
812        }
813
814        // The application can go long periods without interacting with the
815        // store. Since the store takes up resources, we automatically "free"
816        // the store after inactivity so resources can be returned to the
817        // system. We do this via a timer and a mechanism that tracks when the
818        // store is being accessed.
819        this._storeTimer.cancel();
820
821        // This callback frees resources from the store unless the store
822        // is protected from freeing by some other process.
823        let timerCB = () => {
824          if (this._storeProtectedCount) {
825            this._storeTimer.initWithCallback(timerCB, this.STORE_EXPIRATION_MS,
826                                              this._storeTimer.TYPE_ONE_SHOT);
827            return;
828          }
829
830          // We kill the reference that we hold. GC will kill it later. If
831          // someone else holds a reference, that will prevent GC until that
832          // reference is gone.
833          this._store = null;
834          this._storeTimer = null;
835        };
836
837        this._storeTimer.initWithCallback(timerCB, this.STORE_EXPIRATION_MS,
838                                          this._storeTimer.TYPE_ONE_SHOT);
839
840        return this._store;
841      } finally {
842        this._getStoreTask = null;
843      }
844    })();
845  },
846
847  /**
848   * Obtain information about all known crashes.
849   *
850   * Returns an array of CrashRecord instances. Instances are read-only.
851   */
852  getCrashes() {
853    return (async () => {
854      let store = await this._getStore();
855
856      return store.crashes;
857    })();
858  },
859
860  getCrashCountsByDay() {
861    return (async () => {
862      let store = await this._getStore();
863
864      return store._countsByDay;
865    })();
866  },
867});
868
869var gCrashManager;
870
871/**
872 * Interface to storage of crash data.
873 *
874 * This type handles storage of crash metadata. It exists as a separate type
875 * from the crash manager for performance reasons: since all crash metadata
876 * needs to be loaded into memory for access, we wish to easily dispose of all
877 * associated memory when this data is no longer needed. Having an isolated
 * object whose references can easily be lost facilitates that simple disposal.
879 *
880 * When metadata is updated, the caller must explicitly persist the changes
881 * to disk. This prevents excessive I/O during updates.
882 *
883 * The store has a mechanism for ensuring it doesn't grow too large. A ceiling
884 * is placed on the number of daily events that can occur for events that can
885 * occur with relatively high frequency, notably plugin crashes and hangs
886 * (plugins can enter cycles where they repeatedly crash). If we've reached
887 * the high water mark and new data arrives, it's silently dropped.
888 * However, the count of actual events is always preserved. This allows
889 * us to report on the severity of problems beyond the storage threshold.
890 *
891 * Main process crashes are excluded from limits because they are both
892 * important and should be rare.
893 *
894 * @param storeDir (string)
895 *        Directory the store should be located in.
896 * @param telemetrySizeKey (string)
897 *        The telemetry histogram that should be used to store the size
898 *        of the data file.
899 */
function CrashStore(storeDir, telemetrySizeKey) {
  Object.assign(this, {
    // Where the store lives and which histogram reports its size.
    _storeDir: storeDir,
    _telemetrySizeKey: telemetrySizeKey,

    // The data file inside the store directory.
    _storePath: OS.Path.join(storeDir, "store.json.mozlz4"),

    // Data read from disk; stays null until the store is loaded or reset.
    _data: null,

    // Maps days since UNIX epoch to a Map of event types to counts.
    // This data structure is populated when the JSON file is loaded
    // and is also updated when new events are added.
    _countsByDay: new Map(),
  });
}
914
CrashStore.prototype = Object.freeze({
  // Maximum number of events to store per day. This establishes a
  // ceiling on the per-type/per-day records that will be stored.
  HIGH_WATER_DAILY_THRESHOLD: 100,

  /**
   * Reset all data.
   *
   * Replaces the in-memory payload with an empty one and clears the derived
   * per-day counts. Nothing is read from or written to disk.
   */
  reset() {
    this._data = {
      v: 1,
      crashes: new Map(),
      corruptDate: null,
    };
    this._countsByDay = new Map();
  },

  /**
   * Load data from disk.
   *
   * Any state already held in memory is discarded first. A missing file is
   * treated as an empty store. Any other failure while reading or parsing is
   * swallowed and recorded by setting `corruptDate` to the current time;
   * records that were processed before the failure stay in memory.
   *
   * @return Promise
   */
  async load() {
    // Loading replaces data.
    this.reset();

    try {
      let decoder = new TextDecoder();
      let data = await OS.File.read(this._storePath, {compression: "lz4"});
      data = JSON.parse(decoder.decode(data));

      if (data.corruptDate) {
        this._data.corruptDate = new Date(data.corruptDate);
      }

      // actualCounts is used to validate that the derived counts by
      // days stored in the payload matches up to actual data. Keys have the
      // form "<day>-<type>".
      let actualCounts = new Map();

      // In the past, submissions were stored as separate crash records
      // with an id of e.g. "someID-submission".
      //
      // The old way of storing submissions was used from FF33 - FF34. We
      // drop this old data on the floor.
      for (let id in data.crashes) {
        if (id.endsWith("-submission")) {
          continue;
        }

        let crash = data.crashes[id];
        let denormalized = this._denormalize(crash);

        // Submissions are serialized as a plain object; rebuild the Map and
        // convert the dates of every submission back to Date instances.
        denormalized.submissions = new Map();
        if (crash.submissions) {
          for (let submissionID in crash.submissions) {
            let submission = crash.submissions[submissionID];
            denormalized.submissions.set(submissionID,
                                         this._denormalize(submission));
          }
        }

        this._data.crashes.set(id, denormalized);

        let key = dateToDays(denormalized.crashDate) + "-" + denormalized.type;
        actualCounts.set(key, (actualCounts.get(key) || 0) + 1);

        // If we have an OOM size, count the crash as an OOM in addition to
        // being a main process crash.
        if (denormalized.metadata &&
            denormalized.metadata.OOMAllocationSize) {
          let oomKey = key + "-oom";
          actualCounts.set(oomKey, (actualCounts.get(oomKey) || 0) + 1);
        }
      }

      // The validation in this loop is arguably not necessary. We perform
      // it as a defense against unknown bugs.
      for (let dayKey in data.countsByDay) {
        let day = parseInt(dayKey, 10);
        for (let type in data.countsByDay[day]) {
          this._ensureCountsForDay(day);

          let count = data.countsByDay[day][type];
          let key = day + "-" + type;

          // If the payload says we have data for a given day but we
          // don't, the payload is wrong. Ignore it.
          if (!actualCounts.has(key)) {
            continue;
          }

          // If we encountered more data in the payload than what the
          // data structure says, use the proper value.
          count = Math.max(count, actualCounts.get(key));

          this._countsByDay.get(day).set(type, count);
        }
      }
    } catch (ex) {
      // Missing files (first use) are allowed.
      if (!(ex instanceof OS.File.Error) || !ex.becauseNoSuchFile) {
        // If we can't load for any reason, mark a corrupt date in the instance
        // and swallow the error.
        //
        // The marking of a corrupted file is intentionally not persisted to
        // disk yet. Instead, we wait until the next save(). This is to give
        // non-permanent failures the opportunity to recover on their own.
        this._data.corruptDate = new Date();
      }
    }
  },

  /**
   * Save data to disk.
   *
   * Does nothing if no data has been loaded or reset into the instance yet.
   * Otherwise the crashes, their submissions and the per-day counts are
   * serialized to JSON and written atomically, lz4-compressed, to the store
   * path. When a telemetry size key is configured on the instance, the value
   * returned by OS.File.writeAtomic() is added to that histogram.
   *
   * @return Promise resolved once the write (if any) has completed.
   */
  async save() {
    if (!this._data) {
      return;
    }

    let normalized = {
      // The version should be incremented whenever the format
      // changes.
      v: 1,
      // Maps crash IDs to objects defining the crash.
      crashes: {},
      // Maps days since UNIX epoch to objects mapping event types to
      // counts. This is a mirror of this._countsByDay. e.g.
      // {
      //    15000: {
      //        "main-crash": 2,
      //        "plugin-crash": 1
      //    }
      // }
      countsByDay: {},

      // When the store was last corrupted.
      corruptDate: null,
    };

    if (this._data.corruptDate) {
      normalized.corruptDate = this._data.corruptDate.getTime();
    }

    for (let [id, crash] of this._data.crashes) {
      let c = this._normalize(crash);

      // _normalize() copied the submissions Map by reference; replace it
      // with a plain object so it survives JSON serialization.
      c.submissions = {};
      for (let [submissionID, submission] of crash.submissions) {
        c.submissions[submissionID] = this._normalize(submission);
      }

      normalized.crashes[id] = c;
    }

    for (let [day, m] of this._countsByDay) {
      normalized.countsByDay[day] = {};
      for (let [type, count] of m) {
        normalized.countsByDay[day][type] = count;
      }
    }

    let encoder = new TextEncoder();
    let data = encoder.encode(JSON.stringify(normalized));
    let size = await OS.File.writeAtomic(this._storePath, data, {
                                         tmpPath: this._storePath + ".tmp",
                                         compression: "lz4"});
    if (this._telemetrySizeKey) {
      Services.telemetry.getHistogramById(this._telemetrySizeKey).add(size);
    }
  },

  /**
   * Normalize an object into one fit for serialization.
   *
   * This function along with _denormalize() serve to hack around the
   * default handling of Date JSON serialization because Date serialization
   * is undefined by JSON.
   *
   * Fields ending with "Date" are assumed to contain Date instances.
   * We convert these to milliseconds since epoch on output and back to
   * Date on input. Falsy values (e.g. a null date) and all other fields are
   * copied unchanged. The copy is shallow.
   *
   * @param o (object) The object to normalize.
   * @return object A new object holding the normalized fields.
   */
  _normalize(o) {
    let normalized = {};

    for (let k in o) {
      let v = o[k];
      if (v && k.endsWith("Date")) {
        normalized[k] = v.getTime();
      } else {
        normalized[k] = v;
      }
    }

    return normalized;
  },

  /**
   * Convert a serialized object back to its native form.
   *
   * This is the inverse of _normalize(): truthy fields whose name ends with
   * "Date" are turned back into Date instances; everything else is copied
   * unchanged. The copy is shallow.
   *
   * @param o (object) The serialized object.
   * @return object A new object holding the denormalized fields.
   */
  _denormalize(o) {
    let n = {};

    for (let k in o) {
      let v = o[k];
      if (v && k.endsWith("Date")) {
        n[k] = new Date(parseInt(v, 10));
      } else {
        n[k] = v;
      }
    }

    return n;
  },

  /**
   * Prune old crash data.
   *
   * Crashes without recent activity are pruned from the store so the
   * size of the store is not unbounded. The age of a crash is taken from
   * CrashRecord.newestDate; crashes without such a date are pruned too.
   * The per-day counts are not modified.
   *
   * @param date
   *        (Date) The cutoff at which data will be pruned. If an entry
   *        doesn't have data newer than this, it will be pruned.
   */
  pruneOldCrashes(date) {
    for (let crash of this.crashes) {
      let newest = crash.newestDate;
      if (!newest || newest.getTime() < date.getTime()) {
        this._data.crashes.delete(crash.id);
      }
    }
  },

  /**
   * Date the store was last corrupted and required a reset.
   *
   * May be null (no corruption has ever occurred) or a Date instance.
   */
  get corruptDate() {
    return this._data.corruptDate;
  },

  /**
   * The number of distinct crashes tracked.
   */
  get crashesCount() {
    return this._data.crashes.size;
  },

  /**
   * All crashes tracked.
   *
   * This is an array of CrashRecord. A new array and new wrappers are
   * created on every access.
   */
  get crashes() {
    let crashes = [];
    for (let [, crash] of this._data.crashes) {
      crashes.push(new CrashRecord(crash));
    }

    return crashes;
  },

  /**
   * Obtain a particular crash from its ID.
   *
   * A CrashRecord will be returned if the crash exists. null will be returned
   * if the crash is unknown.
   *
   * @param id (string) The crash ID.
   * @return null | CrashRecord
   */
  getCrash(id) {
    // Crashes are keyed on their ID, so look the entry up directly instead
    // of wrapping and scanning every stored crash.
    let crash = this._data.crashes.get(id);
    return crash ? new CrashRecord(crash) : null;
  },

  /**
   * Make sure this._countsByDay holds a (possibly empty) Map for a day.
   *
   * @param day (number) Days since UNIX epoch.
   */
  _ensureCountsForDay(day) {
    if (!this._countsByDay.has(day)) {
      this._countsByDay.set(day, new Map());
    }
  },

  /**
   * Ensure the crash record is present in storage.
   *
   * Returns the crash record if we're allowed to store it or null
   * if we've hit the high water mark.
   *
   * For a crash ID that is not stored yet, the per-day count of its type is
   * incremented before the high water mark is checked, so crashes that are
   * dropped are still counted. Crashes whose process type is
   * PROCESS_TYPE_MAIN are exempt from the high water mark. For a crash ID
   * that is already stored, only its type and crash date are updated.
   *
   * @param processType
   *        (string) One of the PROCESS_TYPE constants.
   * @param crashType
   *        (string) One of the CRASH_TYPE constants.
   * @param id
   *        (string) The crash ID.
   * @param date
   *        (Date) When this crash occurred.
   * @param metadata
   *        (dictionary) Crash metadata, may be empty.
   *
   * @return null | object crash record
   */
  _ensureCrashRecord(processType, crashType, id, date, metadata) {
    if (!id) {
      // Crashes are keyed on ID, so it's not really helpful to store crashes
      // without IDs.
      return null;
    }

    let type = processType + "-" + crashType;

    if (!this._data.crashes.has(id)) {
      let day = dateToDays(date);
      this._ensureCountsForDay(day);

      let count = (this._countsByDay.get(day).get(type) || 0) + 1;
      this._countsByDay.get(day).set(type, count);

      if (count > this.HIGH_WATER_DAILY_THRESHOLD &&
          processType != CrashManager.prototype.PROCESS_TYPE_MAIN) {
        return null;
      }

      // If we have an OOM size, count the crash as an OOM in addition to
      // being a main process crash.
      if (metadata && metadata.OOMAllocationSize) {
        let oomType = type + "-oom";
        let oomCount = (this._countsByDay.get(day).get(oomType) || 0) + 1;
        this._countsByDay.get(day).set(oomType, oomCount);
      }

      this._data.crashes.set(id, {
        id,
        remoteID: null,
        type,
        crashDate: date,
        submissions: new Map(),
        classifications: [],
        metadata,
      });
    }

    let crash = this._data.crashes.get(id);
    crash.type = type;
    crash.crashDate = date;

    return crash;
  },

  /**
   * Record the occurrence of a crash.
   *
   * @param processType (string) One of the PROCESS_TYPE constants.
   * @param crashType (string) One of the CRASH_TYPE constants.
   * @param id (string) Crash ID. Likely a UUID.
   * @param date (Date) When the crash occurred.
   * @param metadata (dictionary) Crash metadata, may be empty.
   *
   * @return boolean True if the crash was recorded and false if not.
   */
  addCrash(processType, crashType, id, date, metadata) {
    return !!this._ensureCrashRecord(processType, crashType, id, date, metadata);
  },

  /**
   * Attach a remote ID to a stored crash.
   *
   * @param crashID (string) ID of a crash already present in the store.
   * @param remoteID (string) The remote ID to record; must be non-empty.
   *
   * @return boolean True if the remote ID was recorded and false if not.
   */
  setRemoteCrashID(crashID, remoteID) {
    let crash = this._data.crashes.get(crashID);
    if (!crash || !remoteID) {
      return false;
    }

    crash.remoteID = remoteID;
    return true;
  },

  /**
   * Obtain all crashes of a given process type and crash type.
   *
   * @param processType (string) One of the PROCESS_TYPE constants.
   * @param crashType (string) One of the CRASH_TYPE constants.
   *
   * @return Array of CrashRecord, possibly empty.
   */
  getCrashesOfType(processType, crashType) {
    let crashes = [];
    for (let crash of this.crashes) {
      if (crash.isOfType(processType, crashType)) {
        crashes.push(crash);
      }
    }

    return crashes;
  },

  /**
   * Ensure the submission record is present in storage.
   *
   * A blank submission record is created if the crash has none for the
   * given submission ID.
   *
   * @param crashID (string) ID of a crash already present in the store.
   * @param submissionID (string) The submission ID; must be non-empty.
   *
   * @returns null if the crash is unknown or the submission ID is empty,
   *          otherwise [submission, crash].
   */
  _ensureSubmissionRecord(crashID, submissionID) {
    let crash = this._data.crashes.get(crashID);
    if (!crash || !submissionID) {
      return null;
    }

    if (!crash.submissions.has(submissionID)) {
      crash.submissions.set(submissionID, {
        requestDate: null,
        responseDate: null,
        result: null,
      });
    }

    return [crash.submissions.get(submissionID), crash];
  },

  /**
   * Record that a submission of a crash was attempted.
   *
   * The submission record is created if needed, its request date is set and
   * the PROCESS_CRASH_SUBMIT_ATTEMPT keyed histogram is bumped for the type
   * of the crash.
   *
   * @param crashID (string) ID of a crash already present in the store.
   * @param submissionID (string) The submission ID; must be non-empty.
   * @param date (Date) When the submission was requested.
   *
   * @return boolean True if the attempt was recorded.
   */
  addSubmissionAttempt(crashID, submissionID, date) {
    // _ensureSubmissionRecord() yields null rather than a pair when the crash
    // is unknown or the submission ID is empty, so it must be checked before
    // destructuring; destructuring null would throw.
    let record = this._ensureSubmissionRecord(crashID, submissionID);
    if (!record) {
      return false;
    }

    let [submission, crash] = record;
    submission.requestDate = date;
    Services.telemetry.getKeyedHistogramById("PROCESS_CRASH_SUBMIT_ATTEMPT")
      .add(crash.type, 1);
    return true;
  },

  /**
   * Record the outcome of a previously recorded submission attempt.
   *
   * Sets the response date and result on the submission record and reports
   * to the PROCESS_CRASH_SUBMIT_SUCCESS keyed histogram whether the result
   * was "ok". Unlike addSubmissionAttempt() this never creates a submission
   * record.
   *
   * @param crashID (string) ID of a crash already present in the store.
   * @param submissionID (string) ID of an existing submission of that crash.
   * @param date (Date) When the response was received.
   * @param result (string) The submission result.
   *
   * @return boolean True if the response was recorded.
   */
  addSubmissionResult(crashID, submissionID, date, result) {
    let crash = this._data.crashes.get(crashID);
    if (!crash || !submissionID) {
      return false;
    }
    let submission = crash.submissions.get(submissionID);
    if (!submission) {
      return false;
    }

    submission.responseDate = date;
    submission.result = result;
    Services.telemetry.getKeyedHistogramById("PROCESS_CRASH_SUBMIT_SUCCESS")
      .add(crash.type, result == "ok");
    return true;
  },

  /**
   * Replace the classifications of a stored crash.
   *
   * @param crashID (string) ID of a crash already present in the store.
   * @param classifications The new classifications, stored as given.
   *
   * @return boolean True if the classifications were set.
   */
  setCrashClassifications(crashID, classifications) {
    let crash = this._data.crashes.get(crashID);
    if (!crash) {
      return false;
    }

    crash.classifications = classifications;
    return true;
  },
});
1387
/**
 * A single crash together with its metadata.
 *
 * CrashRecord wraps one of the low-level anonymous JS objects that the
 * CrashStore keeps per crash and exposes its fields through a consistent,
 * read-only set of getters. The wrapped object is referenced, not copied.
 *
 * Instances of this type should only be constructed inside this module,
 * not externally. The constructor is not considered a public API.
 *
 * @param o (object)
 *        The crash's entry from the CrashStore.
 */
function CrashRecord(o) {
  this._o = o;
}

CrashRecord.prototype = Object.freeze({
  /** The `id` field of the wrapped entry. */
  get id() {
    const {id} = this._o;
    return id;
  },

  /** The `remoteID` field of the wrapped entry. */
  get remoteID() {
    const {remoteID} = this._o;
    return remoteID;
  },

  /** The `crashDate` field of the wrapped entry. */
  get crashDate() {
    const {crashDate} = this._o;
    return crashDate;
  },

  /**
   * The most recent date held by this record.
   *
   * Provided for convenience; callers use the value to decide when a
   * record should expire.
   */
  get newestDate() {
    // The crash date is the only date we track at the moment.
    return this.crashDate;
  },

  /**
   * The earliest date held by this record. Currently the crash date.
   */
  get oldestDate() {
    return this.crashDate;
  },

  /** The `type` field of the wrapped entry. */
  get type() {
    const {type} = this._o;
    return type;
  },

  /**
   * Whether this record's type equals "<processType>-<crashType>".
   *
   * @param processType (string) One of the PROCESS_TYPE constants.
   * @param crashType (string) One of the CRASH_TYPE constants.
   *
   * @return boolean
   */
  isOfType(processType, crashType) {
    const wanted = processType + "-" + crashType;
    return wanted == this.type;
  },

  /** The `submissions` field of the wrapped entry. */
  get submissions() {
    const {submissions} = this._o;
    return submissions;
  },

  /** The `classifications` field of the wrapped entry. */
  get classifications() {
    const {classifications} = this._o;
    return classifications;
  },

  /** The `metadata` field of the wrapped entry. */
  get metadata() {
    const {metadata} = this._o;
    return metadata;
  },
});
1452
/**
 * Lazily expose the application-wide CrashManager as CrashManager.Singleton.
 *
 * In practice a running application only ever creates one CrashManager.
 * It is nevertheless written as a reusable type, mainly so that tests can
 * create their own instances.
 */
XPCOMUtils.defineLazyGetter(this.CrashManager, "Singleton", function() {
  if (!gCrashManager) {
    const crashReportsDir =
      OS.Path.join(OS.Constants.Path.userApplicationDataDir, "Crash Reports");
    const storeDir = OS.Path.join(OS.Constants.Path.profileDir, "crashes");

    const pendingDumpsDir = OS.Path.join(crashReportsDir, "pending");
    const submittedDumpsDir = OS.Path.join(crashReportsDir, "submitted");
    const eventsDirs = [
      OS.Path.join(crashReportsDir, "events"),
      OS.Path.join(storeDir, "events"),
    ];

    gCrashManager = new CrashManager({
      pendingDumpsDir,
      submittedDumpsDir,
      eventsDirs,
      storeDir,
      telemetryStoreSizeKey: "CRASH_STORE_COMPRESSED_BYTES",
    });

    // Kick off event file aggregation a little while after startup so
    // that it is guaranteed to run with some regularity.
    //
    // This has a performance cost, but each run stays cheap because event
    // files are aggregated periodically, and a healthy install should have
    // very little to aggregate. An install with a large backlog has
    // problems more serious than slightly slower startup.
    gCrashManager.scheduleMaintenance(AGGREGATE_STARTUP_DELAY_MS);
  }

  return gCrashManager;
});
1489