1/* This Source Code Form is subject to the terms of the Mozilla Public
2 * License, v. 2.0. If a copy of the MPL was not distributed with this
3 * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
4
5"use strict";
6
7const myScope = this;
8
9const { PromiseUtils } = ChromeUtils.import(
10  "resource://gre/modules/PromiseUtils.jsm"
11);
12const { Services } = ChromeUtils.import("resource://gre/modules/Services.jsm");
13const { setTimeout } = ChromeUtils.import("resource://gre/modules/Timer.jsm");
14const { XPCOMUtils } = ChromeUtils.import(
15  "resource://gre/modules/XPCOMUtils.jsm"
16);
17
18XPCOMUtils.defineLazyModuleGetters(this, {
19  Log: "resource://gre/modules/Log.jsm",
20  TelemetryController: "resource://gre/modules/TelemetryController.jsm",
21});
22
23var EXPORTED_SYMBOLS = [
24  "CrashManager",
25  "getCrashManager",
26  // The following are exported for tests only.
27  "CrashStore",
28  "dateToDays",
29  "getCrashManagerNoCreate",
30];
31
32/**
33 * How long to wait after application startup before crash event files are
34 * automatically aggregated.
35 *
36 * We defer aggregation for performance reasons, as we don't want too many
37 * services competing for I/O immediately after startup.
38 */
39const AGGREGATE_STARTUP_DELAY_MS = 57000;
40
const MILLISECONDS_IN_DAY = 24 * 60 * 60 * 1000;

/**
 * Convert a Date into the number of whole days elapsed since the UNIX epoch.
 *
 * The computation is a plain division of the millisecond timestamp, rounded
 * down; leap seconds are not taken into account.
 *
 * @param date {Date} The date to convert
 *
 * @returns {Number} whole days since the epoch (rounded towards -Infinity)
 */
function dateToDays(date) {
  const elapsedMs = date.getTime();
  const elapsedDays = elapsedMs / MILLISECONDS_IN_DAY;
  return Math.floor(elapsedDays);
}
49
50/**
51 * Get a field from the specified object and remove it.
52 *
53 * @param obj {Object} The object holding the field
54 * @param field {String} The name of the field to be parsed and removed
55 *
56 * @returns {String} the field contents as a string, null if none was found
57 */
function getAndRemoveField(obj, field) {
  // Nothing to take out: report absence with null and leave obj untouched.
  if (!(field in obj)) {
    return null;
  }

  const contents = obj[field];
  delete obj[field];
  return contents;
}
68
69/**
70 * Parse the string stored in the specified field as JSON and then remove the
71 * field from the object.
72 *
73 * @param obj {Object} The object holding the field
74 * @param field {String} The name of the field to be parsed and removed
75 *
76 * @returns {Object} the parsed object, null if none was found
77 */
function parseAndRemoveField(obj, field) {
  if (!(field in obj)) {
    return null;
  }

  // A field that fails to parse is reported and yields null, but it is still
  // removed from the object below.
  let parsed = null;
  try {
    parsed = JSON.parse(obj[field]);
  } catch (e) {
    Cu.reportError(e);
  }

  delete obj[field];
  return parsed;
}
93
94/**
95 * A gateway to crash-related data.
96 *
97 * This type is generic and can be instantiated any number of times.
98 * However, most applications will typically only have one instance
99 * instantiated and that instance will point to profile and user appdata
100 * directories.
101 *
102 * Instances are created by passing an object with properties.
103 * Recognized properties are:
104 *
105 *   pendingDumpsDir (string) (required)
106 *     Where dump files that haven't been uploaded are located.
107 *
108 *   submittedDumpsDir (string) (required)
109 *     Where records of uploaded dumps are located.
110 *
111 *   eventsDirs (array)
112 *     Directories (defined as strings) where events files are written. This
 *     instance will collect events from files in the directories specified.
114 *
115 *   storeDir (string)
116 *     Directory we will use for our data store. This instance will write
117 *     data files into the directory specified.
118 *
119 *   telemetryStoreSizeKey (string)
120 *     Telemetry histogram to report store size under.
121 */
var CrashManager = function(options) {
  // Options that are pinned on the instance as read-only "_<name>" properties.
  const readOnlyOptions = [
    "pendingDumpsDir",
    "submittedDumpsDir",
    "eventsDirs",
    "storeDir",
  ];

  for (const name in options) {
    const optionValue = options[name];

    if (readOnlyOptions.includes(name)) {
      const field = "_" + name;
      delete this[field];
      Object.defineProperty(this, field, { value: optionValue });
    } else if (name === "telemetryStoreSizeKey") {
      this._telemetryStoreSizeKey = optionValue;
    } else {
      throw new Error("Unknown property in options: " + name);
    }
  }

  // Promise of the aggregation currently running, if any; it is handed back
  // to callers that request aggregation while one is in flight.
  this._aggregatePromise = null;

  // Crash ID -> deferred, used to track crashes that are still being added.
  this._crashPromises = new Map();

  // Promise for the crash ping; only used by tests.
  this._pingPromise = null;

  // CrashStore instance currently attached to this manager.
  this._store = null;

  // Pending task retrieving the store. Sharing it avoids races when
  // _getStore() is called several times in a short interval.
  this._getStoreTask = null;

  // Timer that controls when the CrashStore instance expires.
  this._storeTimer = null;

  // Semaphore: while non-zero, the timer-based freeing mechanism must not
  // release the store.
  this._storeProtectedCount = 0;
};
168
169CrashManager.prototype = Object.freeze({
  // gen_CrashManager.py will insert the proper process map information.
171  /* SUBST: CRASH_MANAGER_PROCESS_MAP */
172
  // Crash type: a real crash.
  CRASH_TYPE_CRASH: "crash",

  // Crash type: a hang.
  CRASH_TYPE_HANG: "hang",

  // Submission result values.
  SUBMISSION_RESULT_OK: "ok",
  SUBMISSION_RESULT_FAILED: "failed",

  // Filename patterns handed to _getDirectoryEntries(); capture group 1
  // becomes the entry's `id`.
  //   DUMP_REGEX matches "<UUID>.dmp" and captures the UUID.
  //   SUBMITTED_REGEX matches "bp-<UUID>.txt" or "bp-hr-<UUID>.txt" and
  //   captures the UUID.
  //   ALL_REGEX matches any filename and captures it whole.
  DUMP_REGEX: /^([0-9a-f]{8}-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{12})\.dmp$/i,
  SUBMITTED_REGEX: /^bp-(?:hr-)?([0-9a-f]{8}-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{12})\.txt$/i,
  ALL_REGEX: /^(.*)$/,

  // How long (in milliseconds) the store object should persist in memory
  // before our reference to it is dropped so it can be garbage collected.
  STORE_EXPIRATION_MS: 60 * 1000,

  // Number of days after which a crash with no activity will get purged.
  PURGE_OLDER_THAN_DAYS: 180,

  // The following are return codes for individual event file processing.
  // File processed OK.
  EVENT_FILE_SUCCESS: "ok",
  // The event appears to be malformed.
  EVENT_FILE_ERROR_MALFORMED: "malformed",
  // The event is obsolete.
  EVENT_FILE_ERROR_OBSOLETE: "obsolete",
  // The type of event is unknown.
  EVENT_FILE_ERROR_UNKNOWN_EVENT: "unknown-event",
203
204  _lazyGetDir(field, path, leaf) {
205    delete this[field];
206    let value = PathUtils.join(path, leaf);
207    Object.defineProperty(this, field, { value });
208    return value;
209  },
210
211  get _crDir() {
212    return this._lazyGetDir(
213      "_crDir",
214      Services.dirsvc.get("UAppData", Ci.nsIFile).path,
215      "Crash Reports"
216    );
217  },
218
219  get _storeDir() {
220    return this._lazyGetDir(
221      "_storeDir",
222      Services.dirsvc.get("ProfD", Ci.nsIFile).path,
223      "crashes"
224    );
225  },
226
227  get _pendingDumpsDir() {
228    return this._lazyGetDir("_pendingDumpsDir", this._crDir, "pending");
229  },
230
231  get _submittedDumpsDir() {
232    return this._lazyGetDir("_submittedDumpsDir", this._crDir, "submitted");
233  },
234
235  get _eventsDirs() {
236    delete this._eventsDirs;
237    let value = [
238      PathUtils.join(this._crDir, "events"),
239      PathUtils.join(this._storeDir, "events"),
240    ];
241    Object.defineProperty(this, "_eventsDirs", { value });
242    return value;
243  },
244
245  /**
246   * Obtain a list of all dumps pending upload.
247   *
248   * The returned value is a promise that resolves to an array of objects
249   * on success. Each element in the array has the following properties:
250   *
251   *   id (string)
252   *      The ID of the crash (a UUID).
253   *
254   *   path (string)
255   *      The filename of the crash (<UUID.dmp>)
256   *
257   *   date (Date)
258   *      When this dump was created
259   *
   * The returned array is sorted by the modified time of the file backing
261   * the entry, oldest to newest.
262   *
263   * @return Promise<Array>
264   */
265  pendingDumps() {
266    return this._getDirectoryEntries(this._pendingDumpsDir, this.DUMP_REGEX);
267  },
268
269  /**
270   * Obtain a list of all dump files corresponding to submitted crashes.
271   *
272   * The returned value is a promise that resolves to an Array of
273   * objects. Each object has the following properties:
274   *
275   *   path (string)
276   *     The path of the file this entry comes from.
277   *
278   *   id (string)
279   *     The crash UUID.
280   *
281   *   date (Date)
282   *     The (estimated) date this crash was submitted.
283   *
284   * The returned array is sorted by the modified time of the file backing
285   * the entry, oldest to newest.
286   *
287   * @return Promise<Array>
288   */
289  submittedDumps() {
290    return this._getDirectoryEntries(
291      this._submittedDumpsDir,
292      this.SUBMITTED_REGEX
293    );
294  },
295
296  /**
297   * Aggregates "loose" events files into the unified "database."
298   *
299   * This function should be called periodically to collect metadata from
300   * all events files into the central data store maintained by this manager.
301   *
302   * Once events have been stored in the backing store the corresponding
303   * source files are deleted.
304   *
305   * Only one aggregation operation is allowed to occur at a time. If this
306   * is called when an existing aggregation is in progress, the promise for
307   * the original call will be returned.
308   *
309   * @return promise<int> The number of event files that were examined.
310   */
311  aggregateEventsFiles() {
312    if (this._aggregatePromise) {
313      return this._aggregatePromise;
314    }
315
316    return (this._aggregatePromise = (async () => {
317      if (this._aggregatePromise) {
318        return this._aggregatePromise;
319      }
320
321      try {
322        let unprocessedFiles = await this._getUnprocessedEventsFiles();
323
324        let deletePaths = [];
325        let needsSave = false;
326
327        this._storeProtectedCount++;
328        for (let entry of unprocessedFiles) {
329          try {
330            let result = await this._processEventFile(entry);
331
332            switch (result) {
333              case this.EVENT_FILE_SUCCESS:
334                needsSave = true;
335              // Fall through.
336
337              case this.EVENT_FILE_ERROR_MALFORMED:
338              case this.EVENT_FILE_ERROR_OBSOLETE:
339                deletePaths.push(entry.path);
340                break;
341
342              case this.EVENT_FILE_ERROR_UNKNOWN_EVENT:
343                break;
344
345              default:
346                Cu.reportError(
347                  "Unhandled crash event file return code. Please " +
348                    "file a bug: " +
349                    result
350                );
351            }
352          } catch (ex) {
353            if (ex instanceof DOMException) {
354              this._log.warn("I/O error reading " + entry.path, ex);
355            } else {
356              // We should never encounter an exception. This likely represents
357              // a coding error because all errors should be detected and
358              // converted to return codes.
359              //
360              // If we get here, report the error and delete the source file
361              // so we don't see it again.
362              Cu.reportError(
363                "Exception when processing crash event file: " +
364                  Log.exceptionStr(ex)
365              );
366              deletePaths.push(entry.path);
367            }
368          }
369        }
370
371        if (needsSave) {
372          let store = await this._getStore();
373          await store.save();
374        }
375
376        for (let path of deletePaths) {
377          try {
378            await IOUtils.remove(path);
379          } catch (ex) {
380            this._log.warn("Error removing event file (" + path + ")", ex);
381          }
382        }
383
384        return unprocessedFiles.length;
385      } finally {
386        this._aggregatePromise = false;
387        this._storeProtectedCount--;
388      }
389    })());
390  },
391
392  /**
393   * Prune old crash data.
394   *
395   * @param date
396   *        (Date) The cutoff point for pruning. Crashes without data newer
397   *        than this will be pruned.
398   */
399  pruneOldCrashes(date) {
400    return (async () => {
401      let store = await this._getStore();
402      store.pruneOldCrashes(date);
403      await store.save();
404    })();
405  },
406
407  /**
408   * Run tasks that should be periodically performed.
409   */
410  runMaintenanceTasks() {
411    return (async () => {
412      await this.aggregateEventsFiles();
413
414      let offset = this.PURGE_OLDER_THAN_DAYS * MILLISECONDS_IN_DAY;
415      await this.pruneOldCrashes(new Date(Date.now() - offset));
416    })();
417  },
418
419  /**
420   * Schedule maintenance tasks for some point in the future.
421   *
422   * @param delay
423   *        (integer) Delay in milliseconds when maintenance should occur.
424   */
425  scheduleMaintenance(delay) {
426    let deferred = PromiseUtils.defer();
427
428    setTimeout(() => {
429      this.runMaintenanceTasks().then(deferred.resolve, deferred.reject);
430    }, delay);
431
432    return deferred.promise;
433  },
434
435  /**
436   * Record the occurrence of a crash.
437   *
438   * This method skips event files altogether and writes directly and
439   * immediately to the manager's data store.
440   *
441   * @param processType (string) One of the PROCESS_TYPE constants.
442   * @param crashType (string) One of the CRASH_TYPE constants.
443   * @param id (string) Crash ID. Likely a UUID.
444   * @param date (Date) When the crash occurred.
445   * @param metadata (dictionary) Crash metadata, may be empty.
446   *
447   * @return promise<null> Resolved when the store has been saved.
448   */
449  addCrash(processType, crashType, id, date, metadata) {
450    let promise = (async () => {
451      if (!this.isValidProcessType(processType)) {
452        Cu.reportError(
453          "Unhandled process type. Please file a bug: '" + processType +
454          "'. Ignore in the context of " +
455	  "test_crash_manager.js:test_addCrashWrong().");
456        return;
457      }
458
459      let store = await this._getStore();
460      if (store.addCrash(processType, crashType, id, date, metadata)) {
461        await store.save();
462      }
463
464      let deferred = this._crashPromises.get(id);
465
466      if (deferred) {
467        this._crashPromises.delete(id);
468        deferred.resolve();
469      }
470
471      if (this.isPingAllowed(processType)) {
472        this._sendCrashPing(id, processType, date, metadata);
473      }
474    })();
475
476    return promise;
477  },
478
479  /**
480   * Check that the processType parameter is a valid one:
481   *  - it is a string
482   *  - it is listed in this.processTypes
483   *
484   * @param processType (string) Process type to evaluate
485   *
486   * @return boolean True or false depending whether it is a legit one
487   */
488  isValidProcessType(processType) {
489    if (typeof(processType) !== "string") {
490      return false;
491    }
492
493    for (const pt of Object.values(this.processTypes)) {
494      if (pt === processType) {
495        return true;
496      }
497    }
498
499    return false;
500  },
501
502  /**
503   * Check that processType is allowed to send a ping
504   *
505   * @param processType (string) Process type to check for
506   *
507   * @return boolean True or False depending on whether ping is allowed
508   **/
509  isPingAllowed(processType) {
510    // gen_CrashManager.py will input the proper process pings informations.
511
512    /* SUBST: CRASH_MANAGER_PROCESS_PINGS */
513
514    // Should not even reach this because of isValidProcessType() but just in
515    // case we try to be cautious
516    if (!(processType in processPings)) {
517      return false;
518    }
519
520    return processPings[processType];
521  },
522
523  /**
   * Returns a promise that is resolved only once the crash with the specified id
525   * has been fully recorded.
526   *
527   * @param id (string) Crash ID. Likely a UUID.
528   *
529   * @return promise<null> Resolved when the crash is present.
530   */
531  async ensureCrashIsPresent(id) {
532    let store = await this._getStore();
533    let crash = store.getCrash(id);
534
535    if (crash) {
536      return Promise.resolve();
537    }
538
539    let deferred = PromiseUtils.defer();
540
541    this._crashPromises.set(id, deferred);
542    return deferred.promise;
543  },
544
545  /**
546   * Record the remote ID for a crash.
547   *
548   * @param crashID (string) Crash ID. Likely a UUID.
   * @param remoteID (string) Server/Breakpad ID.
550   *
551   * @return boolean True if the remote ID was recorded.
552   */
553  async setRemoteCrashID(crashID, remoteID) {
554    let store = await this._getStore();
555    if (store.setRemoteCrashID(crashID, remoteID)) {
556      await store.save();
557    }
558  },
559
560  /**
561   * Generate a submission ID for use with addSubmission{Attempt,Result}.
562   */
563  generateSubmissionID() {
564    return (
565      "sub-" +
566      Services.uuid
567        .generateUUID()
568        .toString()
569        .slice(1, -1)
570    );
571  },
572
573  /**
574   * Record the occurrence of a submission attempt for a crash.
575   *
576   * @param crashID (string) Crash ID. Likely a UUID.
577   * @param submissionID (string) Submission ID. Likely a UUID.
578   * @param date (Date) When the attempt occurred.
579   *
580   * @return boolean True if the attempt was recorded and false if not.
581   */
582  async addSubmissionAttempt(crashID, submissionID, date) {
583    let store = await this._getStore();
584    if (store.addSubmissionAttempt(crashID, submissionID, date)) {
585      await store.save();
586    }
587  },
588
589  /**
590   * Record the occurrence of a submission result for a crash.
591   *
592   * @param crashID (string) Crash ID. Likely a UUID.
593   * @param submissionID (string) Submission ID. Likely a UUID.
594   * @param date (Date) When the submission result was obtained.
595   * @param result (string) One of the SUBMISSION_RESULT constants.
596   *
597   * @return boolean True if the result was recorded and false if not.
598   */
599  async addSubmissionResult(crashID, submissionID, date, result) {
600    let store = await this._getStore();
601    if (store.addSubmissionResult(crashID, submissionID, date, result)) {
602      await store.save();
603    }
604  },
605
606  /**
607   * Set the classification of a crash.
608   *
609   * @param crashID (string) Crash ID. Likely a UUID.
610   * @param classifications (array) Crash classifications.
611   *
612   * @return boolean True if the data was recorded and false if not.
613   */
614  async setCrashClassifications(crashID, classifications) {
615    let store = await this._getStore();
616    if (store.setCrashClassifications(crashID, classifications)) {
617      await store.save();
618    }
619  },
620
621  /**
622   * Obtain the paths of all unprocessed events files.
623   *
624   * The promise-resolved array is sorted by file mtime, oldest to newest.
625   */
626  _getUnprocessedEventsFiles() {
627    return (async () => {
628      try {
629        let entries = [];
630
631        for (let dir of this._eventsDirs) {
632          for (let e of await this._getDirectoryEntries(dir, this.ALL_REGEX)) {
633            entries.push(e);
634          }
635        }
636
637        entries.sort((a, b) => {
638          return a.date - b.date;
639        });
640
641        return entries;
642      } catch (e) {
643        Cu.reportError(e);
644        return [];
645      }
646    })();
647  },
648
649  // See docs/crash-events.rst for the file format specification.
650  _processEventFile(entry) {
651    return (async () => {
652      let data = await IOUtils.read(entry.path);
653      let store = await this._getStore();
654
655      let decoder = new TextDecoder();
656      data = decoder.decode(data);
657
658      let type, time;
659      let start = 0;
660      for (let i = 0; i < 2; i++) {
661        let index = data.indexOf("\n", start);
662        if (index == -1) {
663          return this.EVENT_FILE_ERROR_MALFORMED;
664        }
665
666        let sub = data.substring(start, index);
667        switch (i) {
668          case 0:
669            type = sub;
670            break;
671          case 1:
672            time = sub;
673            try {
674              time = parseInt(time, 10);
675            } catch (ex) {
676              return this.EVENT_FILE_ERROR_MALFORMED;
677            }
678        }
679
680        start = index + 1;
681      }
682      let date = new Date(time * 1000);
683      let payload = data.substring(start);
684
685      return this._handleEventFilePayload(store, entry, type, date, payload);
686    })();
687  },
688
689  _filterAnnotations(annotations) {
690    let filteredAnnotations = {};
691    let crashReporter = Cc["@mozilla.org/toolkit/crash-reporter;1"].getService(
692      Ci.nsICrashReporter
693    );
694
695    for (let line in annotations) {
696      try {
697        if (crashReporter.isAnnotationWhitelistedForPing(line)) {
698          filteredAnnotations[line] = annotations[line];
699        }
700      } catch (e) {
701        // Silently drop unknown annotations
702      }
703    }
704
705    return filteredAnnotations;
706  },
707
708  _sendCrashPing(crashId, type, date, metadata = {}) {
709    // If we have a saved environment, use it. Otherwise report
710    // the current environment.
711    let reportMeta = Cu.cloneInto(metadata, myScope);
712    let crashEnvironment = parseAndRemoveField(
713      reportMeta,
714      "TelemetryEnvironment"
715    );
716    let sessionId = getAndRemoveField(reportMeta, "TelemetrySessionId");
717    let stackTraces = getAndRemoveField(reportMeta, "StackTraces");
718    let minidumpSha256Hash = getAndRemoveField(
719      reportMeta,
720      "MinidumpSha256Hash"
721    );
722
723    // Filter the remaining annotations to remove privacy-sensitive ones
724    reportMeta = this._filterAnnotations(reportMeta);
725
726    this._pingPromise = TelemetryController.submitExternalPing(
727      "crash",
728      {
729        version: 1,
730        crashDate: date.toISOString().slice(0, 10), // YYYY-MM-DD
731        crashTime: date.toISOString().slice(0, 13) + ":00:00.000Z", // per-hour resolution
732        sessionId,
733        crashId,
734        minidumpSha256Hash,
735        processType: type,
736        stackTraces,
737        metadata: reportMeta,
738        hasCrashEnvironment: crashEnvironment !== null,
739      },
740      {
741        addClientId: true,
742        addEnvironment: true,
743        overrideEnvironment: crashEnvironment,
744      }
745    );
746  },
747
748  _handleEventFilePayload(store, entry, type, date, payload) {
749    // The payload types and formats are documented in docs/crash-events.rst.
750    // Do not change the format of an existing type. Instead, invent a new
751    // type.
752    // DO NOT ADD NEW TYPES WITHOUT DOCUMENTING!
753    let lines = payload.split("\n");
754
755    switch (type) {
756      case "crash.main.1":
757      case "crash.main.2":
758        return this.EVENT_FILE_ERROR_OBSOLETE;
759
760      case "crash.main.3":
761        let crashID = lines[0];
762        let metadata = JSON.parse(lines[1]);
763        store.addCrash(
764          this.processTypes[Ci.nsIXULRuntime.PROCESS_TYPE_DEFAULT],
765          this.CRASH_TYPE_CRASH,
766          crashID,
767          date,
768          metadata
769        );
770
771        if (!("CrashPingUUID" in metadata)) {
772          // If CrashPingUUID is not present then a ping was not generated
773          // by the crashreporter for this crash so we need to send one from
774          // here.
775          this._sendCrashPing(crashID, this.processTypes[Ci.nsIXULRuntime.PROCESS_TYPE_DEFAULT], date, metadata);
776        }
777
778        break;
779
780      case "crash.submission.1":
781        if (lines.length == 3) {
782          let [crashID, result, remoteID] = lines;
783          store.addCrash(
784            this.processTypes[Ci.nsIXULRuntime.PROCESS_TYPE_DEFAULT],
785            this.CRASH_TYPE_CRASH,
786            crashID,
787            date
788          );
789
790          let submissionID = this.generateSubmissionID();
791          let succeeded = result === "true";
792          store.addSubmissionAttempt(crashID, submissionID, date);
793          store.addSubmissionResult(
794            crashID,
795            submissionID,
796            date,
797            succeeded
798              ? this.SUBMISSION_RESULT_OK
799              : this.SUBMISSION_RESULT_FAILED
800          );
801          if (succeeded) {
802            store.setRemoteCrashID(crashID, remoteID);
803          }
804        } else {
805          return this.EVENT_FILE_ERROR_MALFORMED;
806        }
807        break;
808
809      default:
810        return this.EVENT_FILE_ERROR_UNKNOWN_EVENT;
811    }
812
813    return this.EVENT_FILE_SUCCESS;
814  },
815
816  /**
817   * The resolved promise is an array of objects with the properties:
818   *
819   *   path -- String filename
820   *   id -- regexp.match()[1] (likely the crash ID)
821   *   date -- Date mtime of the file
822   */
823  _getDirectoryEntries(path, re) {
824    return (async function() {
825      let children = await IOUtils.getChildren(path);
826      let entries = [];
827
828      for (const entry of children) {
829        let stat = await IOUtils.stat(entry);
830        if (stat.type == "directory") {
831          continue;
832        }
833
834        let filename = PathUtils.filename(entry);
835        let match = re.exec(filename);
836        if (!match) {
837          continue;
838        }
839        entries.push({
840          path: entry,
841          id: match[1],
842          date: stat.lastModified,
843        });
844      }
845
846      entries.sort((a, b) => {
847        return a.date - b.date;
848      });
849
850      return entries;
851    })();
852  },
853
  /**
   * Obtain the CrashStore attached to this manager, creating and loading it
   * if there is none, and (re)arm the timer that releases it again.
   *
   * Calls made while a retrieval is already in flight receive the same
   * promise.
   *
   * @return promise<CrashStore> Resolved with the loaded store.
   */
  _getStore() {
    // Share the in-flight retrieval to avoid racing store creation.
    if (this._getStoreTask) {
      return this._getStoreTask;
    }

    return (this._getStoreTask = (async () => {
      try {
        if (!this._store) {
          await IOUtils.makeDirectory(this._storeDir, {
            permissions: 0o700,
          });

          let store = new CrashStore(
            this._storeDir,
            this._telemetryStoreSizeKey
          );
          await store.load();

          // The store and its expiration timer are always set up together.
          this._store = store;
          this._storeTimer = Cc["@mozilla.org/timer;1"].createInstance(
            Ci.nsITimer
          );
        }

        // The application can go long periods without interacting with the
        // store. Since the store takes up resources, we automatically "free"
        // the store after inactivity so resources can be returned to the
        // system. We do this via a timer and a mechanism that tracks when the
        // store is being accessed.
        this._storeTimer.cancel();

        // This callback frees resources from the store unless the store
        // is protected from freeing by some other process.
        let timerCB = () => {
          // While protected, re-arm the timer instead of releasing the store.
          if (this._storeProtectedCount) {
            this._storeTimer.initWithCallback(
              timerCB,
              this.STORE_EXPIRATION_MS,
              this._storeTimer.TYPE_ONE_SHOT
            );
            return;
          }

          // We kill the reference that we hold. GC will kill it later. If
          // someone else holds a reference, that will prevent GC until that
          // reference is gone.
          this._store = null;
          this._storeTimer = null;
        };

        // Every access restarts the expiration countdown.
        this._storeTimer.initWithCallback(
          timerCB,
          this.STORE_EXPIRATION_MS,
          this._storeTimer.TYPE_ONE_SHOT
        );

        return this._store;
      } finally {
        // Allow the next call to start a fresh retrieval.
        this._getStoreTask = null;
      }
    })());
  },
916
917  /**
918   * Obtain information about all known crashes.
919   *
920   * Returns an array of CrashRecord instances. Instances are read-only.
921   */
922  getCrashes() {
923    return (async () => {
924      let store = await this._getStore();
925
926      return store.crashes;
927    })();
928  },
929
930  getCrashCountsByDay() {
931    return (async () => {
932      let store = await this._getStore();
933
934      return store._countsByDay;
935    })();
936  },
937});
938
939var gCrashManager;
940
941/**
942 * Interface to storage of crash data.
943 *
944 * This type handles storage of crash metadata. It exists as a separate type
945 * from the crash manager for performance reasons: since all crash metadata
946 * needs to be loaded into memory for access, we wish to easily dispose of all
947 * associated memory when this data is no longer needed. Having an isolated
 * object whose references can easily be lost facilitates that simple disposal.
949 *
950 * When metadata is updated, the caller must explicitly persist the changes
951 * to disk. This prevents excessive I/O during updates.
952 *
953 * The store has a mechanism for ensuring it doesn't grow too large. A ceiling
954 * is placed on the number of daily events that can occur for events that can
955 * occur with relatively high frequency. If we've reached
956 * the high water mark and new data arrives, it's silently dropped.
957 * However, the count of actual events is always preserved. This allows
958 * us to report on the severity of problems beyond the storage threshold.
959 *
960 * Main process crashes are excluded from limits because they are both
961 * important and should be rare.
962 *
963 * @param storeDir (string)
964 *        Directory the store should be located in.
965 * @param telemetrySizeKey (string)
966 *        The telemetry histogram that should be used to store the size
967 *        of the data file.
968 */
function CrashStore(storeDir, telemetrySizeKey) {
  Object.assign(this, {
    _storeDir: storeDir,
    _telemetrySizeKey: telemetrySizeKey,

    // Location of the data file inside the store directory.
    _storePath: PathUtils.join(storeDir, "store.json.mozlz4"),

    // Data read from disk; stays null until the store is reset or loaded.
    _data: null,

    // Days since UNIX epoch -> Map of event type -> count. Populated when
    // the JSON file is loaded and updated as new events are added.
    _countsByDay: new Map(),
  });
}
983
984CrashStore.prototype = Object.freeze({
985  // Maximum number of events to store per day. This establishes a
986  // ceiling on the per-type/per-day records that will be stored.
987  HIGH_WATER_DAILY_THRESHOLD: 500,
988
989  /**
990   * Reset all data.
991   */
992  reset() {
993    this._data = {
994      v: 1,
995      crashes: new Map(),
996      corruptDate: null,
997    };
998    this._countsByDay = new Map();
999  },
1000
1001  /**
1002   * Load data from disk.
1003   *
1004   * @return Promise
1005   */
1006  load() {
1007    return (async () => {
1008      // Loading replaces data.
1009      this.reset();
1010
1011      try {
1012        let decoder = new TextDecoder();
1013        let data = await IOUtils.read(this._storePath, { decompress: true });
1014        data = JSON.parse(decoder.decode(data));
1015
1016        if (data.corruptDate) {
1017          this._data.corruptDate = new Date(data.corruptDate);
1018        }
1019
1020        // actualCounts is used to validate that the derived counts by
1021        // days stored in the payload matches up to actual data.
1022        let actualCounts = new Map();
1023
1024        // In the past, submissions were stored as separate crash records
1025        // with an id of e.g. "someID-submission". If we find IDs ending
1026        // with "-submission", we will need to convert the data to be stored
1027        // as actual submissions.
1028        //
1029        // The old way of storing submissions was used from FF33 - FF34. We
1030        // drop this old data on the floor.
1031        for (let id in data.crashes) {
1032          if (id.endsWith("-submission")) {
1033            continue;
1034          }
1035
1036          let crash = data.crashes[id];
1037          let denormalized = this._denormalize(crash);
1038
1039          denormalized.submissions = new Map();
1040          if (crash.submissions) {
1041            for (let submissionID in crash.submissions) {
1042              let submission = crash.submissions[submissionID];
1043              denormalized.submissions.set(
1044                submissionID,
1045                this._denormalize(submission)
1046              );
1047            }
1048          }
1049
1050          this._data.crashes.set(id, denormalized);
1051
1052          let key =
1053            dateToDays(denormalized.crashDate) + "-" + denormalized.type;
1054          actualCounts.set(key, (actualCounts.get(key) || 0) + 1);
1055
1056          // If we have an OOM size, count the crash as an OOM in addition to
1057          // being a main process crash.
1058          if (
1059            denormalized.metadata &&
1060            denormalized.metadata.OOMAllocationSize
1061          ) {
1062            let oomKey = key + "-oom";
1063            actualCounts.set(oomKey, (actualCounts.get(oomKey) || 0) + 1);
1064          }
1065        }
1066
1067        // The validation in this loop is arguably not necessary. We perform
1068        // it as a defense against unknown bugs.
1069        for (let dayKey in data.countsByDay) {
1070          let day = parseInt(dayKey, 10);
1071          for (let type in data.countsByDay[day]) {
1072            this._ensureCountsForDay(day);
1073
1074            let count = data.countsByDay[day][type];
1075            let key = day + "-" + type;
1076
1077            // If the payload says we have data for a given day but we
1078            // don't, the payload is wrong. Ignore it.
1079            if (!actualCounts.has(key)) {
1080              continue;
1081            }
1082
1083            // If we encountered more data in the payload than what the
1084            // data structure says, use the proper value.
1085            count = Math.max(count, actualCounts.get(key));
1086
1087            this._countsByDay.get(day).set(type, count);
1088          }
1089        }
1090      } catch (ex) {
1091        // Missing files (first use) are allowed.
1092        if (!(ex instanceof DOMException) || ex.name != "NotFoundError") {
1093          // If we can't load for any reason, mark a corrupt date in the instance
1094          // and swallow the error.
1095          //
1096          // The marking of a corrupted file is intentionally not persisted to
1097          // disk yet. Instead, we wait until the next save(). This is to give
1098          // non-permanent failures the opportunity to recover on their own.
1099          this._data.corruptDate = new Date();
1100        }
1101      }
1102    })();
1103  },
1104
1105  /**
1106   * Save data to disk.
1107   *
1108   * @return Promise<null>
1109   */
1110  save() {
1111    return (async () => {
1112      if (!this._data) {
1113        return;
1114      }
1115
1116      let normalized = {
1117        // The version should be incremented whenever the format
1118        // changes.
1119        v: 1,
1120        // Maps crash IDs to objects defining the crash.
1121        crashes: {},
1122        // Maps days since UNIX epoch to objects mapping event types to
1123        // counts. This is a mirror of this._countsByDay. e.g.
1124        // {
1125        //    15000: {
1126        //        "main-crash": 2,
1127        //        "plugin-crash": 1
1128        //    }
1129        // }
1130        countsByDay: {},
1131
1132        // When the store was last corrupted.
1133        corruptDate: null,
1134      };
1135
1136      if (this._data.corruptDate) {
1137        normalized.corruptDate = this._data.corruptDate.getTime();
1138      }
1139
1140      for (let [id, crash] of this._data.crashes) {
1141        let c = this._normalize(crash);
1142
1143        c.submissions = {};
1144        for (let [submissionID, submission] of crash.submissions) {
1145          c.submissions[submissionID] = this._normalize(submission);
1146        }
1147
1148        normalized.crashes[id] = c;
1149      }
1150
1151      for (let [day, m] of this._countsByDay) {
1152        normalized.countsByDay[day] = {};
1153        for (let [type, count] of m) {
1154          normalized.countsByDay[day][type] = count;
1155        }
1156      }
1157
1158      let encoder = new TextEncoder();
1159      let data = encoder.encode(JSON.stringify(normalized));
1160      let size = await IOUtils.write(this._storePath, data, {
1161        tmpPath: this._storePath + ".tmp",
1162        compress: true,
1163      });
1164      if (this._telemetrySizeKey) {
1165        Services.telemetry.getHistogramById(this._telemetrySizeKey).add(size);
1166      }
1167    })();
1168  },
1169
1170  /**
1171   * Normalize an object into one fit for serialization.
1172   *
1173   * This function along with _denormalize() serve to hack around the
1174   * default handling of Date JSON serialization because Date serialization
1175   * is undefined by JSON.
1176   *
1177   * Fields ending with "Date" are assumed to contain Date instances.
1178   * We convert these to milliseconds since epoch on output and back to
1179   * Date on input.
1180   */
1181  _normalize(o) {
1182    let normalized = {};
1183
1184    for (let k in o) {
1185      let v = o[k];
1186      if (v && k.endsWith("Date")) {
1187        normalized[k] = v.getTime();
1188      } else {
1189        normalized[k] = v;
1190      }
1191    }
1192
1193    return normalized;
1194  },
1195
1196  /**
1197   * Convert a serialized object back to its native form.
1198   */
1199  _denormalize(o) {
1200    let n = {};
1201
1202    for (let k in o) {
1203      let v = o[k];
1204      if (v && k.endsWith("Date")) {
1205        n[k] = new Date(parseInt(v, 10));
1206      } else {
1207        n[k] = v;
1208      }
1209    }
1210
1211    return n;
1212  },
1213
1214  /**
1215   * Prune old crash data.
1216   *
1217   * Crashes without recent activity are pruned from the store so the
1218   * size of the store is not unbounded. If there is activity on a crash,
1219   * that activity will keep the crash and all its data around for longer.
1220   *
1221   * @param date
1222   *        (Date) The cutoff at which data will be pruned. If an entry
1223   *        doesn't have data newer than this, it will be pruned.
1224   */
1225  pruneOldCrashes(date) {
1226    for (let crash of this.crashes) {
1227      let newest = crash.newestDate;
1228      if (!newest || newest.getTime() < date.getTime()) {
1229        this._data.crashes.delete(crash.id);
1230      }
1231    }
1232  },
1233
1234  /**
1235   * Date the store was last corrupted and required a reset.
1236   *
1237   * May be null (no corruption has ever occurred) or a Date instance.
1238   */
1239  get corruptDate() {
1240    return this._data.corruptDate;
1241  },
1242
1243  /**
1244   * The number of distinct crashes tracked.
1245   */
1246  get crashesCount() {
1247    return this._data.crashes.size;
1248  },
1249
1250  /**
1251   * All crashes tracked.
1252   *
1253   * This is an array of CrashRecord.
1254   */
1255  get crashes() {
1256    let crashes = [];
1257    for (let [, crash] of this._data.crashes) {
1258      crashes.push(new CrashRecord(crash));
1259    }
1260
1261    return crashes;
1262  },
1263
1264  /**
1265   * Obtain a particular crash from its ID.
1266   *
1267   * A CrashRecord will be returned if the crash exists. null will be returned
1268   * if the crash is unknown.
1269   */
1270  getCrash(id) {
1271    for (let crash of this.crashes) {
1272      if (crash.id == id) {
1273        return crash;
1274      }
1275    }
1276
1277    return null;
1278  },
1279
1280  _ensureCountsForDay(day) {
1281    if (!this._countsByDay.has(day)) {
1282      this._countsByDay.set(day, new Map());
1283    }
1284  },
1285
1286  /**
1287   * Ensure the crash record is present in storage.
1288   *
1289   * Returns the crash record if we're allowed to store it or null
1290   * if we've hit the high water mark.
1291   *
1292   * @param processType
1293   *        (string) One of the PROCESS_TYPE constants.
1294   * @param crashType
1295   *        (string) One of the CRASH_TYPE constants.
1296   * @param id
1297   *        (string) The crash ID.
1298   * @param date
1299   *        (Date) When this crash occurred.
1300   * @param metadata
1301   *        (dictionary) Crash metadata, may be empty.
1302   *
1303   * @return null | object crash record
1304   */
1305  _ensureCrashRecord(processType, crashType, id, date, metadata) {
1306    if (!id) {
1307      // Crashes are keyed on ID, so it's not really helpful to store crashes
1308      // without IDs.
1309      return null;
1310    }
1311
1312    let type = processType + "-" + crashType;
1313
1314    if (!this._data.crashes.has(id)) {
1315      let day = dateToDays(date);
1316      this._ensureCountsForDay(day);
1317
1318      let count = (this._countsByDay.get(day).get(type) || 0) + 1;
1319      this._countsByDay.get(day).set(type, count);
1320
1321      if (
1322        count > this.HIGH_WATER_DAILY_THRESHOLD &&
1323        processType != CrashManager.prototype.processTypes[Ci.nsIXULRuntime.PROCESS_TYPE_DEFAULT]
1324      ) {
1325        return null;
1326      }
1327
1328      // If we have an OOM size, count the crash as an OOM in addition to
1329      // being a main process crash.
1330      if (metadata && metadata.OOMAllocationSize) {
1331        let oomType = type + "-oom";
1332        let oomCount = (this._countsByDay.get(day).get(oomType) || 0) + 1;
1333        this._countsByDay.get(day).set(oomType, oomCount);
1334      }
1335
1336      this._data.crashes.set(id, {
1337        id,
1338        remoteID: null,
1339        type,
1340        crashDate: date,
1341        submissions: new Map(),
1342        classifications: [],
1343        metadata,
1344      });
1345    }
1346
1347    let crash = this._data.crashes.get(id);
1348    crash.type = type;
1349    crash.crashDate = date;
1350
1351    return crash;
1352  },
1353
1354  /**
1355   * Record the occurrence of a crash.
1356   *
1357   * @param processType (string) One of the PROCESS_TYPE constants.
1358   * @param crashType (string) One of the CRASH_TYPE constants.
1359   * @param id (string) Crash ID. Likely a UUID.
1360   * @param date (Date) When the crash occurred.
1361   * @param metadata (dictionary) Crash metadata, may be empty.
1362   *
1363   * @return boolean True if the crash was recorded and false if not.
1364   */
1365  addCrash(processType, crashType, id, date, metadata) {
1366    return !!this._ensureCrashRecord(
1367      processType,
1368      crashType,
1369      id,
1370      date,
1371      metadata
1372    );
1373  },
1374
1375  /**
1376   * @return boolean True if the remote ID was recorded and false if not.
1377   */
1378  setRemoteCrashID(crashID, remoteID) {
1379    let crash = this._data.crashes.get(crashID);
1380    if (!crash || !remoteID) {
1381      return false;
1382    }
1383
1384    crash.remoteID = remoteID;
1385    return true;
1386  },
1387
1388  /**
1389   * @param processType (string) One of the PROCESS_TYPE constants.
1390   * @param crashType (string) One of the CRASH_TYPE constants.
1391   *
1392   * @return array of crashes
1393   */
1394  getCrashesOfType(processType, crashType) {
1395    let crashes = [];
1396    for (let crash of this.crashes) {
1397      if (crash.isOfType(processType, crashType)) {
1398        crashes.push(crash);
1399      }
1400    }
1401
1402    return crashes;
1403  },
1404
1405  /**
1406   * Ensure the submission record is present in storage.
1407   * @returns [submission, crash]
1408   */
1409  _ensureSubmissionRecord(crashID, submissionID) {
1410    let crash = this._data.crashes.get(crashID);
1411    if (!crash || !submissionID) {
1412      return null;
1413    }
1414
1415    if (!crash.submissions.has(submissionID)) {
1416      crash.submissions.set(submissionID, {
1417        requestDate: null,
1418        responseDate: null,
1419        result: null,
1420      });
1421    }
1422
1423    return [crash.submissions.get(submissionID), crash];
1424  },
1425
1426  /**
1427   * @return boolean True if the attempt was recorded.
1428   */
1429  addSubmissionAttempt(crashID, submissionID, date) {
1430    let [submission, crash] = this._ensureSubmissionRecord(
1431      crashID,
1432      submissionID
1433    );
1434    if (!submission) {
1435      return false;
1436    }
1437
1438    submission.requestDate = date;
1439    Services.telemetry
1440      .getKeyedHistogramById("PROCESS_CRASH_SUBMIT_ATTEMPT")
1441      .add(crash.type, 1);
1442    return true;
1443  },
1444
1445  /**
1446   * @return boolean True if the response was recorded.
1447   */
1448  addSubmissionResult(crashID, submissionID, date, result) {
1449    let crash = this._data.crashes.get(crashID);
1450    if (!crash || !submissionID) {
1451      return false;
1452    }
1453    let submission = crash.submissions.get(submissionID);
1454    if (!submission) {
1455      return false;
1456    }
1457
1458    submission.responseDate = date;
1459    submission.result = result;
1460    Services.telemetry
1461      .getKeyedHistogramById("PROCESS_CRASH_SUBMIT_SUCCESS")
1462      .add(crash.type, result == "ok");
1463    return true;
1464  },
1465
1466  /**
1467   * @return boolean True if the classifications were set.
1468   */
1469  setCrashClassifications(crashID, classifications) {
1470    let crash = this._data.crashes.get(crashID);
1471    if (!crash) {
1472      return false;
1473    }
1474
1475    crash.classifications = classifications;
1476    return true;
1477  },
1478});
1479
1480/**
1481 * Represents an individual crash with metadata.
1482 *
1483 * This is a wrapper around the low-level anonymous JS objects that define
1484 * crashes. It exposes a consistent and helpful API.
1485 *
 * Instances of this type should only be constructed inside this module,
1487 * not externally. The constructor is not considered a public API.
1488 *
1489 * @param o (object)
1490 *        The crash's entry from the CrashStore.
1491 */
function CrashRecord(o) {
  // Keep a reference to the store's entry rather than a copy, so the
  // getters below always reflect the current state of that entry.
  this._o = o;
}

CrashRecord.prototype = Object.freeze({
  /** The crash ID of the wrapped entry. */
  get id() {
    const { id } = this._o;
    return id;
  },

  /** The remote ID of the wrapped entry. */
  get remoteID() {
    const { remoteID } = this._o;
    return remoteID;
  },

  /** The entry's crash date. */
  get crashDate() {
    const { crashDate } = this._o;
    return crashDate;
  },

  /**
   * The most recent date held by this record.
   *
   * Provided for convenience; the returned value is used to determine when
   * a record expires. The crash date is the only date a record has.
   */
  get newestDate() {
    const { crashDate } = this._o;
    return crashDate;
  },

  /** The oldest date held by this record, i.e. the crash date. */
  get oldestDate() {
    const { crashDate } = this._o;
    return crashDate;
  },

  /** The entry's type string, in "<processType>-<crashType>" form. */
  get type() {
    const { type } = this._o;
    return type;
  },

  /**
   * Whether this record has the given process and crash type.
   *
   * @param processType (string) The process type to match.
   * @param crashType (string) The crash type to match.
   *
   * @return boolean
   */
  isOfType(processType, crashType) {
    const wanted = processType + "-" + crashType;
    return wanted == this.type;
  },

  /** The entry's submissions. */
  get submissions() {
    const { submissions } = this._o;
    return submissions;
  },

  /** The entry's classifications. */
  get classifications() {
    const { classifications } = this._o;
    return classifications;
  },

  /** The entry's metadata. */
  get metadata() {
    const { metadata } = this._o;
    return metadata;
  },
});
1544
// The logger is only looked up the first time CrashManager._log is read.
XPCOMUtils.defineLazyGetter(CrashManager, "_log", function() {
  return Log.repository.getLogger("Crashes.CrashManager");
});
1548
1549/**
1550 * Obtain the global CrashManager instance used by the running application.
1551 *
1552 * CrashManager is likely only ever instantiated once per application lifetime.
1553 * The main reason it's implemented as a reusable type is to facilitate testing.
1554 */
XPCOMUtils.defineLazyGetter(CrashManager, "Singleton", () => {
  if (!gCrashManager) {
    gCrashManager = new CrashManager({
      telemetryStoreSizeKey: "CRASH_STORE_COMPRESSED_BYTES",
    });

    // Aggregate event files automatically shortly after startup, so that
    // it happens with some frequency.
    //
    // This does have a performance cost, but the work per run stays small
    // because event files are aggregated periodically, and well-behaving
    // installs should have little to do here. An install with a lot of
    // work pending has bigger issues than reduced performance near
    // startup.
    gCrashManager.scheduleMaintenance(AGGREGATE_STARTUP_DELAY_MS);
  }

  return gCrashManager;
});
1577
/**
 * Return the application-wide CrashManager by reading the lazy
 * CrashManager.Singleton getter.
 *
 * @returns {CrashManager}
 */
function getCrashManager() {
  const { Singleton } = CrashManager;
  return Singleton;
}
1581
1582/**
1583 * Used for tests to check the crash manager is created on profile creation.
1584 *
1585 * @returns {CrashManager}
1586 */
function getCrashManagerNoCreate() {
  // Hand back whatever gCrashManager currently holds. Unlike
  // getCrashManager(), this does not read CrashManager.Singleton.
  const existing = gCrashManager;
  return existing;
}
1590