1/* This Source Code Form is subject to the terms of the Mozilla Public
2 * License, v. 2.0. If a copy of the MPL was not distributed with this
3 * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
4
5"use strict";
6
7const myScope = this;
8
9const { PromiseUtils } = ChromeUtils.import(
10  "resource://gre/modules/PromiseUtils.jsm"
11);
12const { Services } = ChromeUtils.import("resource://gre/modules/Services.jsm");
13const { setTimeout } = ChromeUtils.import("resource://gre/modules/Timer.jsm");
14const { XPCOMUtils } = ChromeUtils.import(
15  "resource://gre/modules/XPCOMUtils.jsm"
16);
17
18XPCOMUtils.defineLazyModuleGetters(this, {
19  Log: "resource://gre/modules/Log.jsm",
20  TelemetryController: "resource://gre/modules/TelemetryController.jsm",
21});
22
23var EXPORTED_SYMBOLS = ["CrashManager", "getCrashManager"];
24
25/**
26 * How long to wait after application startup before crash event files are
27 * automatically aggregated.
28 *
29 * We defer aggregation for performance reasons, as we don't want too many
30 * services competing for I/O immediately after startup.
31 */
const AGGREGATE_STARTUP_DELAY_MS = 57000;

const MILLISECONDS_IN_DAY = 24 * 60 * 60 * 1000;

/**
 * Convert a Date into the number of whole days elapsed since the UNIX epoch.
 *
 * Copied from /services/metrics.storage.jsm. Leap seconds are not taken
 * into account.
 *
 * @param date {Date} The date to convert
 *
 * @returns {Number} days since the epoch, rounded down
 */
function dateToDays(date) {
  const elapsedMs = date.getTime();
  return Math.floor(elapsedMs / MILLISECONDS_IN_DAY);
}
42
43/**
44 * Get a field from the specified object and remove it.
45 *
46 * @param obj {Object} The object holding the field
47 * @param field {String} The name of the field to be parsed and removed
48 *
49 * @returns {String} the field contents as a string, null if none was found
50 */
function getAndRemoveField(obj, field) {
  // Nothing to extract: report the absence with null.
  if (!(field in obj)) {
    return null;
  }

  const contents = obj[field];
  delete obj[field];
  return contents;
}
61
62/**
63 * Parse the string stored in the specified field as JSON and then remove the
64 * field from the object.
65 *
66 * @param obj {Object} The object holding the field
67 * @param field {String} The name of the field to be parsed and removed
68 *
69 * @returns {Object} the parsed object, null if none was found
70 */
function parseAndRemoveField(obj, field) {
  if (!(field in obj)) {
    return null;
  }

  // A field that fails to parse is reported and yields null, but it is
  // still removed from the object below.
  let parsed = null;
  try {
    parsed = JSON.parse(obj[field]);
  } catch (e) {
    Cu.reportError(e);
  }

  delete obj[field];
  return parsed;
}
86
87/**
88 * A gateway to crash-related data.
89 *
90 * This type is generic and can be instantiated any number of times.
91 * However, most applications will typically only have one instance
92 * instantiated and that instance will point to profile and user appdata
93 * directories.
94 *
95 * Instances are created by passing an object with properties.
96 * Recognized properties are:
97 *
98 *   pendingDumpsDir (string) (required)
99 *     Where dump files that haven't been uploaded are located.
100 *
101 *   submittedDumpsDir (string) (required)
102 *     Where records of uploaded dumps are located.
103 *
104 *   eventsDirs (array)
105 *     Directories (defined as strings) where events files are written. This
 *     instance will collect events from files in the directories specified.
107 *
108 *   storeDir (string)
109 *     Directory we will use for our data store. This instance will write
110 *     data files into the directory specified.
111 *
112 *   telemetryStoreSizeKey (string)
113 *     Telemetry histogram to report store size under.
114 */
var CrashManager = function(options) {
  // Options that override the lazily computed directory getters. They are
  // installed as fixed own properties named "_" + option.
  const directoryOptions = [
    "pendingDumpsDir",
    "submittedDumpsDir",
    "eventsDirs",
    "storeDir",
  ];

  for (let k in options) {
    const value = options[k];

    if (directoryOptions.includes(k)) {
      const key = "_" + k;
      delete this[key];
      Object.defineProperty(this, key, { value });
    } else if (k === "telemetryStoreSizeKey") {
      this._telemetryStoreSizeKey = value;
    } else {
      throw new Error("Unknown property in options: " + k);
    }
  }

  // In-progress aggregation promise, kept so that overlapping callers can
  // be handed the same operation.
  this._aggregatePromise = null;

  // Crash ID -> deferred, used to track crashes that are being added.
  this._crashPromises = new Map();

  // Promise for the crash ping; only used by tests.
  this._pingPromise = null;

  // The CrashStore currently attached to this manager, if any.
  this._store = null;

  // In-flight store retrieval, shared to avoid races when _getStore() is
  // called several times in a short interval.
  this._getStoreTask = null;

  // Timer driving the expiration of the CrashStore instance.
  this._storeTimer = null;

  // Semaphore: while non-zero, the timer-based mechanism must not free the
  // store.
  this._storeProtectedCount = 0;
};
161
162CrashManager.prototype = Object.freeze({
163  // A crash in the main process.
164  PROCESS_TYPE_MAIN: "main",
165
166  // A crash in a content process.
167  PROCESS_TYPE_CONTENT: "content",
168
169  // A crash in a Gecko media plugin process.
170  PROCESS_TYPE_GMPLUGIN: "gmplugin",
171
172  // A crash in the GPU process.
173  PROCESS_TYPE_GPU: "gpu",
174
175  // A crash in the VR process.
176  PROCESS_TYPE_VR: "vr",
177
178  // A crash in the RDD process.
179  PROCESS_TYPE_RDD: "rdd",
180
181  // A crash in the socket process.
182  PROCESS_TYPE_SOCKET: "socket",
183
184  // A real crash.
185  CRASH_TYPE_CRASH: "crash",
186
187  // A hang.
188  CRASH_TYPE_HANG: "hang",
189
190  // Submission result values.
191  SUBMISSION_RESULT_OK: "ok",
192  SUBMISSION_RESULT_FAILED: "failed",
193
194  DUMP_REGEX: /^([0-9a-f]{8}-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{12})\.dmp$/i,
195  SUBMITTED_REGEX: /^bp-(?:hr-)?([0-9a-f]{8}-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{12})\.txt$/i,
196  ALL_REGEX: /^(.*)$/,
197
198  // How long the store object should persist in memory before being
199  // automatically garbage collected.
200  STORE_EXPIRATION_MS: 60 * 1000,
201
202  // Number of days after which a crash with no activity will get purged.
203  PURGE_OLDER_THAN_DAYS: 180,
204
205  // The following are return codes for individual event file processing.
206  // File processed OK.
207  EVENT_FILE_SUCCESS: "ok",
208  // The event appears to be malformed.
209  EVENT_FILE_ERROR_MALFORMED: "malformed",
210  // The event is obsolete.
211  EVENT_FILE_ERROR_OBSOLETE: "obsolete",
212  // The type of event is unknown.
213  EVENT_FILE_ERROR_UNKNOWN_EVENT: "unknown-event",
214
215  _lazyGetDir(field, path, leaf) {
216    delete this[field];
217    let value = PathUtils.join(path, leaf);
218    Object.defineProperty(this, field, { value });
219    return value;
220  },
221
222  get _crDir() {
223    return this._lazyGetDir(
224      "_crDir",
225      Services.dirsvc.get("UAppData", Ci.nsIFile).path,
226      "Crash Reports"
227    );
228  },
229
230  get _storeDir() {
231    return this._lazyGetDir(
232      "_storeDir",
233      Services.dirsvc.get("ProfD", Ci.nsIFile).path,
234      "crashes"
235    );
236  },
237
238  get _pendingDumpsDir() {
239    return this._lazyGetDir("_pendingDumpsDir", this._crDir, "pending");
240  },
241
242  get _submittedDumpsDir() {
243    return this._lazyGetDir("_submittedDumpsDir", this._crDir, "submitted");
244  },
245
246  get _eventsDirs() {
247    delete this._eventsDirs;
248    let value = [
249      PathUtils.join(this._crDir, "events"),
250      PathUtils.join(this._storeDir, "events"),
251    ];
252    Object.defineProperty(this, "_eventsDirs", { value });
253    return value;
254  },
255
256  /**
257   * Obtain a list of all dumps pending upload.
258   *
259   * The returned value is a promise that resolves to an array of objects
260   * on success. Each element in the array has the following properties:
261   *
262   *   id (string)
263   *      The ID of the crash (a UUID).
264   *
265   *   path (string)
266   *      The filename of the crash (<UUID.dmp>)
267   *
268   *   date (Date)
269   *      When this dump was created
270   *
   * The returned array is sorted by the modified time of the file backing
272   * the entry, oldest to newest.
273   *
274   * @return Promise<Array>
275   */
276  pendingDumps() {
277    return this._getDirectoryEntries(this._pendingDumpsDir, this.DUMP_REGEX);
278  },
279
280  /**
281   * Obtain a list of all dump files corresponding to submitted crashes.
282   *
283   * The returned value is a promise that resolves to an Array of
284   * objects. Each object has the following properties:
285   *
286   *   path (string)
287   *     The path of the file this entry comes from.
288   *
289   *   id (string)
290   *     The crash UUID.
291   *
292   *   date (Date)
293   *     The (estimated) date this crash was submitted.
294   *
295   * The returned array is sorted by the modified time of the file backing
296   * the entry, oldest to newest.
297   *
298   * @return Promise<Array>
299   */
300  submittedDumps() {
301    return this._getDirectoryEntries(
302      this._submittedDumpsDir,
303      this.SUBMITTED_REGEX
304    );
305  },
306
307  /**
308   * Aggregates "loose" events files into the unified "database."
309   *
310   * This function should be called periodically to collect metadata from
311   * all events files into the central data store maintained by this manager.
312   *
313   * Once events have been stored in the backing store the corresponding
314   * source files are deleted.
315   *
316   * Only one aggregation operation is allowed to occur at a time. If this
317   * is called when an existing aggregation is in progress, the promise for
318   * the original call will be returned.
319   *
320   * @return promise<int> The number of event files that were examined.
321   */
322  aggregateEventsFiles() {
323    if (this._aggregatePromise) {
324      return this._aggregatePromise;
325    }
326
327    return (this._aggregatePromise = (async () => {
328      if (this._aggregatePromise) {
329        return this._aggregatePromise;
330      }
331
332      try {
333        let unprocessedFiles = await this._getUnprocessedEventsFiles();
334
335        let deletePaths = [];
336        let needsSave = false;
337
338        this._storeProtectedCount++;
339        for (let entry of unprocessedFiles) {
340          try {
341            let result = await this._processEventFile(entry);
342
343            switch (result) {
344              case this.EVENT_FILE_SUCCESS:
345                needsSave = true;
346              // Fall through.
347
348              case this.EVENT_FILE_ERROR_MALFORMED:
349              case this.EVENT_FILE_ERROR_OBSOLETE:
350                deletePaths.push(entry.path);
351                break;
352
353              case this.EVENT_FILE_ERROR_UNKNOWN_EVENT:
354                break;
355
356              default:
357                Cu.reportError(
358                  "Unhandled crash event file return code. Please " +
359                    "file a bug: " +
360                    result
361                );
362            }
363          } catch (ex) {
364            if (ex instanceof DOMException) {
365              this._log.warn("I/O error reading " + entry.path, ex);
366            } else {
367              // We should never encounter an exception. This likely represents
368              // a coding error because all errors should be detected and
369              // converted to return codes.
370              //
371              // If we get here, report the error and delete the source file
372              // so we don't see it again.
373              Cu.reportError(
374                "Exception when processing crash event file: " +
375                  Log.exceptionStr(ex)
376              );
377              deletePaths.push(entry.path);
378            }
379          }
380        }
381
382        if (needsSave) {
383          let store = await this._getStore();
384          await store.save();
385        }
386
387        for (let path of deletePaths) {
388          try {
389            await IOUtils.remove(path);
390          } catch (ex) {
391            this._log.warn("Error removing event file (" + path + ")", ex);
392          }
393        }
394
395        return unprocessedFiles.length;
396      } finally {
397        this._aggregatePromise = false;
398        this._storeProtectedCount--;
399      }
400    })());
401  },
402
403  /**
404   * Prune old crash data.
405   *
406   * @param date
407   *        (Date) The cutoff point for pruning. Crashes without data newer
408   *        than this will be pruned.
409   */
410  pruneOldCrashes(date) {
411    return (async () => {
412      let store = await this._getStore();
413      store.pruneOldCrashes(date);
414      await store.save();
415    })();
416  },
417
418  /**
419   * Run tasks that should be periodically performed.
420   */
421  runMaintenanceTasks() {
422    return (async () => {
423      await this.aggregateEventsFiles();
424
425      let offset = this.PURGE_OLDER_THAN_DAYS * MILLISECONDS_IN_DAY;
426      await this.pruneOldCrashes(new Date(Date.now() - offset));
427    })();
428  },
429
430  /**
431   * Schedule maintenance tasks for some point in the future.
432   *
433   * @param delay
434   *        (integer) Delay in milliseconds when maintenance should occur.
435   */
436  scheduleMaintenance(delay) {
437    let deferred = PromiseUtils.defer();
438
439    setTimeout(() => {
440      this.runMaintenanceTasks().then(deferred.resolve, deferred.reject);
441    }, delay);
442
443    return deferred.promise;
444  },
445
446  /**
447   * Record the occurrence of a crash.
448   *
449   * This method skips event files altogether and writes directly and
450   * immediately to the manager's data store.
451   *
452   * @param processType (string) One of the PROCESS_TYPE constants.
453   * @param crashType (string) One of the CRASH_TYPE constants.
454   * @param id (string) Crash ID. Likely a UUID.
455   * @param date (Date) When the crash occurred.
456   * @param metadata (dictionary) Crash metadata, may be empty.
457   *
458   * @return promise<null> Resolved when the store has been saved.
459   */
460  addCrash(processType, crashType, id, date, metadata) {
461    let promise = (async () => {
462      let store = await this._getStore();
463      if (store.addCrash(processType, crashType, id, date, metadata)) {
464        await store.save();
465      }
466
467      let deferred = this._crashPromises.get(id);
468
469      if (deferred) {
470        this._crashPromises.delete(id);
471        deferred.resolve();
472      }
473
474      // Send a telemetry ping for each non-main process crash
475      if (
476        processType === this.PROCESS_TYPE_CONTENT ||
477        processType === this.PROCESS_TYPE_GPU ||
478        processType === this.PROCESS_TYPE_VR ||
479        processType === this.PROCESS_TYPE_RDD ||
480        processType === this.PROCESS_TYPE_SOCKET
481      ) {
482        this._sendCrashPing(id, processType, date, metadata);
483      }
484    })();
485
486    return promise;
487  },
488
489  /**
   * Returns a promise that is resolved only once the crash with the specified id
491   * has been fully recorded.
492   *
493   * @param id (string) Crash ID. Likely a UUID.
494   *
495   * @return promise<null> Resolved when the crash is present.
496   */
497  async ensureCrashIsPresent(id) {
498    let store = await this._getStore();
499    let crash = store.getCrash(id);
500
501    if (crash) {
502      return Promise.resolve();
503    }
504
505    let deferred = PromiseUtils.defer();
506
507    this._crashPromises.set(id, deferred);
508    return deferred.promise;
509  },
510
511  /**
512   * Record the remote ID for a crash.
513   *
514   * @param crashID (string) Crash ID. Likely a UUID.
   * @param remoteID (string) Server/Breakpad ID.
516   *
517   * @return boolean True if the remote ID was recorded.
518   */
519  async setRemoteCrashID(crashID, remoteID) {
520    let store = await this._getStore();
521    if (store.setRemoteCrashID(crashID, remoteID)) {
522      await store.save();
523    }
524  },
525
526  /**
527   * Generate a submission ID for use with addSubmission{Attempt,Result}.
528   */
529  generateSubmissionID() {
530    return (
531      "sub-" +
532      Cc["@mozilla.org/uuid-generator;1"]
533        .getService(Ci.nsIUUIDGenerator)
534        .generateUUID()
535        .toString()
536        .slice(1, -1)
537    );
538  },
539
540  /**
541   * Record the occurrence of a submission attempt for a crash.
542   *
543   * @param crashID (string) Crash ID. Likely a UUID.
544   * @param submissionID (string) Submission ID. Likely a UUID.
545   * @param date (Date) When the attempt occurred.
546   *
547   * @return boolean True if the attempt was recorded and false if not.
548   */
549  async addSubmissionAttempt(crashID, submissionID, date) {
550    let store = await this._getStore();
551    if (store.addSubmissionAttempt(crashID, submissionID, date)) {
552      await store.save();
553    }
554  },
555
556  /**
557   * Record the occurrence of a submission result for a crash.
558   *
559   * @param crashID (string) Crash ID. Likely a UUID.
560   * @param submissionID (string) Submission ID. Likely a UUID.
561   * @param date (Date) When the submission result was obtained.
562   * @param result (string) One of the SUBMISSION_RESULT constants.
563   *
564   * @return boolean True if the result was recorded and false if not.
565   */
566  async addSubmissionResult(crashID, submissionID, date, result) {
567    let store = await this._getStore();
568    if (store.addSubmissionResult(crashID, submissionID, date, result)) {
569      await store.save();
570    }
571  },
572
573  /**
574   * Set the classification of a crash.
575   *
576   * @param crashID (string) Crash ID. Likely a UUID.
577   * @param classifications (array) Crash classifications.
578   *
579   * @return boolean True if the data was recorded and false if not.
580   */
581  async setCrashClassifications(crashID, classifications) {
582    let store = await this._getStore();
583    if (store.setCrashClassifications(crashID, classifications)) {
584      await store.save();
585    }
586  },
587
588  /**
589   * Obtain the paths of all unprocessed events files.
590   *
591   * The promise-resolved array is sorted by file mtime, oldest to newest.
592   */
593  _getUnprocessedEventsFiles() {
594    return (async () => {
595      try {
596        let entries = [];
597
598        for (let dir of this._eventsDirs) {
599          for (let e of await this._getDirectoryEntries(dir, this.ALL_REGEX)) {
600            entries.push(e);
601          }
602        }
603
604        entries.sort((a, b) => {
605          return a.date - b.date;
606        });
607
608        return entries;
609      } catch (e) {
610        Cu.reportError(e);
611        return [];
612      }
613    })();
614  },
615
616  // See docs/crash-events.rst for the file format specification.
617  _processEventFile(entry) {
618    return (async () => {
619      let data = await IOUtils.read(entry.path);
620      let store = await this._getStore();
621
622      let decoder = new TextDecoder();
623      data = decoder.decode(data);
624
625      let type, time;
626      let start = 0;
627      for (let i = 0; i < 2; i++) {
628        let index = data.indexOf("\n", start);
629        if (index == -1) {
630          return this.EVENT_FILE_ERROR_MALFORMED;
631        }
632
633        let sub = data.substring(start, index);
634        switch (i) {
635          case 0:
636            type = sub;
637            break;
638          case 1:
639            time = sub;
640            try {
641              time = parseInt(time, 10);
642            } catch (ex) {
643              return this.EVENT_FILE_ERROR_MALFORMED;
644            }
645        }
646
647        start = index + 1;
648      }
649      let date = new Date(time * 1000);
650      let payload = data.substring(start);
651
652      return this._handleEventFilePayload(store, entry, type, date, payload);
653    })();
654  },
655
656  _filterAnnotations(annotations) {
657    let filteredAnnotations = {};
658    let crashReporter = Cc["@mozilla.org/toolkit/crash-reporter;1"].getService(
659      Ci.nsICrashReporter
660    );
661
662    for (let line in annotations) {
663      try {
664        if (crashReporter.isAnnotationWhitelistedForPing(line)) {
665          filteredAnnotations[line] = annotations[line];
666        }
667      } catch (e) {
668        // Silently drop unknown annotations
669      }
670    }
671
672    return filteredAnnotations;
673  },
674
675  _sendCrashPing(crashId, type, date, metadata = {}) {
676    // If we have a saved environment, use it. Otherwise report
677    // the current environment.
678    let reportMeta = Cu.cloneInto(metadata, myScope);
679    let crashEnvironment = parseAndRemoveField(
680      reportMeta,
681      "TelemetryEnvironment"
682    );
683    let sessionId = getAndRemoveField(reportMeta, "TelemetrySessionId");
684    let stackTraces = getAndRemoveField(reportMeta, "StackTraces");
685    let minidumpSha256Hash = getAndRemoveField(
686      reportMeta,
687      "MinidumpSha256Hash"
688    );
689
690    // Filter the remaining annotations to remove privacy-sensitive ones
691    reportMeta = this._filterAnnotations(reportMeta);
692
693    this._pingPromise = TelemetryController.submitExternalPing(
694      "crash",
695      {
696        version: 1,
697        crashDate: date.toISOString().slice(0, 10), // YYYY-MM-DD
698        crashTime: date.toISOString().slice(0, 13) + ":00:00.000Z", // per-hour resolution
699        sessionId,
700        crashId,
701        minidumpSha256Hash,
702        processType: type,
703        stackTraces,
704        metadata: reportMeta,
705        hasCrashEnvironment: crashEnvironment !== null,
706      },
707      {
708        addClientId: true,
709        addEnvironment: true,
710        overrideEnvironment: crashEnvironment,
711      }
712    );
713  },
714
715  _handleEventFilePayload(store, entry, type, date, payload) {
716    // The payload types and formats are documented in docs/crash-events.rst.
717    // Do not change the format of an existing type. Instead, invent a new
718    // type.
719    // DO NOT ADD NEW TYPES WITHOUT DOCUMENTING!
720    let lines = payload.split("\n");
721
722    switch (type) {
723      case "crash.main.1":
724      case "crash.main.2":
725        return this.EVENT_FILE_ERROR_OBSOLETE;
726
727      case "crash.main.3":
728        let crashID = lines[0];
729        let metadata = JSON.parse(lines[1]);
730        store.addCrash(
731          this.PROCESS_TYPE_MAIN,
732          this.CRASH_TYPE_CRASH,
733          crashID,
734          date,
735          metadata
736        );
737
738        if (!("CrashPingUUID" in metadata)) {
739          // If CrashPingUUID is not present then a ping was not generated
740          // by the crashreporter for this crash so we need to send one from
741          // here.
742          this._sendCrashPing(crashID, this.PROCESS_TYPE_MAIN, date, metadata);
743        }
744
745        break;
746
747      case "crash.submission.1":
748        if (lines.length == 3) {
749          let [crashID, result, remoteID] = lines;
750          store.addCrash(
751            this.PROCESS_TYPE_MAIN,
752            this.CRASH_TYPE_CRASH,
753            crashID,
754            date
755          );
756
757          let submissionID = this.generateSubmissionID();
758          let succeeded = result === "true";
759          store.addSubmissionAttempt(crashID, submissionID, date);
760          store.addSubmissionResult(
761            crashID,
762            submissionID,
763            date,
764            succeeded
765              ? this.SUBMISSION_RESULT_OK
766              : this.SUBMISSION_RESULT_FAILED
767          );
768          if (succeeded) {
769            store.setRemoteCrashID(crashID, remoteID);
770          }
771        } else {
772          return this.EVENT_FILE_ERROR_MALFORMED;
773        }
774        break;
775
776      default:
777        return this.EVENT_FILE_ERROR_UNKNOWN_EVENT;
778    }
779
780    return this.EVENT_FILE_SUCCESS;
781  },
782
783  /**
784   * The resolved promise is an array of objects with the properties:
785   *
786   *   path -- String filename
787   *   id -- regexp.match()[1] (likely the crash ID)
788   *   date -- Date mtime of the file
789   */
790  _getDirectoryEntries(path, re) {
791    return (async function() {
792      let children = await IOUtils.getChildren(path);
793      let entries = [];
794
795      for (const entry of children) {
796        let stat = await IOUtils.stat(entry);
797        if (stat.type == "directory") {
798          continue;
799        }
800
801        let filename = PathUtils.filename(entry);
802        let match = re.exec(filename);
803        if (!match) {
804          continue;
805        }
806        entries.push({
807          path: entry,
808          id: match[1],
809          date: stat.lastModified,
810        });
811      }
812
813      entries.sort((a, b) => {
814        return a.date - b.date;
815      });
816
817      return entries;
818    })();
819  },
820
821  _getStore() {
822    if (this._getStoreTask) {
823      return this._getStoreTask;
824    }
825
826    return (this._getStoreTask = (async () => {
827      try {
828        if (!this._store) {
829          await IOUtils.makeDirectory(this._storeDir, {
830            permissions: 0o700,
831          });
832
833          let store = new CrashStore(
834            this._storeDir,
835            this._telemetryStoreSizeKey
836          );
837          await store.load();
838
839          this._store = store;
840          this._storeTimer = Cc["@mozilla.org/timer;1"].createInstance(
841            Ci.nsITimer
842          );
843        }
844
845        // The application can go long periods without interacting with the
846        // store. Since the store takes up resources, we automatically "free"
847        // the store after inactivity so resources can be returned to the
848        // system. We do this via a timer and a mechanism that tracks when the
849        // store is being accessed.
850        this._storeTimer.cancel();
851
852        // This callback frees resources from the store unless the store
853        // is protected from freeing by some other process.
854        let timerCB = () => {
855          if (this._storeProtectedCount) {
856            this._storeTimer.initWithCallback(
857              timerCB,
858              this.STORE_EXPIRATION_MS,
859              this._storeTimer.TYPE_ONE_SHOT
860            );
861            return;
862          }
863
864          // We kill the reference that we hold. GC will kill it later. If
865          // someone else holds a reference, that will prevent GC until that
866          // reference is gone.
867          this._store = null;
868          this._storeTimer = null;
869        };
870
871        this._storeTimer.initWithCallback(
872          timerCB,
873          this.STORE_EXPIRATION_MS,
874          this._storeTimer.TYPE_ONE_SHOT
875        );
876
877        return this._store;
878      } finally {
879        this._getStoreTask = null;
880      }
881    })());
882  },
883
884  /**
885   * Obtain information about all known crashes.
886   *
887   * Returns an array of CrashRecord instances. Instances are read-only.
888   */
889  getCrashes() {
890    return (async () => {
891      let store = await this._getStore();
892
893      return store.crashes;
894    })();
895  },
896
897  getCrashCountsByDay() {
898    return (async () => {
899      let store = await this._getStore();
900
901      return store._countsByDay;
902    })();
903  },
904});
905
906var gCrashManager;
907
908/**
909 * Interface to storage of crash data.
910 *
911 * This type handles storage of crash metadata. It exists as a separate type
912 * from the crash manager for performance reasons: since all crash metadata
913 * needs to be loaded into memory for access, we wish to easily dispose of all
914 * associated memory when this data is no longer needed. Having an isolated
 * object whose references can easily be lost facilitates that simple disposal.
916 *
917 * When metadata is updated, the caller must explicitly persist the changes
918 * to disk. This prevents excessive I/O during updates.
919 *
920 * The store has a mechanism for ensuring it doesn't grow too large. A ceiling
921 * is placed on the number of daily events that can occur for events that can
922 * occur with relatively high frequency. If we've reached
923 * the high water mark and new data arrives, it's silently dropped.
924 * However, the count of actual events is always preserved. This allows
925 * us to report on the severity of problems beyond the storage threshold.
926 *
927 * Main process crashes are excluded from limits because they are both
928 * important and should be rare.
929 *
930 * @param storeDir (string)
931 *        Directory the store should be located in.
932 * @param telemetrySizeKey (string)
933 *        The telemetry histogram that should be used to store the size
934 *        of the data file.
935 */
function CrashStore(storeDir, telemetrySizeKey) {
  Object.assign(this, {
    _storeDir: storeDir,
    _telemetrySizeKey: telemetrySizeKey,

    // Location of the data file inside the store directory.
    _storePath: PathUtils.join(storeDir, "store.json.mozlz4"),

    // Holds the data read from disk.
    _data: null,

    // Maps days since UNIX epoch to a Map of event types to counts.
    // Populated when the JSON file is loaded and updated whenever new
    // events are added.
    _countsByDay: new Map(),
  });
}
950
951CrashStore.prototype = Object.freeze({
952  // Maximum number of events to store per day. This establishes a
953  // ceiling on the per-type/per-day records that will be stored.
954  HIGH_WATER_DAILY_THRESHOLD: 500,
955
956  /**
957   * Reset all data.
958   */
959  reset() {
960    this._data = {
961      v: 1,
962      crashes: new Map(),
963      corruptDate: null,
964    };
965    this._countsByDay = new Map();
966  },
967
968  /**
969   * Load data from disk.
970   *
971   * @return Promise
972   */
973  load() {
974    return (async () => {
975      // Loading replaces data.
976      this.reset();
977
978      try {
979        let decoder = new TextDecoder();
980        let data = await IOUtils.read(this._storePath, { decompress: true });
981        data = JSON.parse(decoder.decode(data));
982
983        if (data.corruptDate) {
984          this._data.corruptDate = new Date(data.corruptDate);
985        }
986
987        // actualCounts is used to validate that the derived counts by
988        // days stored in the payload matches up to actual data.
989        let actualCounts = new Map();
990
991        // In the past, submissions were stored as separate crash records
992        // with an id of e.g. "someID-submission". If we find IDs ending
993        // with "-submission", we will need to convert the data to be stored
994        // as actual submissions.
995        //
996        // The old way of storing submissions was used from FF33 - FF34. We
997        // drop this old data on the floor.
998        for (let id in data.crashes) {
999          if (id.endsWith("-submission")) {
1000            continue;
1001          }
1002
1003          let crash = data.crashes[id];
1004          let denormalized = this._denormalize(crash);
1005
1006          denormalized.submissions = new Map();
1007          if (crash.submissions) {
1008            for (let submissionID in crash.submissions) {
1009              let submission = crash.submissions[submissionID];
1010              denormalized.submissions.set(
1011                submissionID,
1012                this._denormalize(submission)
1013              );
1014            }
1015          }
1016
1017          this._data.crashes.set(id, denormalized);
1018
1019          let key =
1020            dateToDays(denormalized.crashDate) + "-" + denormalized.type;
1021          actualCounts.set(key, (actualCounts.get(key) || 0) + 1);
1022
1023          // If we have an OOM size, count the crash as an OOM in addition to
1024          // being a main process crash.
1025          if (
1026            denormalized.metadata &&
1027            denormalized.metadata.OOMAllocationSize
1028          ) {
1029            let oomKey = key + "-oom";
1030            actualCounts.set(oomKey, (actualCounts.get(oomKey) || 0) + 1);
1031          }
1032        }
1033
1034        // The validation in this loop is arguably not necessary. We perform
1035        // it as a defense against unknown bugs.
1036        for (let dayKey in data.countsByDay) {
1037          let day = parseInt(dayKey, 10);
1038          for (let type in data.countsByDay[day]) {
1039            this._ensureCountsForDay(day);
1040
1041            let count = data.countsByDay[day][type];
1042            let key = day + "-" + type;
1043
1044            // If the payload says we have data for a given day but we
1045            // don't, the payload is wrong. Ignore it.
1046            if (!actualCounts.has(key)) {
1047              continue;
1048            }
1049
1050            // If we encountered more data in the payload than what the
1051            // data structure says, use the proper value.
1052            count = Math.max(count, actualCounts.get(key));
1053
1054            this._countsByDay.get(day).set(type, count);
1055          }
1056        }
1057      } catch (ex) {
1058        // Missing files (first use) are allowed.
1059        if (!(ex instanceof DOMException) || ex.name != "NotFoundError") {
1060          // If we can't load for any reason, mark a corrupt date in the instance
1061          // and swallow the error.
1062          //
1063          // The marking of a corrupted file is intentionally not persisted to
1064          // disk yet. Instead, we wait until the next save(). This is to give
1065          // non-permanent failures the opportunity to recover on their own.
1066          this._data.corruptDate = new Date();
1067        }
1068      }
1069    })();
1070  },
1071
1072  /**
1073   * Save data to disk.
1074   *
1075   * @return Promise<null>
1076   */
1077  save() {
1078    return (async () => {
1079      if (!this._data) {
1080        return;
1081      }
1082
1083      let normalized = {
1084        // The version should be incremented whenever the format
1085        // changes.
1086        v: 1,
1087        // Maps crash IDs to objects defining the crash.
1088        crashes: {},
1089        // Maps days since UNIX epoch to objects mapping event types to
1090        // counts. This is a mirror of this._countsByDay. e.g.
1091        // {
1092        //    15000: {
1093        //        "main-crash": 2,
1094        //        "plugin-crash": 1
1095        //    }
1096        // }
1097        countsByDay: {},
1098
1099        // When the store was last corrupted.
1100        corruptDate: null,
1101      };
1102
1103      if (this._data.corruptDate) {
1104        normalized.corruptDate = this._data.corruptDate.getTime();
1105      }
1106
1107      for (let [id, crash] of this._data.crashes) {
1108        let c = this._normalize(crash);
1109
1110        c.submissions = {};
1111        for (let [submissionID, submission] of crash.submissions) {
1112          c.submissions[submissionID] = this._normalize(submission);
1113        }
1114
1115        normalized.crashes[id] = c;
1116      }
1117
1118      for (let [day, m] of this._countsByDay) {
1119        normalized.countsByDay[day] = {};
1120        for (let [type, count] of m) {
1121          normalized.countsByDay[day][type] = count;
1122        }
1123      }
1124
1125      let encoder = new TextEncoder();
1126      let data = encoder.encode(JSON.stringify(normalized));
1127      let size = await IOUtils.write(this._storePath, data, {
1128        tmpPath: this._storePath + ".tmp",
1129        compress: true,
1130      });
1131      if (this._telemetrySizeKey) {
1132        Services.telemetry.getHistogramById(this._telemetrySizeKey).add(size);
1133      }
1134    })();
1135  },
1136
1137  /**
1138   * Normalize an object into one fit for serialization.
1139   *
1140   * This function along with _denormalize() serve to hack around the
1141   * default handling of Date JSON serialization because Date serialization
1142   * is undefined by JSON.
1143   *
1144   * Fields ending with "Date" are assumed to contain Date instances.
1145   * We convert these to milliseconds since epoch on output and back to
1146   * Date on input.
1147   */
1148  _normalize(o) {
1149    let normalized = {};
1150
1151    for (let k in o) {
1152      let v = o[k];
1153      if (v && k.endsWith("Date")) {
1154        normalized[k] = v.getTime();
1155      } else {
1156        normalized[k] = v;
1157      }
1158    }
1159
1160    return normalized;
1161  },
1162
1163  /**
1164   * Convert a serialized object back to its native form.
1165   */
1166  _denormalize(o) {
1167    let n = {};
1168
1169    for (let k in o) {
1170      let v = o[k];
1171      if (v && k.endsWith("Date")) {
1172        n[k] = new Date(parseInt(v, 10));
1173      } else {
1174        n[k] = v;
1175      }
1176    }
1177
1178    return n;
1179  },
1180
1181  /**
1182   * Prune old crash data.
1183   *
1184   * Crashes without recent activity are pruned from the store so the
1185   * size of the store is not unbounded. If there is activity on a crash,
1186   * that activity will keep the crash and all its data around for longer.
1187   *
1188   * @param date
1189   *        (Date) The cutoff at which data will be pruned. If an entry
1190   *        doesn't have data newer than this, it will be pruned.
1191   */
1192  pruneOldCrashes(date) {
1193    for (let crash of this.crashes) {
1194      let newest = crash.newestDate;
1195      if (!newest || newest.getTime() < date.getTime()) {
1196        this._data.crashes.delete(crash.id);
1197      }
1198    }
1199  },
1200
1201  /**
1202   * Date the store was last corrupted and required a reset.
1203   *
1204   * May be null (no corruption has ever occurred) or a Date instance.
1205   */
1206  get corruptDate() {
1207    return this._data.corruptDate;
1208  },
1209
1210  /**
1211   * The number of distinct crashes tracked.
1212   */
1213  get crashesCount() {
1214    return this._data.crashes.size;
1215  },
1216
1217  /**
1218   * All crashes tracked.
1219   *
1220   * This is an array of CrashRecord.
1221   */
1222  get crashes() {
1223    let crashes = [];
1224    for (let [, crash] of this._data.crashes) {
1225      crashes.push(new CrashRecord(crash));
1226    }
1227
1228    return crashes;
1229  },
1230
1231  /**
1232   * Obtain a particular crash from its ID.
1233   *
1234   * A CrashRecord will be returned if the crash exists. null will be returned
1235   * if the crash is unknown.
1236   */
1237  getCrash(id) {
1238    for (let crash of this.crashes) {
1239      if (crash.id == id) {
1240        return crash;
1241      }
1242    }
1243
1244    return null;
1245  },
1246
1247  _ensureCountsForDay(day) {
1248    if (!this._countsByDay.has(day)) {
1249      this._countsByDay.set(day, new Map());
1250    }
1251  },
1252
1253  /**
1254   * Ensure the crash record is present in storage.
1255   *
1256   * Returns the crash record if we're allowed to store it or null
1257   * if we've hit the high water mark.
1258   *
1259   * @param processType
1260   *        (string) One of the PROCESS_TYPE constants.
1261   * @param crashType
1262   *        (string) One of the CRASH_TYPE constants.
1263   * @param id
1264   *        (string) The crash ID.
1265   * @param date
1266   *        (Date) When this crash occurred.
1267   * @param metadata
1268   *        (dictionary) Crash metadata, may be empty.
1269   *
1270   * @return null | object crash record
1271   */
1272  _ensureCrashRecord(processType, crashType, id, date, metadata) {
1273    if (!id) {
1274      // Crashes are keyed on ID, so it's not really helpful to store crashes
1275      // without IDs.
1276      return null;
1277    }
1278
1279    let type = processType + "-" + crashType;
1280
1281    if (!this._data.crashes.has(id)) {
1282      let day = dateToDays(date);
1283      this._ensureCountsForDay(day);
1284
1285      let count = (this._countsByDay.get(day).get(type) || 0) + 1;
1286      this._countsByDay.get(day).set(type, count);
1287
1288      if (
1289        count > this.HIGH_WATER_DAILY_THRESHOLD &&
1290        processType != CrashManager.prototype.PROCESS_TYPE_MAIN
1291      ) {
1292        return null;
1293      }
1294
1295      // If we have an OOM size, count the crash as an OOM in addition to
1296      // being a main process crash.
1297      if (metadata && metadata.OOMAllocationSize) {
1298        let oomType = type + "-oom";
1299        let oomCount = (this._countsByDay.get(day).get(oomType) || 0) + 1;
1300        this._countsByDay.get(day).set(oomType, oomCount);
1301      }
1302
1303      this._data.crashes.set(id, {
1304        id,
1305        remoteID: null,
1306        type,
1307        crashDate: date,
1308        submissions: new Map(),
1309        classifications: [],
1310        metadata,
1311      });
1312    }
1313
1314    let crash = this._data.crashes.get(id);
1315    crash.type = type;
1316    crash.crashDate = date;
1317
1318    return crash;
1319  },
1320
1321  /**
1322   * Record the occurrence of a crash.
1323   *
1324   * @param processType (string) One of the PROCESS_TYPE constants.
1325   * @param crashType (string) One of the CRASH_TYPE constants.
1326   * @param id (string) Crash ID. Likely a UUID.
1327   * @param date (Date) When the crash occurred.
1328   * @param metadata (dictionary) Crash metadata, may be empty.
1329   *
1330   * @return boolean True if the crash was recorded and false if not.
1331   */
1332  addCrash(processType, crashType, id, date, metadata) {
1333    return !!this._ensureCrashRecord(
1334      processType,
1335      crashType,
1336      id,
1337      date,
1338      metadata
1339    );
1340  },
1341
1342  /**
1343   * @return boolean True if the remote ID was recorded and false if not.
1344   */
1345  setRemoteCrashID(crashID, remoteID) {
1346    let crash = this._data.crashes.get(crashID);
1347    if (!crash || !remoteID) {
1348      return false;
1349    }
1350
1351    crash.remoteID = remoteID;
1352    return true;
1353  },
1354
1355  getCrashesOfType(processType, crashType) {
1356    let crashes = [];
1357    for (let crash of this.crashes) {
1358      if (crash.isOfType(processType, crashType)) {
1359        crashes.push(crash);
1360      }
1361    }
1362
1363    return crashes;
1364  },
1365
1366  /**
1367   * Ensure the submission record is present in storage.
1368   * @returns [submission, crash]
1369   */
1370  _ensureSubmissionRecord(crashID, submissionID) {
1371    let crash = this._data.crashes.get(crashID);
1372    if (!crash || !submissionID) {
1373      return null;
1374    }
1375
1376    if (!crash.submissions.has(submissionID)) {
1377      crash.submissions.set(submissionID, {
1378        requestDate: null,
1379        responseDate: null,
1380        result: null,
1381      });
1382    }
1383
1384    return [crash.submissions.get(submissionID), crash];
1385  },
1386
1387  /**
1388   * @return boolean True if the attempt was recorded.
1389   */
1390  addSubmissionAttempt(crashID, submissionID, date) {
1391    let [submission, crash] = this._ensureSubmissionRecord(
1392      crashID,
1393      submissionID
1394    );
1395    if (!submission) {
1396      return false;
1397    }
1398
1399    submission.requestDate = date;
1400    Services.telemetry
1401      .getKeyedHistogramById("PROCESS_CRASH_SUBMIT_ATTEMPT")
1402      .add(crash.type, 1);
1403    return true;
1404  },
1405
1406  /**
1407   * @return boolean True if the response was recorded.
1408   */
1409  addSubmissionResult(crashID, submissionID, date, result) {
1410    let crash = this._data.crashes.get(crashID);
1411    if (!crash || !submissionID) {
1412      return false;
1413    }
1414    let submission = crash.submissions.get(submissionID);
1415    if (!submission) {
1416      return false;
1417    }
1418
1419    submission.responseDate = date;
1420    submission.result = result;
1421    Services.telemetry
1422      .getKeyedHistogramById("PROCESS_CRASH_SUBMIT_SUCCESS")
1423      .add(crash.type, result == "ok");
1424    return true;
1425  },
1426
1427  /**
1428   * @return boolean True if the classifications were set.
1429   */
1430  setCrashClassifications(crashID, classifications) {
1431    let crash = this._data.crashes.get(crashID);
1432    if (!crash) {
1433      return false;
1434    }
1435
1436    crash.classifications = classifications;
1437    return true;
1438  },
1439});
1440
/**
 * A read-only view over a single crash entry.
 *
 * The low-level crash entries are anonymous JS objects; this wrapper puts a
 * consistent and helpful API on top of one of them. The wrapped object is
 * not copied, so every getter reflects its current state.
 *
 * Instances of this type should only be constructed inside this module,
 * not externally. The constructor is not considered a public API.
 *
 * @param o (object)
 *        The crash's entry from the CrashStore.
 */
function CrashRecord(o) {
  this._o = o;
}

CrashRecord.prototype = Object.freeze({
  /** The crash ID of the wrapped entry. */
  get id() {
    const { id } = this._o;
    return id;
  },

  /** The remote ID of the wrapped entry. */
  get remoteID() {
    const { remoteID } = this._o;
    return remoteID;
  },

  /** The crash date of the wrapped entry. */
  get crashDate() {
    const { crashDate } = this._o;
    return crashDate;
  },

  /**
   * The most recent date held by this record.
   *
   * This convenience getter is what record expiry is based on. The crash
   * date is the only date there currently is, so that is what is returned.
   */
  get newestDate() {
    const { crashDate } = this._o;
    return crashDate;
  },

  /**
   * The oldest date held by this record, which is currently the crash date.
   */
  get oldestDate() {
    const { crashDate } = this._o;
    return crashDate;
  },

  /** The type string of the wrapped entry. */
  get type() {
    const { type } = this._o;
    return type;
  },

  /**
   * Whether this record's type equals processType + "-" + crashType.
   */
  isOfType(processType, crashType) {
    const wanted = processType + "-" + crashType;
    return wanted == this.type;
  },

  /** The submissions of the wrapped entry. */
  get submissions() {
    const { submissions } = this._o;
    return submissions;
  },

  /** The classifications of the wrapped entry. */
  get classifications() {
    const { classifications } = this._o;
    return classifications;
  },

  /** The metadata of the wrapped entry. */
  get metadata() {
    const { metadata } = this._o;
    return metadata;
  },
});
1505
// Logger for CrashManager, registered as a lazy getter on the constructor.
XPCOMUtils.defineLazyGetter(CrashManager, "_log", function() {
  return Log.repository.getLogger("Crashes.CrashManager");
});
1509
/**
 * Obtain the global CrashManager instance used by the running application.
 *
 * CrashManager is likely only ever instantiated once per application lifetime.
 * The main reason it's implemented as a reusable type is to facilitate testing.
 *
 * If gCrashManager has not been set yet, an instance is created, stored in
 * gCrashManager and has its maintenance scheduled.
 */
XPCOMUtils.defineLazyGetter(CrashManager, "Singleton", () => {
  if (!gCrashManager) {
    gCrashManager = new CrashManager({
      telemetryStoreSizeKey: "CRASH_STORE_COMPRESSED_BYTES",
    });

    // Automatically aggregate event files shortly after startup. This
    // ensures it happens with some frequency.
    //
    // There are performance considerations here. While this is doing
    // work and could negatively impact performance, the amount of work
    // is kept small per run by periodically aggregating event files.
    // Furthermore, well-behaving installs should not have much work
    // here to do. If there is a lot of work, that install has bigger
    // issues beyond reduced performance near startup.
    gCrashManager.scheduleMaintenance(AGGREGATE_STARTUP_DELAY_MS);
  }

  return gCrashManager;
});
1538
/**
 * Convenience accessor for the application-wide CrashManager.
 *
 * @return The value of CrashManager.Singleton.
 */
function getCrashManager() {
  const manager = CrashManager.Singleton;
  return manager;
}
1542