1/* This Source Code Form is subject to the terms of the Mozilla Public 2 * License, v. 2.0. If a copy of the MPL was not distributed with this 3 * file, You can obtain one at http://mozilla.org/MPL/2.0/. */ 4 5"use strict"; 6 7const myScope = this; 8 9const { PromiseUtils } = ChromeUtils.import( 10 "resource://gre/modules/PromiseUtils.jsm" 11); 12const { Services } = ChromeUtils.import("resource://gre/modules/Services.jsm"); 13const { setTimeout } = ChromeUtils.import("resource://gre/modules/Timer.jsm"); 14const { XPCOMUtils } = ChromeUtils.import( 15 "resource://gre/modules/XPCOMUtils.jsm" 16); 17 18XPCOMUtils.defineLazyModuleGetters(this, { 19 Log: "resource://gre/modules/Log.jsm", 20 TelemetryController: "resource://gre/modules/TelemetryController.jsm", 21}); 22 23var EXPORTED_SYMBOLS = ["CrashManager", "getCrashManager"]; 24 25/** 26 * How long to wait after application startup before crash event files are 27 * automatically aggregated. 28 * 29 * We defer aggregation for performance reasons, as we don't want too many 30 * services competing for I/O immediately after startup. 31 */ 32const AGGREGATE_STARTUP_DELAY_MS = 57000; 33 34const MILLISECONDS_IN_DAY = 24 * 60 * 60 * 1000; 35 36// Converts Date to days since UNIX epoch. 37// This was copied from /services/metrics.storage.jsm. The implementation 38// does not account for leap seconds. 39function dateToDays(date) { 40 return Math.floor(date.getTime() / MILLISECONDS_IN_DAY); 41} 42 43/** 44 * Get a field from the specified object and remove it. 45 * 46 * @param obj {Object} The object holding the field 47 * @param field {String} The name of the field to be parsed and removed 48 * 49 * @returns {String} the field contents as a string, null if none was found 50 */ 51function getAndRemoveField(obj, field) { 52 let value = null; 53 54 if (field in obj) { 55 value = obj[field]; 56 delete obj[field]; 57 } 58 59 return value; 60} 61 62/** 63 * Parse the string stored in the specified field as JSON and then remove the 64 * field from the object. 65 * 66 * @param obj {Object} The object holding the field 67 * @param field {String} The name of the field to be parsed and removed 68 * 69 * @returns {Object} the parsed object, null if none was found 70 */ 71function parseAndRemoveField(obj, field) { 72 let value = null; 73 74 if (field in obj) { 75 try { 76 value = JSON.parse(obj[field]); 77 } catch (e) { 78 Cu.reportError(e); 79 } 80 81 delete obj[field]; 82 } 83 84 return value; 85} 86 87/** 88 * A gateway to crash-related data. 89 * 90 * This type is generic and can be instantiated any number of times. 91 * However, most applications will typically only have one instance 92 * instantiated and that instance will point to profile and user appdata 93 * directories. 94 * 95 * Instances are created by passing an object with properties. 96 * Recognized properties are: 97 * 98 * pendingDumpsDir (string) (required) 99 * Where dump files that haven't been uploaded are located. 100 * 101 * submittedDumpsDir (string) (required) 102 * Where records of uploaded dumps are located. 103 * 104 * eventsDirs (array) 105 * Directories (defined as strings) where events files are written. This 106 * instance will collects events from files in the directories specified. 107 * 108 * storeDir (string) 109 * Directory we will use for our data store. This instance will write 110 * data files into the directory specified. 111 * 112 * telemetryStoreSizeKey (string) 113 * Telemetry histogram to report store size under. 114 */ 115var CrashManager = function(options) { 116 for (let k in options) { 117 let value = options[k]; 118 119 switch (k) { 120 case "pendingDumpsDir": 121 case "submittedDumpsDir": 122 case "eventsDirs": 123 case "storeDir": 124 let key = "_" + k; 125 delete this[key]; 126 Object.defineProperty(this, key, { value }); 127 break; 128 case "telemetryStoreSizeKey": 129 this._telemetryStoreSizeKey = value; 130 break; 131 132 default: 133 throw new Error("Unknown property in options: " + k); 134 } 135 } 136 137 // Promise for in-progress aggregation operation. We store it on the 138 // object so it can be returned for in-progress operations. 139 this._aggregatePromise = null; 140 141 // Map of crash ID / promise tuples used to track adding new crashes. 142 this._crashPromises = new Map(); 143 144 // Promise for the crash ping used only for testing. 145 this._pingPromise = null; 146 147 // The CrashStore currently attached to this object. 148 this._store = null; 149 150 // A Task to retrieve the store. This is needed to avoid races when 151 // _getStore() is called multiple times in a short interval. 152 this._getStoreTask = null; 153 154 // The timer controlling the expiration of the CrashStore instance. 155 this._storeTimer = null; 156 157 // This is a semaphore that prevents the store from being freed by our 158 // timer-based resource freeing mechanism. 159 this._storeProtectedCount = 0; 160}; 161 162CrashManager.prototype = Object.freeze({ 163 // A crash in the main process. 164 PROCESS_TYPE_MAIN: "main", 165 166 // A crash in a content process. 167 PROCESS_TYPE_CONTENT: "content", 168 169 // A crash in a Gecko media plugin process. 170 PROCESS_TYPE_GMPLUGIN: "gmplugin", 171 172 // A crash in the GPU process. 173 PROCESS_TYPE_GPU: "gpu", 174 175 // A crash in the VR process. 176 PROCESS_TYPE_VR: "vr", 177 178 // A crash in the RDD process. 179 PROCESS_TYPE_RDD: "rdd", 180 181 // A crash in the socket process. 182 PROCESS_TYPE_SOCKET: "socket", 183 184 // A real crash. 185 CRASH_TYPE_CRASH: "crash", 186 187 // A hang. 188 CRASH_TYPE_HANG: "hang", 189 190 // Submission result values. 191 SUBMISSION_RESULT_OK: "ok", 192 SUBMISSION_RESULT_FAILED: "failed", 193 194 DUMP_REGEX: /^([0-9a-f]{8}-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{12})\.dmp$/i, 195 SUBMITTED_REGEX: /^bp-(?:hr-)?([0-9a-f]{8}-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{12})\.txt$/i, 196 ALL_REGEX: /^(.*)$/, 197 198 // How long the store object should persist in memory before being 199 // automatically garbage collected. 200 STORE_EXPIRATION_MS: 60 * 1000, 201 202 // Number of days after which a crash with no activity will get purged. 203 PURGE_OLDER_THAN_DAYS: 180, 204 205 // The following are return codes for individual event file processing. 206 // File processed OK. 207 EVENT_FILE_SUCCESS: "ok", 208 // The event appears to be malformed. 209 EVENT_FILE_ERROR_MALFORMED: "malformed", 210 // The event is obsolete. 211 EVENT_FILE_ERROR_OBSOLETE: "obsolete", 212 // The type of event is unknown. 213 EVENT_FILE_ERROR_UNKNOWN_EVENT: "unknown-event", 214 215 _lazyGetDir(field, path, leaf) { 216 delete this[field]; 217 let value = PathUtils.join(path, leaf); 218 Object.defineProperty(this, field, { value }); 219 return value; 220 }, 221 222 get _crDir() { 223 return this._lazyGetDir( 224 "_crDir", 225 Services.dirsvc.get("UAppData", Ci.nsIFile).path, 226 "Crash Reports" 227 ); 228 }, 229 230 get _storeDir() { 231 return this._lazyGetDir( 232 "_storeDir", 233 Services.dirsvc.get("ProfD", Ci.nsIFile).path, 234 "crashes" 235 ); 236 }, 237 238 get _pendingDumpsDir() { 239 return this._lazyGetDir("_pendingDumpsDir", this._crDir, "pending"); 240 }, 241 242 get _submittedDumpsDir() { 243 return this._lazyGetDir("_submittedDumpsDir", this._crDir, "submitted"); 244 }, 245 246 get _eventsDirs() { 247 delete this._eventsDirs; 248 let value = [ 249 PathUtils.join(this._crDir, "events"), 250 PathUtils.join(this._storeDir, "events"), 251 ]; 252 Object.defineProperty(this, "_eventsDirs", { value }); 253 return value; 254 }, 255 256 /** 257 * Obtain a list of all dumps pending upload. 258 * 259 * The returned value is a promise that resolves to an array of objects 260 * on success. Each element in the array has the following properties: 261 * 262 * id (string) 263 * The ID of the crash (a UUID). 264 * 265 * path (string) 266 * The filename of the crash (<UUID.dmp>) 267 * 268 * date (Date) 269 * When this dump was created 270 * 271 * The returned arry is sorted by the modified time of the file backing 272 * the entry, oldest to newest. 273 * 274 * @return Promise<Array> 275 */ 276 pendingDumps() { 277 return this._getDirectoryEntries(this._pendingDumpsDir, this.DUMP_REGEX); 278 }, 279 280 /** 281 * Obtain a list of all dump files corresponding to submitted crashes. 282 * 283 * The returned value is a promise that resolves to an Array of 284 * objects. Each object has the following properties: 285 * 286 * path (string) 287 * The path of the file this entry comes from. 288 * 289 * id (string) 290 * The crash UUID. 291 * 292 * date (Date) 293 * The (estimated) date this crash was submitted. 294 * 295 * The returned array is sorted by the modified time of the file backing 296 * the entry, oldest to newest. 297 * 298 * @return Promise<Array> 299 */ 300 submittedDumps() { 301 return this._getDirectoryEntries( 302 this._submittedDumpsDir, 303 this.SUBMITTED_REGEX 304 ); 305 }, 306 307 /** 308 * Aggregates "loose" events files into the unified "database." 309 * 310 * This function should be called periodically to collect metadata from 311 * all events files into the central data store maintained by this manager. 312 * 313 * Once events have been stored in the backing store the corresponding 314 * source files are deleted. 315 * 316 * Only one aggregation operation is allowed to occur at a time. If this 317 * is called when an existing aggregation is in progress, the promise for 318 * the original call will be returned. 319 * 320 * @return promise<int> The number of event files that were examined. 321 */ 322 aggregateEventsFiles() { 323 if (this._aggregatePromise) { 324 return this._aggregatePromise; 325 } 326 327 return (this._aggregatePromise = (async () => { 328 if (this._aggregatePromise) { 329 return this._aggregatePromise; 330 } 331 332 try { 333 let unprocessedFiles = await this._getUnprocessedEventsFiles(); 334 335 let deletePaths = []; 336 let needsSave = false; 337 338 this._storeProtectedCount++; 339 for (let entry of unprocessedFiles) { 340 try { 341 let result = await this._processEventFile(entry); 342 343 switch (result) { 344 case this.EVENT_FILE_SUCCESS: 345 needsSave = true; 346 // Fall through. 347 348 case this.EVENT_FILE_ERROR_MALFORMED: 349 case this.EVENT_FILE_ERROR_OBSOLETE: 350 deletePaths.push(entry.path); 351 break; 352 353 case this.EVENT_FILE_ERROR_UNKNOWN_EVENT: 354 break; 355 356 default: 357 Cu.reportError( 358 "Unhandled crash event file return code. Please " + 359 "file a bug: " + 360 result 361 ); 362 } 363 } catch (ex) { 364 if (ex instanceof DOMException) { 365 this._log.warn("I/O error reading " + entry.path, ex); 366 } else { 367 // We should never encounter an exception. This likely represents 368 // a coding error because all errors should be detected and 369 // converted to return codes. 370 // 371 // If we get here, report the error and delete the source file 372 // so we don't see it again. 373 Cu.reportError( 374 "Exception when processing crash event file: " + 375 Log.exceptionStr(ex) 376 ); 377 deletePaths.push(entry.path); 378 } 379 } 380 } 381 382 if (needsSave) { 383 let store = await this._getStore(); 384 await store.save(); 385 } 386 387 for (let path of deletePaths) { 388 try { 389 await IOUtils.remove(path); 390 } catch (ex) { 391 this._log.warn("Error removing event file (" + path + ")", ex); 392 } 393 } 394 395 return unprocessedFiles.length; 396 } finally { 397 this._aggregatePromise = false; 398 this._storeProtectedCount--; 399 } 400 })()); 401 }, 402 403 /** 404 * Prune old crash data. 405 * 406 * @param date 407 * (Date) The cutoff point for pruning. Crashes without data newer 408 * than this will be pruned. 409 */ 410 pruneOldCrashes(date) { 411 return (async () => { 412 let store = await this._getStore(); 413 store.pruneOldCrashes(date); 414 await store.save(); 415 })(); 416 }, 417 418 /** 419 * Run tasks that should be periodically performed. 420 */ 421 runMaintenanceTasks() { 422 return (async () => { 423 await this.aggregateEventsFiles(); 424 425 let offset = this.PURGE_OLDER_THAN_DAYS * MILLISECONDS_IN_DAY; 426 await this.pruneOldCrashes(new Date(Date.now() - offset)); 427 })(); 428 }, 429 430 /** 431 * Schedule maintenance tasks for some point in the future. 432 * 433 * @param delay 434 * (integer) Delay in milliseconds when maintenance should occur. 435 */ 436 scheduleMaintenance(delay) { 437 let deferred = PromiseUtils.defer(); 438 439 setTimeout(() => { 440 this.runMaintenanceTasks().then(deferred.resolve, deferred.reject); 441 }, delay); 442 443 return deferred.promise; 444 }, 445 446 /** 447 * Record the occurrence of a crash. 448 * 449 * This method skips event files altogether and writes directly and 450 * immediately to the manager's data store. 451 * 452 * @param processType (string) One of the PROCESS_TYPE constants. 453 * @param crashType (string) One of the CRASH_TYPE constants. 454 * @param id (string) Crash ID. Likely a UUID. 455 * @param date (Date) When the crash occurred. 456 * @param metadata (dictionary) Crash metadata, may be empty. 457 * 458 * @return promise<null> Resolved when the store has been saved. 459 */ 460 addCrash(processType, crashType, id, date, metadata) { 461 let promise = (async () => { 462 let store = await this._getStore(); 463 if (store.addCrash(processType, crashType, id, date, metadata)) { 464 await store.save(); 465 } 466 467 let deferred = this._crashPromises.get(id); 468 469 if (deferred) { 470 this._crashPromises.delete(id); 471 deferred.resolve(); 472 } 473 474 // Send a telemetry ping for each non-main process crash 475 if ( 476 processType === this.PROCESS_TYPE_CONTENT || 477 processType === this.PROCESS_TYPE_GPU || 478 processType === this.PROCESS_TYPE_VR || 479 processType === this.PROCESS_TYPE_RDD || 480 processType === this.PROCESS_TYPE_SOCKET 481 ) { 482 this._sendCrashPing(id, processType, date, metadata); 483 } 484 })(); 485 486 return promise; 487 }, 488 489 /** 490 * Returns a promise that is resolved only the crash with the specified id 491 * has been fully recorded. 492 * 493 * @param id (string) Crash ID. Likely a UUID. 494 * 495 * @return promise<null> Resolved when the crash is present. 496 */ 497 async ensureCrashIsPresent(id) { 498 let store = await this._getStore(); 499 let crash = store.getCrash(id); 500 501 if (crash) { 502 return Promise.resolve(); 503 } 504 505 let deferred = PromiseUtils.defer(); 506 507 this._crashPromises.set(id, deferred); 508 return deferred.promise; 509 }, 510 511 /** 512 * Record the remote ID for a crash. 513 * 514 * @param crashID (string) Crash ID. Likely a UUID. 515 * @param remoteID (Date) Server/Breakpad ID. 516 * 517 * @return boolean True if the remote ID was recorded. 518 */ 519 async setRemoteCrashID(crashID, remoteID) { 520 let store = await this._getStore(); 521 if (store.setRemoteCrashID(crashID, remoteID)) { 522 await store.save(); 523 } 524 }, 525 526 /** 527 * Generate a submission ID for use with addSubmission{Attempt,Result}. 528 */ 529 generateSubmissionID() { 530 return ( 531 "sub-" + 532 Cc["@mozilla.org/uuid-generator;1"] 533 .getService(Ci.nsIUUIDGenerator) 534 .generateUUID() 535 .toString() 536 .slice(1, -1) 537 ); 538 }, 539 540 /** 541 * Record the occurrence of a submission attempt for a crash. 542 * 543 * @param crashID (string) Crash ID. Likely a UUID. 544 * @param submissionID (string) Submission ID. Likely a UUID. 545 * @param date (Date) When the attempt occurred. 546 * 547 * @return boolean True if the attempt was recorded and false if not. 548 */ 549 async addSubmissionAttempt(crashID, submissionID, date) { 550 let store = await this._getStore(); 551 if (store.addSubmissionAttempt(crashID, submissionID, date)) { 552 await store.save(); 553 } 554 }, 555 556 /** 557 * Record the occurrence of a submission result for a crash. 558 * 559 * @param crashID (string) Crash ID. Likely a UUID. 560 * @param submissionID (string) Submission ID. Likely a UUID. 561 * @param date (Date) When the submission result was obtained. 562 * @param result (string) One of the SUBMISSION_RESULT constants. 563 * 564 * @return boolean True if the result was recorded and false if not. 565 */ 566 async addSubmissionResult(crashID, submissionID, date, result) { 567 let store = await this._getStore(); 568 if (store.addSubmissionResult(crashID, submissionID, date, result)) { 569 await store.save(); 570 } 571 }, 572 573 /** 574 * Set the classification of a crash. 575 * 576 * @param crashID (string) Crash ID. Likely a UUID. 577 * @param classifications (array) Crash classifications. 578 * 579 * @return boolean True if the data was recorded and false if not. 580 */ 581 async setCrashClassifications(crashID, classifications) { 582 let store = await this._getStore(); 583 if (store.setCrashClassifications(crashID, classifications)) { 584 await store.save(); 585 } 586 }, 587 588 /** 589 * Obtain the paths of all unprocessed events files. 590 * 591 * The promise-resolved array is sorted by file mtime, oldest to newest. 592 */ 593 _getUnprocessedEventsFiles() { 594 return (async () => { 595 try { 596 let entries = []; 597 598 for (let dir of this._eventsDirs) { 599 for (let e of await this._getDirectoryEntries(dir, this.ALL_REGEX)) { 600 entries.push(e); 601 } 602 } 603 604 entries.sort((a, b) => { 605 return a.date - b.date; 606 }); 607 608 return entries; 609 } catch (e) { 610 Cu.reportError(e); 611 return []; 612 } 613 })(); 614 }, 615 616 // See docs/crash-events.rst for the file format specification. 617 _processEventFile(entry) { 618 return (async () => { 619 let data = await IOUtils.read(entry.path); 620 let store = await this._getStore(); 621 622 let decoder = new TextDecoder(); 623 data = decoder.decode(data); 624 625 let type, time; 626 let start = 0; 627 for (let i = 0; i < 2; i++) { 628 let index = data.indexOf("\n", start); 629 if (index == -1) { 630 return this.EVENT_FILE_ERROR_MALFORMED; 631 } 632 633 let sub = data.substring(start, index); 634 switch (i) { 635 case 0: 636 type = sub; 637 break; 638 case 1: 639 time = sub; 640 try { 641 time = parseInt(time, 10); 642 } catch (ex) { 643 return this.EVENT_FILE_ERROR_MALFORMED; 644 } 645 } 646 647 start = index + 1; 648 } 649 let date = new Date(time * 1000); 650 let payload = data.substring(start); 651 652 return this._handleEventFilePayload(store, entry, type, date, payload); 653 })(); 654 }, 655 656 _filterAnnotations(annotations) { 657 let filteredAnnotations = {}; 658 let crashReporter = Cc["@mozilla.org/toolkit/crash-reporter;1"].getService( 659 Ci.nsICrashReporter 660 ); 661 662 for (let line in annotations) { 663 try { 664 if (crashReporter.isAnnotationWhitelistedForPing(line)) { 665 filteredAnnotations[line] = annotations[line]; 666 } 667 } catch (e) { 668 // Silently drop unknown annotations 669 } 670 } 671 672 return filteredAnnotations; 673 }, 674 675 _sendCrashPing(crashId, type, date, metadata = {}) { 676 // If we have a saved environment, use it. Otherwise report 677 // the current environment. 678 let reportMeta = Cu.cloneInto(metadata, myScope); 679 let crashEnvironment = parseAndRemoveField( 680 reportMeta, 681 "TelemetryEnvironment" 682 ); 683 let sessionId = getAndRemoveField(reportMeta, "TelemetrySessionId"); 684 let stackTraces = getAndRemoveField(reportMeta, "StackTraces"); 685 let minidumpSha256Hash = getAndRemoveField( 686 reportMeta, 687 "MinidumpSha256Hash" 688 ); 689 690 // Filter the remaining annotations to remove privacy-sensitive ones 691 reportMeta = this._filterAnnotations(reportMeta); 692 693 this._pingPromise = TelemetryController.submitExternalPing( 694 "crash", 695 { 696 version: 1, 697 crashDate: date.toISOString().slice(0, 10), // YYYY-MM-DD 698 crashTime: date.toISOString().slice(0, 13) + ":00:00.000Z", // per-hour resolution 699 sessionId, 700 crashId, 701 minidumpSha256Hash, 702 processType: type, 703 stackTraces, 704 metadata: reportMeta, 705 hasCrashEnvironment: crashEnvironment !== null, 706 }, 707 { 708 addClientId: true, 709 addEnvironment: true, 710 overrideEnvironment: crashEnvironment, 711 } 712 ); 713 }, 714 715 _handleEventFilePayload(store, entry, type, date, payload) { 716 // The payload types and formats are documented in docs/crash-events.rst. 717 // Do not change the format of an existing type. Instead, invent a new 718 // type. 719 // DO NOT ADD NEW TYPES WITHOUT DOCUMENTING! 720 let lines = payload.split("\n"); 721 722 switch (type) { 723 case "crash.main.1": 724 case "crash.main.2": 725 return this.EVENT_FILE_ERROR_OBSOLETE; 726 727 case "crash.main.3": 728 let crashID = lines[0]; 729 let metadata = JSON.parse(lines[1]); 730 store.addCrash( 731 this.PROCESS_TYPE_MAIN, 732 this.CRASH_TYPE_CRASH, 733 crashID, 734 date, 735 metadata 736 ); 737 738 if (!("CrashPingUUID" in metadata)) { 739 // If CrashPingUUID is not present then a ping was not generated 740 // by the crashreporter for this crash so we need to send one from 741 // here. 742 this._sendCrashPing(crashID, this.PROCESS_TYPE_MAIN, date, metadata); 743 } 744 745 break; 746 747 case "crash.submission.1": 748 if (lines.length == 3) { 749 let [crashID, result, remoteID] = lines; 750 store.addCrash( 751 this.PROCESS_TYPE_MAIN, 752 this.CRASH_TYPE_CRASH, 753 crashID, 754 date 755 ); 756 757 let submissionID = this.generateSubmissionID(); 758 let succeeded = result === "true"; 759 store.addSubmissionAttempt(crashID, submissionID, date); 760 store.addSubmissionResult( 761 crashID, 762 submissionID, 763 date, 764 succeeded 765 ? this.SUBMISSION_RESULT_OK 766 : this.SUBMISSION_RESULT_FAILED 767 ); 768 if (succeeded) { 769 store.setRemoteCrashID(crashID, remoteID); 770 } 771 } else { 772 return this.EVENT_FILE_ERROR_MALFORMED; 773 } 774 break; 775 776 default: 777 return this.EVENT_FILE_ERROR_UNKNOWN_EVENT; 778 } 779 780 return this.EVENT_FILE_SUCCESS; 781 }, 782 783 /** 784 * The resolved promise is an array of objects with the properties: 785 * 786 * path -- String filename 787 * id -- regexp.match()[1] (likely the crash ID) 788 * date -- Date mtime of the file 789 */ 790 _getDirectoryEntries(path, re) { 791 return (async function() { 792 let children = await IOUtils.getChildren(path); 793 let entries = []; 794 795 for (const entry of children) { 796 let stat = await IOUtils.stat(entry); 797 if (stat.type == "directory") { 798 continue; 799 } 800 801 let filename = PathUtils.filename(entry); 802 let match = re.exec(filename); 803 if (!match) { 804 continue; 805 } 806 entries.push({ 807 path: entry, 808 id: match[1], 809 date: stat.lastModified, 810 }); 811 } 812 813 entries.sort((a, b) => { 814 return a.date - b.date; 815 }); 816 817 return entries; 818 })(); 819 }, 820 821 _getStore() { 822 if (this._getStoreTask) { 823 return this._getStoreTask; 824 } 825 826 return (this._getStoreTask = (async () => { 827 try { 828 if (!this._store) { 829 await IOUtils.makeDirectory(this._storeDir, { 830 permissions: 0o700, 831 }); 832 833 let store = new CrashStore( 834 this._storeDir, 835 this._telemetryStoreSizeKey 836 ); 837 await store.load(); 838 839 this._store = store; 840 this._storeTimer = Cc["@mozilla.org/timer;1"].createInstance( 841 Ci.nsITimer 842 ); 843 } 844 845 // The application can go long periods without interacting with the 846 // store. Since the store takes up resources, we automatically "free" 847 // the store after inactivity so resources can be returned to the 848 // system. We do this via a timer and a mechanism that tracks when the 849 // store is being accessed. 850 this._storeTimer.cancel(); 851 852 // This callback frees resources from the store unless the store 853 // is protected from freeing by some other process. 854 let timerCB = () => { 855 if (this._storeProtectedCount) { 856 this._storeTimer.initWithCallback( 857 timerCB, 858 this.STORE_EXPIRATION_MS, 859 this._storeTimer.TYPE_ONE_SHOT 860 ); 861 return; 862 } 863 864 // We kill the reference that we hold. GC will kill it later. If 865 // someone else holds a reference, that will prevent GC until that 866 // reference is gone. 867 this._store = null; 868 this._storeTimer = null; 869 }; 870 871 this._storeTimer.initWithCallback( 872 timerCB, 873 this.STORE_EXPIRATION_MS, 874 this._storeTimer.TYPE_ONE_SHOT 875 ); 876 877 return this._store; 878 } finally { 879 this._getStoreTask = null; 880 } 881 })()); 882 }, 883 884 /** 885 * Obtain information about all known crashes. 886 * 887 * Returns an array of CrashRecord instances. Instances are read-only. 888 */ 889 getCrashes() { 890 return (async () => { 891 let store = await this._getStore(); 892 893 return store.crashes; 894 })(); 895 }, 896 897 getCrashCountsByDay() { 898 return (async () => { 899 let store = await this._getStore(); 900 901 return store._countsByDay; 902 })(); 903 }, 904}); 905 906var gCrashManager; 907 908/** 909 * Interface to storage of crash data. 910 * 911 * This type handles storage of crash metadata. It exists as a separate type 912 * from the crash manager for performance reasons: since all crash metadata 913 * needs to be loaded into memory for access, we wish to easily dispose of all 914 * associated memory when this data is no longer needed. Having an isolated 915 * object whose references can easily be lost faciliates that simple disposal. 916 * 917 * When metadata is updated, the caller must explicitly persist the changes 918 * to disk. This prevents excessive I/O during updates. 919 * 920 * The store has a mechanism for ensuring it doesn't grow too large. A ceiling 921 * is placed on the number of daily events that can occur for events that can 922 * occur with relatively high frequency. If we've reached 923 * the high water mark and new data arrives, it's silently dropped. 924 * However, the count of actual events is always preserved. This allows 925 * us to report on the severity of problems beyond the storage threshold. 926 * 927 * Main process crashes are excluded from limits because they are both 928 * important and should be rare. 929 * 930 * @param storeDir (string) 931 * Directory the store should be located in. 932 * @param telemetrySizeKey (string) 933 * The telemetry histogram that should be used to store the size 934 * of the data file. 935 */ 936function CrashStore(storeDir, telemetrySizeKey) { 937 this._storeDir = storeDir; 938 this._telemetrySizeKey = telemetrySizeKey; 939 940 this._storePath = PathUtils.join(storeDir, "store.json.mozlz4"); 941 942 // Holds the read data from disk. 943 this._data = null; 944 945 // Maps days since UNIX epoch to a Map of event types to counts. 946 // This data structure is populated when the JSON file is loaded 947 // and is also updated when new events are added. 948 this._countsByDay = new Map(); 949} 950 951CrashStore.prototype = Object.freeze({ 952 // Maximum number of events to store per day. This establishes a 953 // ceiling on the per-type/per-day records that will be stored. 954 HIGH_WATER_DAILY_THRESHOLD: 500, 955 956 /** 957 * Reset all data. 958 */ 959 reset() { 960 this._data = { 961 v: 1, 962 crashes: new Map(), 963 corruptDate: null, 964 }; 965 this._countsByDay = new Map(); 966 }, 967 968 /** 969 * Load data from disk. 970 * 971 * @return Promise 972 */ 973 load() { 974 return (async () => { 975 // Loading replaces data. 976 this.reset(); 977 978 try { 979 let decoder = new TextDecoder(); 980 let data = await IOUtils.read(this._storePath, { decompress: true }); 981 data = JSON.parse(decoder.decode(data)); 982 983 if (data.corruptDate) { 984 this._data.corruptDate = new Date(data.corruptDate); 985 } 986 987 // actualCounts is used to validate that the derived counts by 988 // days stored in the payload matches up to actual data. 989 let actualCounts = new Map(); 990 991 // In the past, submissions were stored as separate crash records 992 // with an id of e.g. "someID-submission". If we find IDs ending 993 // with "-submission", we will need to convert the data to be stored 994 // as actual submissions. 995 // 996 // The old way of storing submissions was used from FF33 - FF34. We 997 // drop this old data on the floor. 998 for (let id in data.crashes) { 999 if (id.endsWith("-submission")) { 1000 continue; 1001 } 1002 1003 let crash = data.crashes[id]; 1004 let denormalized = this._denormalize(crash); 1005 1006 denormalized.submissions = new Map(); 1007 if (crash.submissions) { 1008 for (let submissionID in crash.submissions) { 1009 let submission = crash.submissions[submissionID]; 1010 denormalized.submissions.set( 1011 submissionID, 1012 this._denormalize(submission) 1013 ); 1014 } 1015 } 1016 1017 this._data.crashes.set(id, denormalized); 1018 1019 let key = 1020 dateToDays(denormalized.crashDate) + "-" + denormalized.type; 1021 actualCounts.set(key, (actualCounts.get(key) || 0) + 1); 1022 1023 // If we have an OOM size, count the crash as an OOM in addition to 1024 // being a main process crash. 1025 if ( 1026 denormalized.metadata && 1027 denormalized.metadata.OOMAllocationSize 1028 ) { 1029 let oomKey = key + "-oom"; 1030 actualCounts.set(oomKey, (actualCounts.get(oomKey) || 0) + 1); 1031 } 1032 } 1033 1034 // The validation in this loop is arguably not necessary. We perform 1035 // it as a defense against unknown bugs. 1036 for (let dayKey in data.countsByDay) { 1037 let day = parseInt(dayKey, 10); 1038 for (let type in data.countsByDay[day]) { 1039 this._ensureCountsForDay(day); 1040 1041 let count = data.countsByDay[day][type]; 1042 let key = day + "-" + type; 1043 1044 // If the payload says we have data for a given day but we 1045 // don't, the payload is wrong. Ignore it. 1046 if (!actualCounts.has(key)) { 1047 continue; 1048 } 1049 1050 // If we encountered more data in the payload than what the 1051 // data structure says, use the proper value. 1052 count = Math.max(count, actualCounts.get(key)); 1053 1054 this._countsByDay.get(day).set(type, count); 1055 } 1056 } 1057 } catch (ex) { 1058 // Missing files (first use) are allowed. 1059 if (!(ex instanceof DOMException) || ex.name != "NotFoundError") { 1060 // If we can't load for any reason, mark a corrupt date in the instance 1061 // and swallow the error. 1062 // 1063 // The marking of a corrupted file is intentionally not persisted to 1064 // disk yet. Instead, we wait until the next save(). This is to give 1065 // non-permanent failures the opportunity to recover on their own. 1066 this._data.corruptDate = new Date(); 1067 } 1068 } 1069 })(); 1070 }, 1071 1072 /** 1073 * Save data to disk. 1074 * 1075 * @return Promise<null> 1076 */ 1077 save() { 1078 return (async () => { 1079 if (!this._data) { 1080 return; 1081 } 1082 1083 let normalized = { 1084 // The version should be incremented whenever the format 1085 // changes. 1086 v: 1, 1087 // Maps crash IDs to objects defining the crash. 1088 crashes: {}, 1089 // Maps days since UNIX epoch to objects mapping event types to 1090 // counts. This is a mirror of this._countsByDay. e.g. 1091 // { 1092 // 15000: { 1093 // "main-crash": 2, 1094 // "plugin-crash": 1 1095 // } 1096 // } 1097 countsByDay: {}, 1098 1099 // When the store was last corrupted. 1100 corruptDate: null, 1101 }; 1102 1103 if (this._data.corruptDate) { 1104 normalized.corruptDate = this._data.corruptDate.getTime(); 1105 } 1106 1107 for (let [id, crash] of this._data.crashes) { 1108 let c = this._normalize(crash); 1109 1110 c.submissions = {}; 1111 for (let [submissionID, submission] of crash.submissions) { 1112 c.submissions[submissionID] = this._normalize(submission); 1113 } 1114 1115 normalized.crashes[id] = c; 1116 } 1117 1118 for (let [day, m] of this._countsByDay) { 1119 normalized.countsByDay[day] = {}; 1120 for (let [type, count] of m) { 1121 normalized.countsByDay[day][type] = count; 1122 } 1123 } 1124 1125 let encoder = new TextEncoder(); 1126 let data = encoder.encode(JSON.stringify(normalized)); 1127 let size = await IOUtils.write(this._storePath, data, { 1128 tmpPath: this._storePath + ".tmp", 1129 compress: true, 1130 }); 1131 if (this._telemetrySizeKey) { 1132 Services.telemetry.getHistogramById(this._telemetrySizeKey).add(size); 1133 } 1134 })(); 1135 }, 1136 1137 /** 1138 * Normalize an object into one fit for serialization. 1139 * 1140 * This function along with _denormalize() serve to hack around the 1141 * default handling of Date JSON serialization because Date serialization 1142 * is undefined by JSON. 1143 * 1144 * Fields ending with "Date" are assumed to contain Date instances. 1145 * We convert these to milliseconds since epoch on output and back to 1146 * Date on input. 1147 */ 1148 _normalize(o) { 1149 let normalized = {}; 1150 1151 for (let k in o) { 1152 let v = o[k]; 1153 if (v && k.endsWith("Date")) { 1154 normalized[k] = v.getTime(); 1155 } else { 1156 normalized[k] = v; 1157 } 1158 } 1159 1160 return normalized; 1161 }, 1162 1163 /** 1164 * Convert a serialized object back to its native form. 1165 */ 1166 _denormalize(o) { 1167 let n = {}; 1168 1169 for (let k in o) { 1170 let v = o[k]; 1171 if (v && k.endsWith("Date")) { 1172 n[k] = new Date(parseInt(v, 10)); 1173 } else { 1174 n[k] = v; 1175 } 1176 } 1177 1178 return n; 1179 }, 1180 1181 /** 1182 * Prune old crash data. 1183 * 1184 * Crashes without recent activity are pruned from the store so the 1185 * size of the store is not unbounded. If there is activity on a crash, 1186 * that activity will keep the crash and all its data around for longer. 1187 * 1188 * @param date 1189 * (Date) The cutoff at which data will be pruned. If an entry 1190 * doesn't have data newer than this, it will be pruned. 1191 */ 1192 pruneOldCrashes(date) { 1193 for (let crash of this.crashes) { 1194 let newest = crash.newestDate; 1195 if (!newest || newest.getTime() < date.getTime()) { 1196 this._data.crashes.delete(crash.id); 1197 } 1198 } 1199 }, 1200 1201 /** 1202 * Date the store was last corrupted and required a reset. 1203 * 1204 * May be null (no corruption has ever occurred) or a Date instance. 1205 */ 1206 get corruptDate() { 1207 return this._data.corruptDate; 1208 }, 1209 1210 /** 1211 * The number of distinct crashes tracked. 1212 */ 1213 get crashesCount() { 1214 return this._data.crashes.size; 1215 }, 1216 1217 /** 1218 * All crashes tracked. 1219 * 1220 * This is an array of CrashRecord. 1221 */ 1222 get crashes() { 1223 let crashes = []; 1224 for (let [, crash] of this._data.crashes) { 1225 crashes.push(new CrashRecord(crash)); 1226 } 1227 1228 return crashes; 1229 }, 1230 1231 /** 1232 * Obtain a particular crash from its ID. 1233 * 1234 * A CrashRecord will be returned if the crash exists. null will be returned 1235 * if the crash is unknown. 1236 */ 1237 getCrash(id) { 1238 for (let crash of this.crashes) { 1239 if (crash.id == id) { 1240 return crash; 1241 } 1242 } 1243 1244 return null; 1245 }, 1246 1247 _ensureCountsForDay(day) { 1248 if (!this._countsByDay.has(day)) { 1249 this._countsByDay.set(day, new Map()); 1250 } 1251 }, 1252 1253 /** 1254 * Ensure the crash record is present in storage. 1255 * 1256 * Returns the crash record if we're allowed to store it or null 1257 * if we've hit the high water mark. 1258 * 1259 * @param processType 1260 * (string) One of the PROCESS_TYPE constants. 1261 * @param crashType 1262 * (string) One of the CRASH_TYPE constants. 1263 * @param id 1264 * (string) The crash ID. 1265 * @param date 1266 * (Date) When this crash occurred. 1267 * @param metadata 1268 * (dictionary) Crash metadata, may be empty. 1269 * 1270 * @return null | object crash record 1271 */ 1272 _ensureCrashRecord(processType, crashType, id, date, metadata) { 1273 if (!id) { 1274 // Crashes are keyed on ID, so it's not really helpful to store crashes 1275 // without IDs. 1276 return null; 1277 } 1278 1279 let type = processType + "-" + crashType; 1280 1281 if (!this._data.crashes.has(id)) { 1282 let day = dateToDays(date); 1283 this._ensureCountsForDay(day); 1284 1285 let count = (this._countsByDay.get(day).get(type) || 0) + 1; 1286 this._countsByDay.get(day).set(type, count); 1287 1288 if ( 1289 count > this.HIGH_WATER_DAILY_THRESHOLD && 1290 processType != CrashManager.prototype.PROCESS_TYPE_MAIN 1291 ) { 1292 return null; 1293 } 1294 1295 // If we have an OOM size, count the crash as an OOM in addition to 1296 // being a main process crash. 1297 if (metadata && metadata.OOMAllocationSize) { 1298 let oomType = type + "-oom"; 1299 let oomCount = (this._countsByDay.get(day).get(oomType) || 0) + 1; 1300 this._countsByDay.get(day).set(oomType, oomCount); 1301 } 1302 1303 this._data.crashes.set(id, { 1304 id, 1305 remoteID: null, 1306 type, 1307 crashDate: date, 1308 submissions: new Map(), 1309 classifications: [], 1310 metadata, 1311 }); 1312 } 1313 1314 let crash = this._data.crashes.get(id); 1315 crash.type = type; 1316 crash.crashDate = date; 1317 1318 return crash; 1319 }, 1320 1321 /** 1322 * Record the occurrence of a crash. 1323 * 1324 * @param processType (string) One of the PROCESS_TYPE constants. 1325 * @param crashType (string) One of the CRASH_TYPE constants. 1326 * @param id (string) Crash ID. Likely a UUID. 1327 * @param date (Date) When the crash occurred. 1328 * @param metadata (dictionary) Crash metadata, may be empty. 1329 * 1330 * @return boolean True if the crash was recorded and false if not. 1331 */ 1332 addCrash(processType, crashType, id, date, metadata) { 1333 return !!this._ensureCrashRecord( 1334 processType, 1335 crashType, 1336 id, 1337 date, 1338 metadata 1339 ); 1340 }, 1341 1342 /** 1343 * @return boolean True if the remote ID was recorded and false if not. 1344 */ 1345 setRemoteCrashID(crashID, remoteID) { 1346 let crash = this._data.crashes.get(crashID); 1347 if (!crash || !remoteID) { 1348 return false; 1349 } 1350 1351 crash.remoteID = remoteID; 1352 return true; 1353 }, 1354 1355 getCrashesOfType(processType, crashType) { 1356 let crashes = []; 1357 for (let crash of this.crashes) { 1358 if (crash.isOfType(processType, crashType)) { 1359 crashes.push(crash); 1360 } 1361 } 1362 1363 return crashes; 1364 }, 1365 1366 /** 1367 * Ensure the submission record is present in storage. 1368 * @returns [submission, crash] 1369 */ 1370 _ensureSubmissionRecord(crashID, submissionID) { 1371 let crash = this._data.crashes.get(crashID); 1372 if (!crash || !submissionID) { 1373 return null; 1374 } 1375 1376 if (!crash.submissions.has(submissionID)) { 1377 crash.submissions.set(submissionID, { 1378 requestDate: null, 1379 responseDate: null, 1380 result: null, 1381 }); 1382 } 1383 1384 return [crash.submissions.get(submissionID), crash]; 1385 }, 1386 1387 /** 1388 * @return boolean True if the attempt was recorded. 1389 */ 1390 addSubmissionAttempt(crashID, submissionID, date) { 1391 let [submission, crash] = this._ensureSubmissionRecord( 1392 crashID, 1393 submissionID 1394 ); 1395 if (!submission) { 1396 return false; 1397 } 1398 1399 submission.requestDate = date; 1400 Services.telemetry 1401 .getKeyedHistogramById("PROCESS_CRASH_SUBMIT_ATTEMPT") 1402 .add(crash.type, 1); 1403 return true; 1404 }, 1405 1406 /** 1407 * @return boolean True if the response was recorded. 1408 */ 1409 addSubmissionResult(crashID, submissionID, date, result) { 1410 let crash = this._data.crashes.get(crashID); 1411 if (!crash || !submissionID) { 1412 return false; 1413 } 1414 let submission = crash.submissions.get(submissionID); 1415 if (!submission) { 1416 return false; 1417 } 1418 1419 submission.responseDate = date; 1420 submission.result = result; 1421 Services.telemetry 1422 .getKeyedHistogramById("PROCESS_CRASH_SUBMIT_SUCCESS") 1423 .add(crash.type, result == "ok"); 1424 return true; 1425 }, 1426 1427 /** 1428 * @return boolean True if the classifications were set. 1429 */ 1430 setCrashClassifications(crashID, classifications) { 1431 let crash = this._data.crashes.get(crashID); 1432 if (!crash) { 1433 return false; 1434 } 1435 1436 crash.classifications = classifications; 1437 return true; 1438 }, 1439}); 1440 1441/** 1442 * Represents an individual crash with metadata. 1443 * 1444 * This is a wrapper around the low-level anonymous JS objects that define 1445 * crashes. It exposes a consistent and helpful API. 1446 * 1447 * Instances of this type should only be constructured inside this module, 1448 * not externally. The constructor is not considered a public API. 1449 * 1450 * @param o (object) 1451 * The crash's entry from the CrashStore. 1452 */ 1453function CrashRecord(o) { 1454 this._o = o; 1455} 1456 1457CrashRecord.prototype = Object.freeze({ 1458 get id() { 1459 return this._o.id; 1460 }, 1461 1462 get remoteID() { 1463 return this._o.remoteID; 1464 }, 1465 1466 get crashDate() { 1467 return this._o.crashDate; 1468 }, 1469 1470 /** 1471 * Obtain the newest date in this record. 1472 * 1473 * This is a convenience getter. The returned value is used to determine when 1474 * to expire a record. 1475 */ 1476 get newestDate() { 1477 // We currently only have 1 date, so this is easy. 1478 return this._o.crashDate; 1479 }, 1480 1481 get oldestDate() { 1482 return this._o.crashDate; 1483 }, 1484 1485 get type() { 1486 return this._o.type; 1487 }, 1488 1489 isOfType(processType, crashType) { 1490 return processType + "-" + crashType == this.type; 1491 }, 1492 1493 get submissions() { 1494 return this._o.submissions; 1495 }, 1496 1497 get classifications() { 1498 return this._o.classifications; 1499 }, 1500 1501 get metadata() { 1502 return this._o.metadata; 1503 }, 1504}); 1505 1506XPCOMUtils.defineLazyGetter(CrashManager, "_log", () => 1507 Log.repository.getLogger("Crashes.CrashManager") 1508); 1509 1510/** 1511 * Obtain the global CrashManager instance used by the running application. 1512 * 1513 * CrashManager is likely only ever instantiated once per application lifetime. 1514 * The main reason it's implemented as a reusable type is to facilitate testing. 1515 */ 1516XPCOMUtils.defineLazyGetter(CrashManager, "Singleton", function() { 1517 if (gCrashManager) { 1518 return gCrashManager; 1519 } 1520 1521 gCrashManager = new CrashManager({ 1522 telemetryStoreSizeKey: "CRASH_STORE_COMPRESSED_BYTES", 1523 }); 1524 1525 // Automatically aggregate event files shortly after startup. This 1526 // ensures it happens with some frequency. 1527 // 1528 // There are performance considerations here. While this is doing 1529 // work and could negatively impact performance, the amount of work 1530 // is kept small per run by periodically aggregating event files. 1531 // Furthermore, well-behaving installs should not have much work 1532 // here to do. If there is a lot of work, that install has bigger 1533 // issues beyond reduced performance near startup. 1534 gCrashManager.scheduleMaintenance(AGGREGATE_STARTUP_DELAY_MS); 1535 1536 return gCrashManager; 1537}); 1538 1539function getCrashManager() { 1540 return CrashManager.Singleton; 1541} 1542