/* This Source Code Form is subject to the terms of the Mozilla Public
 * License, v. 2.0. If a copy of the MPL was not distributed with this
 * file, You can obtain one at http://mozilla.org/MPL/2.0/. */

"use strict";

// Captured so Cu.cloneInto() can clone objects into this module's scope
// (used by _sendCrashPing).
const myScope = this;

const { PromiseUtils } = ChromeUtils.import(
  "resource://gre/modules/PromiseUtils.jsm"
);
const { Services } = ChromeUtils.import("resource://gre/modules/Services.jsm");
const { setTimeout } = ChromeUtils.import("resource://gre/modules/Timer.jsm");
const { XPCOMUtils } = ChromeUtils.import(
  "resource://gre/modules/XPCOMUtils.jsm"
);

XPCOMUtils.defineLazyModuleGetters(this, {
  Log: "resource://gre/modules/Log.jsm",
  TelemetryController: "resource://gre/modules/TelemetryController.jsm",
});

var EXPORTED_SYMBOLS = [
  "CrashManager",
  "getCrashManager",
  // The following are exported for tests only.
  "CrashStore",
  "dateToDays",
  "getCrashManagerNoCreate",
];

/**
 * How long to wait after application startup before crash event files are
 * automatically aggregated.
 *
 * We defer aggregation for performance reasons, as we don't want too many
 * services competing for I/O immediately after startup.
 */
const AGGREGATE_STARTUP_DELAY_MS = 57000;

const MILLISECONDS_IN_DAY = 24 * 60 * 60 * 1000;

// Converts Date to days since UNIX epoch.
// This was copied from /services/metrics.storage.jsm. The implementation
// does not account for leap seconds.
function dateToDays(date) {
  return Math.floor(date.getTime() / MILLISECONDS_IN_DAY);
}

/**
 * Get a field from the specified object and remove it.
 *
 * @param obj {Object} The object holding the field
 * @param field {String} The name of the field to be removed
 *
 * @returns the field contents (whatever type was stored), null if the field
 *          was not present
 */
function getAndRemoveField(obj, field) {
  let value = null;

  if (field in obj) {
    value = obj[field];
    delete obj[field];
  }

  return value;
}

/**
 * Parse the string stored in the specified field as JSON and then remove the
 * field from the object.
 *
 * @param obj {Object} The object holding the field
 * @param field {String} The name of the field to be parsed and removed
 *
 * @returns {Object} the parsed object; null if the field was not present or
 *          could not be parsed as JSON (the field is removed either way)
 */
function parseAndRemoveField(obj, field) {
  let value = null;

  if (field in obj) {
    try {
      value = JSON.parse(obj[field]);
    } catch (e) {
      // Report malformed JSON but continue; the field is still removed.
      Cu.reportError(e);
    }

    delete obj[field];
  }

  return value;
}

/**
 * A gateway to crash-related data.
 *
 * This type is generic and can be instantiated any number of times.
 * However, most applications will typically only have one instance
 * instantiated and that instance will point to profile and user appdata
 * directories.
 *
 * Instances are created by passing an object with properties.
 * Recognized properties are:
 *
 *   pendingDumpsDir (string) (required)
 *     Where dump files that haven't been uploaded are located.
 *
 *   submittedDumpsDir (string) (required)
 *     Where records of uploaded dumps are located.
 *
 *   eventsDirs (array)
 *     Directories (defined as strings) where events files are written. This
 *     instance will collect events from files in the directories specified.
 *
 *   storeDir (string)
 *     Directory we will use for our data store. This instance will write
 *     data files into the directory specified.
 *
 *   telemetryStoreSizeKey (string)
 *     Telemetry histogram to report store size under.
 */
var CrashManager = function(options) {
  for (let k in options) {
    let value = options[k];

    switch (k) {
      case "pendingDumpsDir":
      case "submittedDumpsDir":
      case "eventsDirs":
      case "storeDir":
        // Directory options shadow the lazy getters defined on the
        // prototype: delete any own property and pin a fixed value.
        let key = "_" + k;
        delete this[key];
        Object.defineProperty(this, key, { value });
        break;
      case "telemetryStoreSizeKey":
        this._telemetryStoreSizeKey = value;
        break;

      default:
        throw new Error("Unknown property in options: " + k);
    }
  }

  // Promise for in-progress aggregation operation. We store it on the
  // object so it can be returned for in-progress operations.
  this._aggregatePromise = null;

  // Map of crash ID / promise tuples used to track adding new crashes.
  this._crashPromises = new Map();

  // Promise for the crash ping used only for testing.
  this._pingPromise = null;

  // The CrashStore currently attached to this object.
  this._store = null;

  // A Task to retrieve the store. This is needed to avoid races when
  // _getStore() is called multiple times in a short interval.
  this._getStoreTask = null;

  // The timer controlling the expiration of the CrashStore instance.
  this._storeTimer = null;

  // This is a semaphore that prevents the store from being freed by our
  // timer-based resource freeing mechanism.
  this._storeProtectedCount = 0;
};

CrashManager.prototype = Object.freeze({
  // gen_CrashManager.py will input the proper process map information.
  /* SUBST: CRASH_MANAGER_PROCESS_MAP */

  // A real crash.
  CRASH_TYPE_CRASH: "crash",

  // A hang.
  CRASH_TYPE_HANG: "hang",

  // Submission result values.
  SUBMISSION_RESULT_OK: "ok",
  SUBMISSION_RESULT_FAILED: "failed",

  // Filename of a pending minidump: <UUID>.dmp (case-insensitive).
  DUMP_REGEX: /^([0-9a-f]{8}-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{12})\.dmp$/i,
  // Filename written after submission: bp-[hr-]<UUID>.txt (case-insensitive).
  SUBMITTED_REGEX: /^bp-(?:hr-)?([0-9a-f]{8}-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{12})\.txt$/i,
  // Matches any filename; capture group 1 is the whole name.
  ALL_REGEX: /^(.*)$/,

  // How long the store object should persist in memory before being
  // automatically garbage collected.
  STORE_EXPIRATION_MS: 60 * 1000,

  // Number of days after which a crash with no activity will get purged.
  PURGE_OLDER_THAN_DAYS: 180,

  // The following are return codes for individual event file processing.
  // File processed OK.
  EVENT_FILE_SUCCESS: "ok",
  // The event appears to be malformed.
  EVENT_FILE_ERROR_MALFORMED: "malformed",
  // The event is obsolete.
  EVENT_FILE_ERROR_OBSOLETE: "obsolete",
  // The type of event is unknown.
  EVENT_FILE_ERROR_UNKNOWN_EVENT: "unknown-event",

  // Replaces the named lazy getter with a fixed own property holding the
  // joined path, so the path join happens at most once per field.
  _lazyGetDir(field, path, leaf) {
    delete this[field];
    let value = PathUtils.join(path, leaf);
    Object.defineProperty(this, field, { value });
    return value;
  },

  get _crDir() {
    return this._lazyGetDir(
      "_crDir",
      Services.dirsvc.get("UAppData", Ci.nsIFile).path,
      "Crash Reports"
    );
  },

  get _storeDir() {
    return this._lazyGetDir(
      "_storeDir",
      Services.dirsvc.get("ProfD", Ci.nsIFile).path,
      "crashes"
    );
  },

  get _pendingDumpsDir() {
    return this._lazyGetDir("_pendingDumpsDir", this._crDir, "pending");
  },

  get _submittedDumpsDir() {
    return this._lazyGetDir("_submittedDumpsDir", this._crDir, "submitted");
  },

  get _eventsDirs() {
    delete this._eventsDirs;
    let value = [
      PathUtils.join(this._crDir, "events"),
      PathUtils.join(this._storeDir, "events"),
    ];
    Object.defineProperty(this, "_eventsDirs", { value });
    return value;
  },

  /**
   * Obtain a list of all dumps pending upload.
   *
   * The returned value is a promise that resolves to an array of objects
   * on success. Each element in the array has the following properties:
   *
   *   id (string)
   *      The ID of the crash (a UUID).
   *
   *   path (string)
   *      The filename of the crash (<UUID.dmp>)
   *
   *   date (Date)
   *      When this dump was created
   *
   * The returned array is sorted by the modified time of the file backing
   * the entry, oldest to newest.
   *
   * @return Promise<Array>
   */
  pendingDumps() {
    return this._getDirectoryEntries(this._pendingDumpsDir, this.DUMP_REGEX);
  },

  /**
   * Obtain a list of all dump files corresponding to submitted crashes.
   *
   * The returned value is a promise that resolves to an Array of
   * objects. Each object has the following properties:
   *
   *   path (string)
   *     The path of the file this entry comes from.
   *
   *   id (string)
   *     The crash UUID.
   *
   *   date (Date)
   *     The (estimated) date this crash was submitted.
   *
   * The returned array is sorted by the modified time of the file backing
   * the entry, oldest to newest.
   *
   * @return Promise<Array>
   */
  submittedDumps() {
    return this._getDirectoryEntries(
      this._submittedDumpsDir,
      this.SUBMITTED_REGEX
    );
  },

  /**
   * Aggregates "loose" events files into the unified "database."
   *
   * This function should be called periodically to collect metadata from
   * all events files into the central data store maintained by this manager.
   *
   * Once events have been stored in the backing store the corresponding
   * source files are deleted.
   *
   * Only one aggregation operation is allowed to occur at a time. If this
   * is called when an existing aggregation is in progress, the promise for
   * the original call will be returned.
   *
   * @return promise<int> The number of event files that were examined.
310 */ 311 aggregateEventsFiles() { 312 if (this._aggregatePromise) { 313 return this._aggregatePromise; 314 } 315 316 return (this._aggregatePromise = (async () => { 317 if (this._aggregatePromise) { 318 return this._aggregatePromise; 319 } 320 321 try { 322 let unprocessedFiles = await this._getUnprocessedEventsFiles(); 323 324 let deletePaths = []; 325 let needsSave = false; 326 327 this._storeProtectedCount++; 328 for (let entry of unprocessedFiles) { 329 try { 330 let result = await this._processEventFile(entry); 331 332 switch (result) { 333 case this.EVENT_FILE_SUCCESS: 334 needsSave = true; 335 // Fall through. 336 337 case this.EVENT_FILE_ERROR_MALFORMED: 338 case this.EVENT_FILE_ERROR_OBSOLETE: 339 deletePaths.push(entry.path); 340 break; 341 342 case this.EVENT_FILE_ERROR_UNKNOWN_EVENT: 343 break; 344 345 default: 346 Cu.reportError( 347 "Unhandled crash event file return code. Please " + 348 "file a bug: " + 349 result 350 ); 351 } 352 } catch (ex) { 353 if (ex instanceof DOMException) { 354 this._log.warn("I/O error reading " + entry.path, ex); 355 } else { 356 // We should never encounter an exception. This likely represents 357 // a coding error because all errors should be detected and 358 // converted to return codes. 359 // 360 // If we get here, report the error and delete the source file 361 // so we don't see it again. 362 Cu.reportError( 363 "Exception when processing crash event file: " + 364 Log.exceptionStr(ex) 365 ); 366 deletePaths.push(entry.path); 367 } 368 } 369 } 370 371 if (needsSave) { 372 let store = await this._getStore(); 373 await store.save(); 374 } 375 376 for (let path of deletePaths) { 377 try { 378 await IOUtils.remove(path); 379 } catch (ex) { 380 this._log.warn("Error removing event file (" + path + ")", ex); 381 } 382 } 383 384 return unprocessedFiles.length; 385 } finally { 386 this._aggregatePromise = false; 387 this._storeProtectedCount--; 388 } 389 })()); 390 }, 391 392 /** 393 * Prune old crash data. 
   *
   * @param date
   *        (Date) The cutoff point for pruning. Crashes without data newer
   *        than this will be pruned.
   */
  pruneOldCrashes(date) {
    return (async () => {
      let store = await this._getStore();
      store.pruneOldCrashes(date);
      await store.save();
    })();
  },

  /**
   * Run tasks that should be periodically performed.
   */
  runMaintenanceTasks() {
    return (async () => {
      await this.aggregateEventsFiles();

      let offset = this.PURGE_OLDER_THAN_DAYS * MILLISECONDS_IN_DAY;
      await this.pruneOldCrashes(new Date(Date.now() - offset));
    })();
  },

  /**
   * Schedule maintenance tasks for some point in the future.
   *
   * @param delay
   *        (integer) Delay in milliseconds when maintenance should occur.
   */
  scheduleMaintenance(delay) {
    let deferred = PromiseUtils.defer();

    setTimeout(() => {
      this.runMaintenanceTasks().then(deferred.resolve, deferred.reject);
    }, delay);

    return deferred.promise;
  },

  /**
   * Record the occurrence of a crash.
   *
   * This method skips event files altogether and writes directly and
   * immediately to the manager's data store.
   *
   * @param processType (string) One of the PROCESS_TYPE constants.
   * @param crashType (string) One of the CRASH_TYPE constants.
   * @param id (string) Crash ID. Likely a UUID.
   * @param date (Date) When the crash occurred.
   * @param metadata (dictionary) Crash metadata, may be empty.
   *
   * @return promise<null> Resolved when the store has been saved.
   */
  addCrash(processType, crashType, id, date, metadata) {
    let promise = (async () => {
      if (!this.isValidProcessType(processType)) {
        Cu.reportError(
          "Unhandled process type. Please file a bug: '" + processType +
          "'. Ignore in the context of " +
          "test_crash_manager.js:test_addCrashWrong().");
        return;
      }

      let store = await this._getStore();
      if (store.addCrash(processType, crashType, id, date, metadata)) {
        await store.save();
      }

      // Resolve any waiter parked by ensureCrashIsPresent() for this ID.
      let deferred = this._crashPromises.get(id);

      if (deferred) {
        this._crashPromises.delete(id);
        deferred.resolve();
      }

      if (this.isPingAllowed(processType)) {
        this._sendCrashPing(id, processType, date, metadata);
      }
    })();

    return promise;
  },

  /**
   * Check that the processType parameter is a valid one:
   *  - it is a string
   *  - it is listed in this.processTypes
   *
   * @param processType (string) Process type to evaluate
   *
   * @return boolean True or false depending whether it is a legit one
   */
  isValidProcessType(processType) {
    if (typeof(processType) !== "string") {
      return false;
    }

    for (const pt of Object.values(this.processTypes)) {
      if (pt === processType) {
        return true;
      }
    }

    return false;
  },

  /**
   * Check that processType is allowed to send a ping
   *
   * @param processType (string) Process type to check for
   *
   * @return boolean True or False depending on whether ping is allowed
   **/
  isPingAllowed(processType) {
    // gen_CrashManager.py will input the proper process pings information.

    /* SUBST: CRASH_MANAGER_PROCESS_PINGS */

    // Should not even reach this because of isValidProcessType() but just in
    // case we try to be cautious
    if (!(processType in processPings)) {
      return false;
    }

    return processPings[processType];
  },

  /**
   * Returns a promise that is resolved only after the crash with the
   * specified id has been fully recorded.
   *
   * @param id (string) Crash ID. Likely a UUID.
   *
   * @return promise<null> Resolved when the crash is present.
   */
  async ensureCrashIsPresent(id) {
    let store = await this._getStore();
    let crash = store.getCrash(id);

    if (crash) {
      return Promise.resolve();
    }

    // Not yet recorded: park a deferred that addCrash() will resolve.
    let deferred = PromiseUtils.defer();

    this._crashPromises.set(id, deferred);
    return deferred.promise;
  },

  /**
   * Record the remote ID for a crash.
   *
   * @param crashID (string) Crash ID. Likely a UUID.
   * @param remoteID (string) Server/Breakpad ID.
   *
   * @return boolean True if the remote ID was recorded.
   */
  async setRemoteCrashID(crashID, remoteID) {
    let store = await this._getStore();
    if (store.setRemoteCrashID(crashID, remoteID)) {
      await store.save();
    }
  },

  /**
   * Generate a submission ID for use with addSubmission{Attempt,Result}.
   */
  generateSubmissionID() {
    // Strip the surrounding braces from the generated "{uuid}" string.
    return (
      "sub-" +
      Services.uuid
        .generateUUID()
        .toString()
        .slice(1, -1)
    );
  },

  /**
   * Record the occurrence of a submission attempt for a crash.
   *
   * @param crashID (string) Crash ID. Likely a UUID.
   * @param submissionID (string) Submission ID. Likely a UUID.
   * @param date (Date) When the attempt occurred.
   *
   * @return boolean True if the attempt was recorded and false if not.
   */
  async addSubmissionAttempt(crashID, submissionID, date) {
    let store = await this._getStore();
    if (store.addSubmissionAttempt(crashID, submissionID, date)) {
      await store.save();
    }
  },

  /**
   * Record the occurrence of a submission result for a crash.
   *
   * @param crashID (string) Crash ID. Likely a UUID.
   * @param submissionID (string) Submission ID. Likely a UUID.
   * @param date (Date) When the submission result was obtained.
   * @param result (string) One of the SUBMISSION_RESULT constants.
   *
   * @return boolean True if the result was recorded and false if not.
   */
  async addSubmissionResult(crashID, submissionID, date, result) {
    let store = await this._getStore();
    if (store.addSubmissionResult(crashID, submissionID, date, result)) {
      await store.save();
    }
  },

  /**
   * Set the classification of a crash.
   *
   * @param crashID (string) Crash ID. Likely a UUID.
   * @param classifications (array) Crash classifications.
   *
   * @return boolean True if the data was recorded and false if not.
   */
  async setCrashClassifications(crashID, classifications) {
    let store = await this._getStore();
    if (store.setCrashClassifications(crashID, classifications)) {
      await store.save();
    }
  },

  /**
   * Obtain the paths of all unprocessed events files.
   *
   * The promise-resolved array is sorted by file mtime, oldest to newest.
   */
  _getUnprocessedEventsFiles() {
    return (async () => {
      try {
        let entries = [];

        for (let dir of this._eventsDirs) {
          for (let e of await this._getDirectoryEntries(dir, this.ALL_REGEX)) {
            entries.push(e);
          }
        }

        entries.sort((a, b) => {
          return a.date - b.date;
        });

        return entries;
      } catch (e) {
        // Best-effort: report and pretend there were no event files.
        Cu.reportError(e);
        return [];
      }
    })();
  },

  // See docs/crash-events.rst for the file format specification.
650 _processEventFile(entry) { 651 return (async () => { 652 let data = await IOUtils.read(entry.path); 653 let store = await this._getStore(); 654 655 let decoder = new TextDecoder(); 656 data = decoder.decode(data); 657 658 let type, time; 659 let start = 0; 660 for (let i = 0; i < 2; i++) { 661 let index = data.indexOf("\n", start); 662 if (index == -1) { 663 return this.EVENT_FILE_ERROR_MALFORMED; 664 } 665 666 let sub = data.substring(start, index); 667 switch (i) { 668 case 0: 669 type = sub; 670 break; 671 case 1: 672 time = sub; 673 try { 674 time = parseInt(time, 10); 675 } catch (ex) { 676 return this.EVENT_FILE_ERROR_MALFORMED; 677 } 678 } 679 680 start = index + 1; 681 } 682 let date = new Date(time * 1000); 683 let payload = data.substring(start); 684 685 return this._handleEventFilePayload(store, entry, type, date, payload); 686 })(); 687 }, 688 689 _filterAnnotations(annotations) { 690 let filteredAnnotations = {}; 691 let crashReporter = Cc["@mozilla.org/toolkit/crash-reporter;1"].getService( 692 Ci.nsICrashReporter 693 ); 694 695 for (let line in annotations) { 696 try { 697 if (crashReporter.isAnnotationWhitelistedForPing(line)) { 698 filteredAnnotations[line] = annotations[line]; 699 } 700 } catch (e) { 701 // Silently drop unknown annotations 702 } 703 } 704 705 return filteredAnnotations; 706 }, 707 708 _sendCrashPing(crashId, type, date, metadata = {}) { 709 // If we have a saved environment, use it. Otherwise report 710 // the current environment. 
    let reportMeta = Cu.cloneInto(metadata, myScope);
    let crashEnvironment = parseAndRemoveField(
      reportMeta,
      "TelemetryEnvironment"
    );
    let sessionId = getAndRemoveField(reportMeta, "TelemetrySessionId");
    let stackTraces = getAndRemoveField(reportMeta, "StackTraces");
    let minidumpSha256Hash = getAndRemoveField(
      reportMeta,
      "MinidumpSha256Hash"
    );

    // Filter the remaining annotations to remove privacy-sensitive ones
    reportMeta = this._filterAnnotations(reportMeta);

    // Kept on the instance only so tests can await the submission.
    this._pingPromise = TelemetryController.submitExternalPing(
      "crash",
      {
        version: 1,
        crashDate: date.toISOString().slice(0, 10), // YYYY-MM-DD
        crashTime: date.toISOString().slice(0, 13) + ":00:00.000Z", // per-hour resolution
        sessionId,
        crashId,
        minidumpSha256Hash,
        processType: type,
        stackTraces,
        metadata: reportMeta,
        hasCrashEnvironment: crashEnvironment !== null,
      },
      {
        addClientId: true,
        addEnvironment: true,
        overrideEnvironment: crashEnvironment,
      }
    );
  },

  _handleEventFilePayload(store, entry, type, date, payload) {
    // The payload types and formats are documented in docs/crash-events.rst.
    // Do not change the format of an existing type. Instead, invent a new
    // type.
    // DO NOT ADD NEW TYPES WITHOUT DOCUMENTING!
    let lines = payload.split("\n");

    switch (type) {
      case "crash.main.1":
      case "crash.main.2":
        return this.EVENT_FILE_ERROR_OBSOLETE;

      case "crash.main.3":
        let crashID = lines[0];
        let metadata = JSON.parse(lines[1]);
        store.addCrash(
          this.processTypes[Ci.nsIXULRuntime.PROCESS_TYPE_DEFAULT],
          this.CRASH_TYPE_CRASH,
          crashID,
          date,
          metadata
        );

        if (!("CrashPingUUID" in metadata)) {
          // If CrashPingUUID is not present then a ping was not generated
          // by the crashreporter for this crash so we need to send one from
          // here.
          this._sendCrashPing(
            crashID,
            this.processTypes[Ci.nsIXULRuntime.PROCESS_TYPE_DEFAULT],
            date,
            metadata
          );
        }

        break;

      case "crash.submission.1":
        if (lines.length == 3) {
          let [crashID, result, remoteID] = lines;
          store.addCrash(
            this.processTypes[Ci.nsIXULRuntime.PROCESS_TYPE_DEFAULT],
            this.CRASH_TYPE_CRASH,
            crashID,
            date
          );

          let submissionID = this.generateSubmissionID();
          let succeeded = result === "true";
          store.addSubmissionAttempt(crashID, submissionID, date);
          store.addSubmissionResult(
            crashID,
            submissionID,
            date,
            succeeded
              ? this.SUBMISSION_RESULT_OK
              : this.SUBMISSION_RESULT_FAILED
          );
          if (succeeded) {
            store.setRemoteCrashID(crashID, remoteID);
          }
        } else {
          return this.EVENT_FILE_ERROR_MALFORMED;
        }
        break;

      default:
        return this.EVENT_FILE_ERROR_UNKNOWN_EVENT;
    }

    return this.EVENT_FILE_SUCCESS;
  },

  /**
   * The resolved promise is an array of objects with the properties:
   *
   *   path -- String filename
   *   id -- regexp.match()[1] (likely the crash ID)
   *   date -- Date mtime of the file
   */
  _getDirectoryEntries(path, re) {
    return (async function() {
      let children = await IOUtils.getChildren(path);
      let entries = [];

      for (const entry of children) {
        let stat = await IOUtils.stat(entry);
        if (stat.type == "directory") {
          continue;
        }

        let filename = PathUtils.filename(entry);
        let match = re.exec(filename);
        if (!match) {
          continue;
        }
        entries.push({
          path: entry,
          id: match[1],
          date: stat.lastModified,
        });
      }

      entries.sort((a, b) => {
        return a.date - b.date;
      });

      return entries;
    })();
  },

  _getStore() {
    // Concurrent callers share the in-flight task to avoid races.
    if (this._getStoreTask) {
      return this._getStoreTask;
    }

    return (this._getStoreTask = (async () => {
      try {
        if (!this._store) {
          await IOUtils.makeDirectory(this._storeDir, {
            permissions: 0o700,
          });

          let store = new CrashStore(
            this._storeDir,
            this._telemetryStoreSizeKey
          );
          await store.load();

          this._store = store;
          this._storeTimer = Cc["@mozilla.org/timer;1"].createInstance(
            Ci.nsITimer
          );
        }

        // The application can go long periods without interacting with the
        // store. Since the store takes up resources, we automatically "free"
        // the store after inactivity so resources can be returned to the
        // system. We do this via a timer and a mechanism that tracks when the
        // store is being accessed.
        this._storeTimer.cancel();

        // This callback frees resources from the store unless the store
        // is protected from freeing by some other process.
        let timerCB = () => {
          if (this._storeProtectedCount) {
            // Still in use; re-arm the timer and try again later.
            this._storeTimer.initWithCallback(
              timerCB,
              this.STORE_EXPIRATION_MS,
              this._storeTimer.TYPE_ONE_SHOT
            );
            return;
          }

          // We kill the reference that we hold. GC will kill it later. If
          // someone else holds a reference, that will prevent GC until that
          // reference is gone.
          this._store = null;
          this._storeTimer = null;
        };

        this._storeTimer.initWithCallback(
          timerCB,
          this.STORE_EXPIRATION_MS,
          this._storeTimer.TYPE_ONE_SHOT
        );

        return this._store;
      } finally {
        this._getStoreTask = null;
      }
    })());
  },

  /**
   * Obtain information about all known crashes.
   *
   * Returns an array of CrashRecord instances. Instances are read-only.
   */
  getCrashes() {
    return (async () => {
      let store = await this._getStore();

      return store.crashes;
    })();
  },

  getCrashCountsByDay() {
    return (async () => {
      let store = await this._getStore();

      return store._countsByDay;
    })();
  },
});

var gCrashManager;

/**
 * Interface to storage of crash data.
 *
 * This type handles storage of crash metadata. It exists as a separate type
 * from the crash manager for performance reasons: since all crash metadata
 * needs to be loaded into memory for access, we wish to easily dispose of all
 * associated memory when this data is no longer needed. Having an isolated
 * object whose references can easily be lost facilitates that simple disposal.
 *
 * When metadata is updated, the caller must explicitly persist the changes
 * to disk. This prevents excessive I/O during updates.
 *
 * The store has a mechanism for ensuring it doesn't grow too large. A ceiling
 * is placed on the number of daily events that can occur for events that can
 * occur with relatively high frequency. If we've reached
 * the high water mark and new data arrives, it's silently dropped.
 * However, the count of actual events is always preserved. This allows
 * us to report on the severity of problems beyond the storage threshold.
 *
 * Main process crashes are excluded from limits because they are both
 * important and should be rare.
 *
 * @param storeDir (string)
 *        Directory the store should be located in.
 * @param telemetrySizeKey (string)
 *        The telemetry histogram that should be used to store the size
 *        of the data file.
 */
function CrashStore(storeDir, telemetrySizeKey) {
  this._storeDir = storeDir;
  this._telemetrySizeKey = telemetrySizeKey;

  // Backing file: lz4-compressed JSON.
  this._storePath = PathUtils.join(storeDir, "store.json.mozlz4");

  // Holds the read data from disk.
  this._data = null;

  // Maps days since UNIX epoch to a Map of event types to counts.
  // This data structure is populated when the JSON file is loaded
  // and is also updated when new events are added.
  this._countsByDay = new Map();
}

CrashStore.prototype = Object.freeze({
  // Maximum number of events to store per day. This establishes a
  // ceiling on the per-type/per-day records that will be stored.
  HIGH_WATER_DAILY_THRESHOLD: 500,

  /**
   * Reset all data.
   */
  reset() {
    this._data = {
      v: 1,
      crashes: new Map(),
      corruptDate: null,
    };
    this._countsByDay = new Map();
  },

  /**
   * Load data from disk.
   *
   * @return Promise
   */
  load() {
    return (async () => {
      // Loading replaces data.
      this.reset();

      try {
        let decoder = new TextDecoder();
        let data = await IOUtils.read(this._storePath, { decompress: true });
        data = JSON.parse(decoder.decode(data));

        if (data.corruptDate) {
          this._data.corruptDate = new Date(data.corruptDate);
        }

        // actualCounts is used to validate that the derived counts by
        // days stored in the payload matches up to actual data.
        let actualCounts = new Map();

        // In the past, submissions were stored as separate crash records
        // with an id of e.g. "someID-submission". If we find IDs ending
        // with "-submission", we will need to convert the data to be stored
        // as actual submissions.
        //
        // The old way of storing submissions was used from FF33 - FF34. We
        // drop this old data on the floor.
        for (let id in data.crashes) {
          if (id.endsWith("-submission")) {
            continue;
          }

          let crash = data.crashes[id];
          let denormalized = this._denormalize(crash);

          denormalized.submissions = new Map();
          if (crash.submissions) {
            for (let submissionID in crash.submissions) {
              let submission = crash.submissions[submissionID];
              denormalized.submissions.set(
                submissionID,
                this._denormalize(submission)
              );
            }
          }

          this._data.crashes.set(id, denormalized);

          let key =
            dateToDays(denormalized.crashDate) + "-" + denormalized.type;
          actualCounts.set(key, (actualCounts.get(key) || 0) + 1);

          // If we have an OOM size, count the crash as an OOM in addition to
          // being a main process crash.
          if (
            denormalized.metadata &&
            denormalized.metadata.OOMAllocationSize
          ) {
            let oomKey = key + "-oom";
            actualCounts.set(oomKey, (actualCounts.get(oomKey) || 0) + 1);
          }
        }

        // The validation in this loop is arguably not necessary. We perform
        // it as a defense against unknown bugs.
        for (let dayKey in data.countsByDay) {
          let day = parseInt(dayKey, 10);
          for (let type in data.countsByDay[day]) {
            this._ensureCountsForDay(day);

            let count = data.countsByDay[day][type];
            let key = day + "-" + type;

            // If the payload says we have data for a given day but we
            // don't, the payload is wrong. Ignore it.
            if (!actualCounts.has(key)) {
              continue;
            }

            // If we encountered more data in the payload than what the
            // data structure says, use the proper value.
            count = Math.max(count, actualCounts.get(key));

            this._countsByDay.get(day).set(type, count);
          }
        }
      } catch (ex) {
        // Missing files (first use) are allowed.
        if (!(ex instanceof DOMException) || ex.name != "NotFoundError") {
          // If we can't load for any reason, mark a corrupt date in the instance
          // and swallow the error.
          //
          // The marking of a corrupted file is intentionally not persisted to
          // disk yet. Instead, we wait until the next save(). This is to give
          // non-permanent failures the opportunity to recover on their own.
          this._data.corruptDate = new Date();
        }
      }
    })();
  },

  /**
   * Save data to disk.
   *
   * @return Promise<null>
   */
  save() {
    return (async () => {
      if (!this._data) {
        return;
      }

      let normalized = {
        // The version should be incremented whenever the format
        // changes.
        v: 1,
        // Maps crash IDs to objects defining the crash.
        crashes: {},
        // Maps days since UNIX epoch to objects mapping event types to
        // counts. This is a mirror of this._countsByDay. e.g.
        // {
        //   15000: {
        //     "main-crash": 2,
        //     "plugin-crash": 1
        //   }
        // }
        countsByDay: {},

        // When the store was last corrupted.
        corruptDate: null,
      };

      if (this._data.corruptDate) {
        normalized.corruptDate = this._data.corruptDate.getTime();
      }

      for (let [id, crash] of this._data.crashes) {
        let c = this._normalize(crash);

        c.submissions = {};
        for (let [submissionID, submission] of crash.submissions) {
          c.submissions[submissionID] = this._normalize(submission);
        }

        normalized.crashes[id] = c;
      }

      for (let [day, m] of this._countsByDay) {
        normalized.countsByDay[day] = {};
        for (let [type, count] of m) {
          normalized.countsByDay[day][type] = count;
        }
      }

      // Atomic-ish write: write to a tmp file, then move into place.
      let encoder = new TextEncoder();
      let data = encoder.encode(JSON.stringify(normalized));
      let size = await IOUtils.write(this._storePath, data, {
        tmpPath: this._storePath + ".tmp",
        compress: true,
      });
      if (this._telemetrySizeKey) {
        Services.telemetry.getHistogramById(this._telemetrySizeKey).add(size);
      }
    })();
  },

  /**
   * Normalize an object into one fit for serialization.
   *
   * This function along with _denormalize() serve to hack around the
   * default handling of Date JSON serialization because Date serialization
   * is undefined by JSON.
   *
   * Fields ending with "Date" are assumed to contain Date instances.
   * We convert these to milliseconds since epoch on output and back to
   * Date on input.
   */
  _normalize(o) {
    let normalized = {};

    for (let k in o) {
      let v = o[k];
      if (v && k.endsWith("Date")) {
        normalized[k] = v.getTime();
      } else {
        normalized[k] = v;
      }
    }

    return normalized;
  },

  /**
   * Convert a serialized object back to its native form.
   */
  _denormalize(o) {
    let n = {};

    for (let k in o) {
      let v = o[k];
      if (v && k.endsWith("Date")) {
        n[k] = new Date(parseInt(v, 10));
      } else {
        n[k] = v;
      }
    }

    return n;
  },

  /**
   * Prune old crash data.
   *
   * Crashes without recent activity are pruned from the store so the
   * size of the store is not unbounded. If there is activity on a crash,
   * that activity will keep the crash and all its data around for longer.
   *
   * @param date
   *        (Date) The cutoff at which data will be pruned. If an entry
   *        doesn't have data newer than this, it will be pruned.
   */
  pruneOldCrashes(date) {
    for (let crash of this.crashes) {
      let newest = crash.newestDate;
      if (!newest || newest.getTime() < date.getTime()) {
        this._data.crashes.delete(crash.id);
      }
    }
  },

  /**
   * Date the store was last corrupted and required a reset.
   *
   * May be null (no corruption has ever occurred) or a Date instance.
   */
  get corruptDate() {
    return this._data.corruptDate;
  },

  /**
   * The number of distinct crashes tracked.
   */
  get crashesCount() {
    return this._data.crashes.size;
  },

  /**
   * All crashes tracked.
   *
   * This is an array of CrashRecord.
   */
  get crashes() {
    let crashes = [];
    for (let [, crash] of this._data.crashes) {
      crashes.push(new CrashRecord(crash));
    }

    return crashes;
  },

  /**
   * Obtain a particular crash from its ID.
   *
   * A CrashRecord will be returned if the crash exists. null will be returned
   * if the crash is unknown.
1269 */ 1270 getCrash(id) { 1271 for (let crash of this.crashes) { 1272 if (crash.id == id) { 1273 return crash; 1274 } 1275 } 1276 1277 return null; 1278 }, 1279 1280 _ensureCountsForDay(day) { 1281 if (!this._countsByDay.has(day)) { 1282 this._countsByDay.set(day, new Map()); 1283 } 1284 }, 1285 1286 /** 1287 * Ensure the crash record is present in storage. 1288 * 1289 * Returns the crash record if we're allowed to store it or null 1290 * if we've hit the high water mark. 1291 * 1292 * @param processType 1293 * (string) One of the PROCESS_TYPE constants. 1294 * @param crashType 1295 * (string) One of the CRASH_TYPE constants. 1296 * @param id 1297 * (string) The crash ID. 1298 * @param date 1299 * (Date) When this crash occurred. 1300 * @param metadata 1301 * (dictionary) Crash metadata, may be empty. 1302 * 1303 * @return null | object crash record 1304 */ 1305 _ensureCrashRecord(processType, crashType, id, date, metadata) { 1306 if (!id) { 1307 // Crashes are keyed on ID, so it's not really helpful to store crashes 1308 // without IDs. 1309 return null; 1310 } 1311 1312 let type = processType + "-" + crashType; 1313 1314 if (!this._data.crashes.has(id)) { 1315 let day = dateToDays(date); 1316 this._ensureCountsForDay(day); 1317 1318 let count = (this._countsByDay.get(day).get(type) || 0) + 1; 1319 this._countsByDay.get(day).set(type, count); 1320 1321 if ( 1322 count > this.HIGH_WATER_DAILY_THRESHOLD && 1323 processType != CrashManager.prototype.processTypes[Ci.nsIXULRuntime.PROCESS_TYPE_DEFAULT] 1324 ) { 1325 return null; 1326 } 1327 1328 // If we have an OOM size, count the crash as an OOM in addition to 1329 // being a main process crash. 
1330 if (metadata && metadata.OOMAllocationSize) { 1331 let oomType = type + "-oom"; 1332 let oomCount = (this._countsByDay.get(day).get(oomType) || 0) + 1; 1333 this._countsByDay.get(day).set(oomType, oomCount); 1334 } 1335 1336 this._data.crashes.set(id, { 1337 id, 1338 remoteID: null, 1339 type, 1340 crashDate: date, 1341 submissions: new Map(), 1342 classifications: [], 1343 metadata, 1344 }); 1345 } 1346 1347 let crash = this._data.crashes.get(id); 1348 crash.type = type; 1349 crash.crashDate = date; 1350 1351 return crash; 1352 }, 1353 1354 /** 1355 * Record the occurrence of a crash. 1356 * 1357 * @param processType (string) One of the PROCESS_TYPE constants. 1358 * @param crashType (string) One of the CRASH_TYPE constants. 1359 * @param id (string) Crash ID. Likely a UUID. 1360 * @param date (Date) When the crash occurred. 1361 * @param metadata (dictionary) Crash metadata, may be empty. 1362 * 1363 * @return boolean True if the crash was recorded and false if not. 1364 */ 1365 addCrash(processType, crashType, id, date, metadata) { 1366 return !!this._ensureCrashRecord( 1367 processType, 1368 crashType, 1369 id, 1370 date, 1371 metadata 1372 ); 1373 }, 1374 1375 /** 1376 * @return boolean True if the remote ID was recorded and false if not. 1377 */ 1378 setRemoteCrashID(crashID, remoteID) { 1379 let crash = this._data.crashes.get(crashID); 1380 if (!crash || !remoteID) { 1381 return false; 1382 } 1383 1384 crash.remoteID = remoteID; 1385 return true; 1386 }, 1387 1388 /** 1389 * @param processType (string) One of the PROCESS_TYPE constants. 1390 * @param crashType (string) One of the CRASH_TYPE constants. 1391 * 1392 * @return array of crashes 1393 */ 1394 getCrashesOfType(processType, crashType) { 1395 let crashes = []; 1396 for (let crash of this.crashes) { 1397 if (crash.isOfType(processType, crashType)) { 1398 crashes.push(crash); 1399 } 1400 } 1401 1402 return crashes; 1403 }, 1404 1405 /** 1406 * Ensure the submission record is present in storage. 
1407 * @returns [submission, crash] 1408 */ 1409 _ensureSubmissionRecord(crashID, submissionID) { 1410 let crash = this._data.crashes.get(crashID); 1411 if (!crash || !submissionID) { 1412 return null; 1413 } 1414 1415 if (!crash.submissions.has(submissionID)) { 1416 crash.submissions.set(submissionID, { 1417 requestDate: null, 1418 responseDate: null, 1419 result: null, 1420 }); 1421 } 1422 1423 return [crash.submissions.get(submissionID), crash]; 1424 }, 1425 1426 /** 1427 * @return boolean True if the attempt was recorded. 1428 */ 1429 addSubmissionAttempt(crashID, submissionID, date) { 1430 let [submission, crash] = this._ensureSubmissionRecord( 1431 crashID, 1432 submissionID 1433 ); 1434 if (!submission) { 1435 return false; 1436 } 1437 1438 submission.requestDate = date; 1439 Services.telemetry 1440 .getKeyedHistogramById("PROCESS_CRASH_SUBMIT_ATTEMPT") 1441 .add(crash.type, 1); 1442 return true; 1443 }, 1444 1445 /** 1446 * @return boolean True if the response was recorded. 1447 */ 1448 addSubmissionResult(crashID, submissionID, date, result) { 1449 let crash = this._data.crashes.get(crashID); 1450 if (!crash || !submissionID) { 1451 return false; 1452 } 1453 let submission = crash.submissions.get(submissionID); 1454 if (!submission) { 1455 return false; 1456 } 1457 1458 submission.responseDate = date; 1459 submission.result = result; 1460 Services.telemetry 1461 .getKeyedHistogramById("PROCESS_CRASH_SUBMIT_SUCCESS") 1462 .add(crash.type, result == "ok"); 1463 return true; 1464 }, 1465 1466 /** 1467 * @return boolean True if the classifications were set. 1468 */ 1469 setCrashClassifications(crashID, classifications) { 1470 let crash = this._data.crashes.get(crashID); 1471 if (!crash) { 1472 return false; 1473 } 1474 1475 crash.classifications = classifications; 1476 return true; 1477 }, 1478}); 1479 1480/** 1481 * Represents an individual crash with metadata. 
1482 * 1483 * This is a wrapper around the low-level anonymous JS objects that define 1484 * crashes. It exposes a consistent and helpful API. 1485 * 1486 * Instances of this type should only be constructured inside this module, 1487 * not externally. The constructor is not considered a public API. 1488 * 1489 * @param o (object) 1490 * The crash's entry from the CrashStore. 1491 */ 1492function CrashRecord(o) { 1493 this._o = o; 1494} 1495 1496CrashRecord.prototype = Object.freeze({ 1497 get id() { 1498 return this._o.id; 1499 }, 1500 1501 get remoteID() { 1502 return this._o.remoteID; 1503 }, 1504 1505 get crashDate() { 1506 return this._o.crashDate; 1507 }, 1508 1509 /** 1510 * Obtain the newest date in this record. 1511 * 1512 * This is a convenience getter. The returned value is used to determine when 1513 * to expire a record. 1514 */ 1515 get newestDate() { 1516 // We currently only have 1 date, so this is easy. 1517 return this._o.crashDate; 1518 }, 1519 1520 get oldestDate() { 1521 return this._o.crashDate; 1522 }, 1523 1524 get type() { 1525 return this._o.type; 1526 }, 1527 1528 isOfType(processType, crashType) { 1529 return processType + "-" + crashType == this.type; 1530 }, 1531 1532 get submissions() { 1533 return this._o.submissions; 1534 }, 1535 1536 get classifications() { 1537 return this._o.classifications; 1538 }, 1539 1540 get metadata() { 1541 return this._o.metadata; 1542 }, 1543}); 1544 1545XPCOMUtils.defineLazyGetter(CrashManager, "_log", () => 1546 Log.repository.getLogger("Crashes.CrashManager") 1547); 1548 1549/** 1550 * Obtain the global CrashManager instance used by the running application. 1551 * 1552 * CrashManager is likely only ever instantiated once per application lifetime. 1553 * The main reason it's implemented as a reusable type is to facilitate testing. 
1554 */ 1555XPCOMUtils.defineLazyGetter(CrashManager, "Singleton", function() { 1556 if (gCrashManager) { 1557 return gCrashManager; 1558 } 1559 1560 gCrashManager = new CrashManager({ 1561 telemetryStoreSizeKey: "CRASH_STORE_COMPRESSED_BYTES", 1562 }); 1563 1564 // Automatically aggregate event files shortly after startup. This 1565 // ensures it happens with some frequency. 1566 // 1567 // There are performance considerations here. While this is doing 1568 // work and could negatively impact performance, the amount of work 1569 // is kept small per run by periodically aggregating event files. 1570 // Furthermore, well-behaving installs should not have much work 1571 // here to do. If there is a lot of work, that install has bigger 1572 // issues beyond reduced performance near startup. 1573 gCrashManager.scheduleMaintenance(AGGREGATE_STARTUP_DELAY_MS); 1574 1575 return gCrashManager; 1576}); 1577 1578function getCrashManager() { 1579 return CrashManager.Singleton; 1580} 1581 1582/** 1583 * Used for tests to check the crash manager is created on profile creation. 1584 * 1585 * @returns {CrashManager} 1586 */ 1587function getCrashManagerNoCreate() { 1588 return gCrashManager; 1589} 1590