1/* This Source Code Form is subject to the terms of the Mozilla Public 2 * License, v. 2.0. If a copy of the MPL was not distributed with this 3 * file, You can obtain one at http://mozilla.org/MPL/2.0/. */ 4 5"use strict"; 6 7const myScope = this; 8 9ChromeUtils.import("resource://gre/modules/KeyValueParser.jsm"); 10ChromeUtils.import("resource://gre/modules/Log.jsm", this); 11ChromeUtils.import("resource://gre/modules/osfile.jsm", this); 12ChromeUtils.import("resource://gre/modules/PromiseUtils.jsm"); 13ChromeUtils.import("resource://gre/modules/Services.jsm", this); 14ChromeUtils.import("resource://gre/modules/TelemetryController.jsm"); 15ChromeUtils.import("resource://gre/modules/Timer.jsm", this); 16ChromeUtils.import("resource://gre/modules/XPCOMUtils.jsm", this); 17 18var EXPORTED_SYMBOLS = [ 19 "CrashManager", 20]; 21 22/** 23 * How long to wait after application startup before crash event files are 24 * automatically aggregated. 25 * 26 * We defer aggregation for performance reasons, as we don't want too many 27 * services competing for I/O immediately after startup. 28 */ 29const AGGREGATE_STARTUP_DELAY_MS = 57000; 30 31const MILLISECONDS_IN_DAY = 24 * 60 * 60 * 1000; 32 33// Converts Date to days since UNIX epoch. 34// This was copied from /services/metrics.storage.jsm. The implementation 35// does not account for leap seconds. 36function dateToDays(date) { 37 return Math.floor(date.getTime() / MILLISECONDS_IN_DAY); 38} 39 40/** 41 * Get a field from the specified object and remove it. 42 * 43 * @param obj {Object} The object holding the field 44 * @param field {String} The name of the field to be parsed and removed 45 * 46 * @returns {String} the field contents as a string, null if none was found 47 */ 48function getAndRemoveField(obj, field) { 49 let value = null; 50 51 if (field in obj) { 52 // We split extra files on LF characters but Windows-generated ones might 53 // contain trailing CR characters so trim them here. 54 value = obj[field].trim(); 55 56 delete obj[field]; 57 } 58 59 return value; 60} 61 62/** 63 * Parse the string stored in the specified field as JSON and then remove the 64 * field from the object. 65 * 66 * @param obj {Object} The object holding the field 67 * @param field {String} The name of the field to be parsed and removed 68 * 69 * @returns {Object} the parsed object, null if none was found 70 */ 71function parseAndRemoveField(obj, field) { 72 let value = null; 73 74 if (field in obj) { 75 try { 76 value = JSON.parse(obj[field]); 77 } catch (e) { 78 Cu.reportError(e); 79 } 80 81 delete obj[field]; 82 } 83 84 return value; 85} 86 87/** 88 * A gateway to crash-related data. 89 * 90 * This type is generic and can be instantiated any number of times. 91 * However, most applications will typically only have one instance 92 * instantiated and that instance will point to profile and user appdata 93 * directories. 94 * 95 * Instances are created by passing an object with properties. 96 * Recognized properties are: 97 * 98 * pendingDumpsDir (string) (required) 99 * Where dump files that haven't been uploaded are located. 100 * 101 * submittedDumpsDir (string) (required) 102 * Where records of uploaded dumps are located. 103 * 104 * eventsDirs (array) 105 * Directories (defined as strings) where events files are written. This 106 * instance will collects events from files in the directories specified. 107 * 108 * storeDir (string) 109 * Directory we will use for our data store. This instance will write 110 * data files into the directory specified. 111 * 112 * telemetryStoreSizeKey (string) 113 * Telemetry histogram to report store size under. 114 */ 115var CrashManager = function(options) { 116 for (let k of ["pendingDumpsDir", "submittedDumpsDir", "eventsDirs", 117 "storeDir"]) { 118 if (!(k in options)) { 119 throw new Error("Required key not present in options: " + k); 120 } 121 } 122 123 this._log = Log.repository.getLogger("Crashes.CrashManager"); 124 125 for (let k in options) { 126 let v = options[k]; 127 128 switch (k) { 129 case "pendingDumpsDir": 130 this._pendingDumpsDir = v; 131 break; 132 133 case "submittedDumpsDir": 134 this._submittedDumpsDir = v; 135 break; 136 137 case "eventsDirs": 138 this._eventsDirs = v; 139 break; 140 141 case "storeDir": 142 this._storeDir = v; 143 break; 144 145 case "telemetryStoreSizeKey": 146 this._telemetryStoreSizeKey = v; 147 break; 148 149 default: 150 throw new Error("Unknown property in options: " + k); 151 } 152 } 153 154 // Promise for in-progress aggregation operation. We store it on the 155 // object so it can be returned for in-progress operations. 156 this._aggregatePromise = null; 157 158 // Map of crash ID / promise tuples used to track adding new crashes. 159 this._crashPromises = new Map(); 160 161 // Promise for the crash ping used only for testing. 162 this._pingPromise = null; 163 164 // The CrashStore currently attached to this object. 165 this._store = null; 166 167 // A Task to retrieve the store. This is needed to avoid races when 168 // _getStore() is called multiple times in a short interval. 169 this._getStoreTask = null; 170 171 // The timer controlling the expiration of the CrashStore instance. 172 this._storeTimer = null; 173 174 // This is a semaphore that prevents the store from being freed by our 175 // timer-based resource freeing mechanism. 176 this._storeProtectedCount = 0; 177}; 178 179this.CrashManager.prototype = Object.freeze({ 180 // A crash in the main process. 181 PROCESS_TYPE_MAIN: "main", 182 183 // A crash in a content process. 184 PROCESS_TYPE_CONTENT: "content", 185 186 // A crash in a plugin process. 187 PROCESS_TYPE_PLUGIN: "plugin", 188 189 // A crash in a Gecko media plugin process. 190 PROCESS_TYPE_GMPLUGIN: "gmplugin", 191 192 // A crash in the GPU process. 193 PROCESS_TYPE_GPU: "gpu", 194 195 // A real crash. 196 CRASH_TYPE_CRASH: "crash", 197 198 // A hang. 199 CRASH_TYPE_HANG: "hang", 200 201 // Submission result values. 202 SUBMISSION_RESULT_OK: "ok", 203 SUBMISSION_RESULT_FAILED: "failed", 204 205 DUMP_REGEX: /^([0-9a-f]{8}-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{12})\.dmp$/i, 206 SUBMITTED_REGEX: /^bp-(?:hr-)?([0-9a-f]{8}-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{12})\.txt$/i, 207 ALL_REGEX: /^(.*)$/, 208 209 // How long the store object should persist in memory before being 210 // automatically garbage collected. 211 STORE_EXPIRATION_MS: 60 * 1000, 212 213 // Number of days after which a crash with no activity will get purged. 214 PURGE_OLDER_THAN_DAYS: 180, 215 216 // The following are return codes for individual event file processing. 217 // File processed OK. 218 EVENT_FILE_SUCCESS: "ok", 219 // The event appears to be malformed. 220 EVENT_FILE_ERROR_MALFORMED: "malformed", 221 // The type of event is unknown. 222 EVENT_FILE_ERROR_UNKNOWN_EVENT: "unknown-event", 223 224 // A whitelist of crash annotations which do not contain sensitive data 225 // and are saved in the crash record and sent with Firefox Health Report. 226 ANNOTATION_WHITELIST: [ 227 "AsyncShutdownTimeout", 228 "BuildID", 229 "ipc_channel_error", 230 "ProductID", 231 "ProductName", 232 "ReleaseChannel", 233 "RemoteType", 234 "SecondsSinceLastCrash", 235 "ShutdownProgress", 236 "StartupCrash", 237 "TelemetryEnvironment", 238 "Version", 239 // The following entries are not normal annotations that can be found in 240 // the .extra file but are included in the crash record/FHR: 241 "AvailablePageFile", 242 "AvailablePhysicalMemory", 243 "AvailableVirtualMemory", 244 "BlockedDllList", 245 "BlocklistInitFailed", 246 "ContainsMemoryReport", 247 "CrashTime", 248 "EventLoopNestingLevel", 249 "IsGarbageCollecting", 250 "MozCrashReason", 251 "OOMAllocationSize", 252 "SystemMemoryUsePercentage", 253 "TextureUsage", 254 "TotalPageFile", 255 "TotalPhysicalMemory", 256 "TotalVirtualMemory", 257 "UptimeTS", 258 "User32BeforeBlocklist", 259 ], 260 261 /** 262 * Obtain a list of all dumps pending upload. 263 * 264 * The returned value is a promise that resolves to an array of objects 265 * on success. Each element in the array has the following properties: 266 * 267 * id (string) 268 * The ID of the crash (a UUID). 269 * 270 * path (string) 271 * The filename of the crash (<UUID.dmp>) 272 * 273 * date (Date) 274 * When this dump was created 275 * 276 * The returned arry is sorted by the modified time of the file backing 277 * the entry, oldest to newest. 278 * 279 * @return Promise<Array> 280 */ 281 pendingDumps() { 282 return this._getDirectoryEntries(this._pendingDumpsDir, this.DUMP_REGEX); 283 }, 284 285 /** 286 * Obtain a list of all dump files corresponding to submitted crashes. 287 * 288 * The returned value is a promise that resolves to an Array of 289 * objects. Each object has the following properties: 290 * 291 * path (string) 292 * The path of the file this entry comes from. 293 * 294 * id (string) 295 * The crash UUID. 296 * 297 * date (Date) 298 * The (estimated) date this crash was submitted. 299 * 300 * The returned array is sorted by the modified time of the file backing 301 * the entry, oldest to newest. 302 * 303 * @return Promise<Array> 304 */ 305 submittedDumps() { 306 return this._getDirectoryEntries(this._submittedDumpsDir, 307 this.SUBMITTED_REGEX); 308 }, 309 310 /** 311 * Aggregates "loose" events files into the unified "database." 312 * 313 * This function should be called periodically to collect metadata from 314 * all events files into the central data store maintained by this manager. 315 * 316 * Once events have been stored in the backing store the corresponding 317 * source files are deleted. 318 * 319 * Only one aggregation operation is allowed to occur at a time. If this 320 * is called when an existing aggregation is in progress, the promise for 321 * the original call will be returned. 322 * 323 * @return promise<int> The number of event files that were examined. 324 */ 325 aggregateEventsFiles() { 326 if (this._aggregatePromise) { 327 return this._aggregatePromise; 328 } 329 330 return this._aggregatePromise = (async () => { 331 if (this._aggregatePromise) { 332 return this._aggregatePromise; 333 } 334 335 try { 336 let unprocessedFiles = await this._getUnprocessedEventsFiles(); 337 338 let deletePaths = []; 339 let needsSave = false; 340 341 this._storeProtectedCount++; 342 for (let entry of unprocessedFiles) { 343 try { 344 let result = await this._processEventFile(entry); 345 346 switch (result) { 347 case this.EVENT_FILE_SUCCESS: 348 needsSave = true; 349 // Fall through. 350 351 case this.EVENT_FILE_ERROR_MALFORMED: 352 deletePaths.push(entry.path); 353 break; 354 355 case this.EVENT_FILE_ERROR_UNKNOWN_EVENT: 356 break; 357 358 default: 359 Cu.reportError("Unhandled crash event file return code. Please " + 360 "file a bug: " + result); 361 } 362 } catch (ex) { 363 if (ex instanceof OS.File.Error) { 364 this._log.warn("I/O error reading " + entry.path, ex); 365 } else { 366 // We should never encounter an exception. This likely represents 367 // a coding error because all errors should be detected and 368 // converted to return codes. 369 // 370 // If we get here, report the error and delete the source file 371 // so we don't see it again. 372 Cu.reportError("Exception when processing crash event file: " + 373 Log.exceptionStr(ex)); 374 deletePaths.push(entry.path); 375 } 376 } 377 } 378 379 if (needsSave) { 380 let store = await this._getStore(); 381 await store.save(); 382 } 383 384 for (let path of deletePaths) { 385 try { 386 await OS.File.remove(path); 387 } catch (ex) { 388 this._log.warn("Error removing event file (" + path + ")", ex); 389 } 390 } 391 392 return unprocessedFiles.length; 393 394 } finally { 395 this._aggregatePromise = false; 396 this._storeProtectedCount--; 397 } 398 })(); 399 }, 400 401 /** 402 * Prune old crash data. 403 * 404 * @param date 405 * (Date) The cutoff point for pruning. Crashes without data newer 406 * than this will be pruned. 407 */ 408 pruneOldCrashes(date) { 409 return (async () => { 410 let store = await this._getStore(); 411 store.pruneOldCrashes(date); 412 await store.save(); 413 })(); 414 }, 415 416 /** 417 * Run tasks that should be periodically performed. 418 */ 419 runMaintenanceTasks() { 420 return (async () => { 421 await this.aggregateEventsFiles(); 422 423 let offset = this.PURGE_OLDER_THAN_DAYS * MILLISECONDS_IN_DAY; 424 await this.pruneOldCrashes(new Date(Date.now() - offset)); 425 })(); 426 }, 427 428 /** 429 * Schedule maintenance tasks for some point in the future. 430 * 431 * @param delay 432 * (integer) Delay in milliseconds when maintenance should occur. 433 */ 434 scheduleMaintenance(delay) { 435 let deferred = PromiseUtils.defer(); 436 437 setTimeout(() => { 438 this.runMaintenanceTasks().then(deferred.resolve, deferred.reject); 439 }, delay); 440 441 return deferred.promise; 442 }, 443 444 /** 445 * Record the occurrence of a crash. 446 * 447 * This method skips event files altogether and writes directly and 448 * immediately to the manager's data store. 449 * 450 * @param processType (string) One of the PROCESS_TYPE constants. 451 * @param crashType (string) One of the CRASH_TYPE constants. 452 * @param id (string) Crash ID. Likely a UUID. 453 * @param date (Date) When the crash occurred. 454 * @param metadata (dictionary) Crash metadata, may be empty. 455 * 456 * @return promise<null> Resolved when the store has been saved. 457 */ 458 addCrash(processType, crashType, id, date, metadata) { 459 let promise = (async () => { 460 let store = await this._getStore(); 461 if (store.addCrash(processType, crashType, id, date, metadata)) { 462 await store.save(); 463 } 464 465 let deferred = this._crashPromises.get(id); 466 467 if (deferred) { 468 this._crashPromises.delete(id); 469 deferred.resolve(); 470 } 471 472 // Send a telemetry ping for each non-main process crash 473 if (processType === this.PROCESS_TYPE_CONTENT || 474 processType === this.PROCESS_TYPE_GPU) { 475 this._sendCrashPing(id, processType, date, metadata); 476 } 477 })(); 478 479 return promise; 480 }, 481 482 /** 483 * Returns a promise that is resolved only the crash with the specified id 484 * has been fully recorded. 485 * 486 * @param id (string) Crash ID. Likely a UUID. 487 * 488 * @return promise<null> Resolved when the crash is present. 489 */ 490 async ensureCrashIsPresent(id) { 491 let store = await this._getStore(); 492 let crash = store.getCrash(id); 493 494 if (crash) { 495 return Promise.resolve(); 496 } 497 498 let deferred = PromiseUtils.defer(); 499 500 this._crashPromises.set(id, deferred); 501 return deferred.promise; 502 }, 503 504 /** 505 * Record the remote ID for a crash. 506 * 507 * @param crashID (string) Crash ID. Likely a UUID. 508 * @param remoteID (Date) Server/Breakpad ID. 509 * 510 * @return boolean True if the remote ID was recorded. 511 */ 512 async setRemoteCrashID(crashID, remoteID) { 513 let store = await this._getStore(); 514 if (store.setRemoteCrashID(crashID, remoteID)) { 515 await store.save(); 516 } 517 }, 518 519 /** 520 * Generate a submission ID for use with addSubmission{Attempt,Result}. 521 */ 522 generateSubmissionID() { 523 return "sub-" + Cc["@mozilla.org/uuid-generator;1"] 524 .getService(Ci.nsIUUIDGenerator) 525 .generateUUID().toString().slice(1, -1); 526 }, 527 528 /** 529 * Record the occurrence of a submission attempt for a crash. 530 * 531 * @param crashID (string) Crash ID. Likely a UUID. 532 * @param submissionID (string) Submission ID. Likely a UUID. 533 * @param date (Date) When the attempt occurred. 534 * 535 * @return boolean True if the attempt was recorded and false if not. 536 */ 537 async addSubmissionAttempt(crashID, submissionID, date) { 538 let store = await this._getStore(); 539 if (store.addSubmissionAttempt(crashID, submissionID, date)) { 540 await store.save(); 541 } 542 }, 543 544 /** 545 * Record the occurrence of a submission result for a crash. 546 * 547 * @param crashID (string) Crash ID. Likely a UUID. 548 * @param submissionID (string) Submission ID. Likely a UUID. 549 * @param date (Date) When the submission result was obtained. 550 * @param result (string) One of the SUBMISSION_RESULT constants. 551 * 552 * @return boolean True if the result was recorded and false if not. 553 */ 554 async addSubmissionResult(crashID, submissionID, date, result) { 555 let store = await this._getStore(); 556 if (store.addSubmissionResult(crashID, submissionID, date, result)) { 557 await store.save(); 558 } 559 }, 560 561 /** 562 * Set the classification of a crash. 563 * 564 * @param crashID (string) Crash ID. Likely a UUID. 565 * @param classifications (array) Crash classifications. 566 * 567 * @return boolean True if the data was recorded and false if not. 568 */ 569 async setCrashClassifications(crashID, classifications) { 570 let store = await this._getStore(); 571 if (store.setCrashClassifications(crashID, classifications)) { 572 await store.save(); 573 } 574 }, 575 576 /** 577 * Obtain the paths of all unprocessed events files. 578 * 579 * The promise-resolved array is sorted by file mtime, oldest to newest. 580 */ 581 _getUnprocessedEventsFiles() { 582 return (async () => { 583 let entries = []; 584 585 for (let dir of this._eventsDirs) { 586 for (let e of await this._getDirectoryEntries(dir, this.ALL_REGEX)) { 587 entries.push(e); 588 } 589 } 590 591 entries.sort((a, b) => { return a.date - b.date; }); 592 593 return entries; 594 })(); 595 }, 596 597 // See docs/crash-events.rst for the file format specification. 598 _processEventFile(entry) { 599 return (async () => { 600 let data = await OS.File.read(entry.path); 601 let store = await this._getStore(); 602 603 let decoder = new TextDecoder(); 604 data = decoder.decode(data); 605 606 let type, time; 607 let start = 0; 608 for (let i = 0; i < 2; i++) { 609 let index = data.indexOf("\n", start); 610 if (index == -1) { 611 return this.EVENT_FILE_ERROR_MALFORMED; 612 } 613 614 let sub = data.substring(start, index); 615 switch (i) { 616 case 0: 617 type = sub; 618 break; 619 case 1: 620 time = sub; 621 try { 622 time = parseInt(time, 10); 623 } catch (ex) { 624 return this.EVENT_FILE_ERROR_MALFORMED; 625 } 626 } 627 628 start = index + 1; 629 } 630 let date = new Date(time * 1000); 631 let payload = data.substring(start); 632 633 return this._handleEventFilePayload(store, entry, type, date, payload); 634 })(); 635 }, 636 637 _filterAnnotations(annotations) { 638 let filteredAnnotations = {}; 639 640 for (let line in annotations) { 641 if (this.ANNOTATION_WHITELIST.includes(line)) { 642 filteredAnnotations[line] = annotations[line]; 643 } 644 } 645 646 return filteredAnnotations; 647 }, 648 649 _sendCrashPing(crashId, type, date, metadata = {}) { 650 // If we have a saved environment, use it. Otherwise report 651 // the current environment. 652 let reportMeta = Cu.cloneInto(metadata, myScope); 653 let crashEnvironment = parseAndRemoveField(reportMeta, 654 "TelemetryEnvironment"); 655 let sessionId = getAndRemoveField(reportMeta, "TelemetrySessionId"); 656 let stackTraces = parseAndRemoveField(reportMeta, "StackTraces"); 657 let minidumpSha256Hash = getAndRemoveField(reportMeta, 658 "MinidumpSha256Hash"); 659 660 // Filter the remaining annotations to remove privacy-sensitive ones 661 reportMeta = this._filterAnnotations(reportMeta); 662 663 this._pingPromise = TelemetryController.submitExternalPing("crash", 664 { 665 version: 1, 666 crashDate: date.toISOString().slice(0, 10), // YYYY-MM-DD 667 crashTime: date.toISOString().slice(0, 13) + ":00:00.000Z", // per-hour resolution 668 sessionId, 669 crashId, 670 minidumpSha256Hash, 671 processType: type, 672 stackTraces, 673 metadata: reportMeta, 674 hasCrashEnvironment: (crashEnvironment !== null), 675 }, 676 { 677 addClientId: true, 678 addEnvironment: true, 679 overrideEnvironment: crashEnvironment, 680 } 681 ); 682 }, 683 684 _handleEventFilePayload(store, entry, type, date, payload) { 685 // The payload types and formats are documented in docs/crash-events.rst. 686 // Do not change the format of an existing type. Instead, invent a new 687 // type. 688 // DO NOT ADD NEW TYPES WITHOUT DOCUMENTING! 689 let lines = payload.split("\n"); 690 691 switch (type) { 692 case "crash.main.1": 693 if (lines.length > 1) { 694 this._log.warn("Multiple lines unexpected in payload for " + 695 entry.path); 696 return this.EVENT_FILE_ERROR_MALFORMED; 697 } 698 // fall-through 699 case "crash.main.2": 700 let crashID = lines[0]; 701 let metadata = parseKeyValuePairsFromLines(lines.slice(1)); 702 store.addCrash(this.PROCESS_TYPE_MAIN, this.CRASH_TYPE_CRASH, 703 crashID, date, metadata); 704 705 if (!("CrashPingUUID" in metadata)) { 706 // If CrashPingUUID is not present then a ping was not generated 707 // by the crashreporter for this crash so we need to send one from 708 // here. 709 this._sendCrashPing(crashID, this.PROCESS_TYPE_MAIN, date, 710 metadata); 711 } 712 713 break; 714 715 case "crash.submission.1": 716 if (lines.length == 3) { 717 let [crashID, result, remoteID] = lines; 718 store.addCrash(this.PROCESS_TYPE_MAIN, this.CRASH_TYPE_CRASH, 719 crashID, date); 720 721 let submissionID = this.generateSubmissionID(); 722 let succeeded = result === "true"; 723 store.addSubmissionAttempt(crashID, submissionID, date); 724 store.addSubmissionResult(crashID, submissionID, date, 725 succeeded ? this.SUBMISSION_RESULT_OK : 726 this.SUBMISSION_RESULT_FAILED); 727 if (succeeded) { 728 store.setRemoteCrashID(crashID, remoteID); 729 } 730 } else { 731 return this.EVENT_FILE_ERROR_MALFORMED; 732 } 733 break; 734 735 default: 736 return this.EVENT_FILE_ERROR_UNKNOWN_EVENT; 737 } 738 739 return this.EVENT_FILE_SUCCESS; 740 }, 741 742 /** 743 * The resolved promise is an array of objects with the properties: 744 * 745 * path -- String filename 746 * id -- regexp.match()[1] (likely the crash ID) 747 * date -- Date mtime of the file 748 */ 749 _getDirectoryEntries(path, re) { 750 return (async function() { 751 try { 752 await OS.File.stat(path); 753 } catch (ex) { 754 if (!(ex instanceof OS.File.Error) || !ex.becauseNoSuchFile) { 755 throw ex; 756 } 757 return []; 758 } 759 760 let it = new OS.File.DirectoryIterator(path); 761 let entries = []; 762 763 try { 764 await it.forEach((entry, index, it) => { 765 if (entry.isDir) { 766 return undefined; 767 } 768 769 let match = re.exec(entry.name); 770 if (!match) { 771 return undefined; 772 } 773 774 return OS.File.stat(entry.path).then((info) => { 775 entries.push({ 776 path: entry.path, 777 id: match[1], 778 date: info.lastModificationDate, 779 }); 780 }); 781 }); 782 } finally { 783 it.close(); 784 } 785 786 entries.sort((a, b) => { return a.date - b.date; }); 787 788 return entries; 789 })(); 790 }, 791 792 _getStore() { 793 if (this._getStoreTask) { 794 return this._getStoreTask; 795 } 796 797 return this._getStoreTask = (async () => { 798 try { 799 if (!this._store) { 800 await OS.File.makeDir(this._storeDir, { 801 ignoreExisting: true, 802 unixMode: OS.Constants.libc.S_IRWXU, 803 }); 804 805 let store = new CrashStore(this._storeDir, 806 this._telemetryStoreSizeKey); 807 await store.load(); 808 809 this._store = store; 810 this._storeTimer = Cc["@mozilla.org/timer;1"] 811 .createInstance(Ci.nsITimer); 812 } 813 814 // The application can go long periods without interacting with the 815 // store. Since the store takes up resources, we automatically "free" 816 // the store after inactivity so resources can be returned to the 817 // system. We do this via a timer and a mechanism that tracks when the 818 // store is being accessed. 819 this._storeTimer.cancel(); 820 821 // This callback frees resources from the store unless the store 822 // is protected from freeing by some other process. 823 let timerCB = () => { 824 if (this._storeProtectedCount) { 825 this._storeTimer.initWithCallback(timerCB, this.STORE_EXPIRATION_MS, 826 this._storeTimer.TYPE_ONE_SHOT); 827 return; 828 } 829 830 // We kill the reference that we hold. GC will kill it later. If 831 // someone else holds a reference, that will prevent GC until that 832 // reference is gone. 833 this._store = null; 834 this._storeTimer = null; 835 }; 836 837 this._storeTimer.initWithCallback(timerCB, this.STORE_EXPIRATION_MS, 838 this._storeTimer.TYPE_ONE_SHOT); 839 840 return this._store; 841 } finally { 842 this._getStoreTask = null; 843 } 844 })(); 845 }, 846 847 /** 848 * Obtain information about all known crashes. 849 * 850 * Returns an array of CrashRecord instances. Instances are read-only. 851 */ 852 getCrashes() { 853 return (async () => { 854 let store = await this._getStore(); 855 856 return store.crashes; 857 })(); 858 }, 859 860 getCrashCountsByDay() { 861 return (async () => { 862 let store = await this._getStore(); 863 864 return store._countsByDay; 865 })(); 866 }, 867}); 868 869var gCrashManager; 870 871/** 872 * Interface to storage of crash data. 873 * 874 * This type handles storage of crash metadata. It exists as a separate type 875 * from the crash manager for performance reasons: since all crash metadata 876 * needs to be loaded into memory for access, we wish to easily dispose of all 877 * associated memory when this data is no longer needed. Having an isolated 878 * object whose references can easily be lost faciliates that simple disposal. 879 * 880 * When metadata is updated, the caller must explicitly persist the changes 881 * to disk. This prevents excessive I/O during updates. 882 * 883 * The store has a mechanism for ensuring it doesn't grow too large. A ceiling 884 * is placed on the number of daily events that can occur for events that can 885 * occur with relatively high frequency, notably plugin crashes and hangs 886 * (plugins can enter cycles where they repeatedly crash). If we've reached 887 * the high water mark and new data arrives, it's silently dropped. 888 * However, the count of actual events is always preserved. This allows 889 * us to report on the severity of problems beyond the storage threshold. 890 * 891 * Main process crashes are excluded from limits because they are both 892 * important and should be rare. 893 * 894 * @param storeDir (string) 895 * Directory the store should be located in. 896 * @param telemetrySizeKey (string) 897 * The telemetry histogram that should be used to store the size 898 * of the data file. 899 */ 900function CrashStore(storeDir, telemetrySizeKey) { 901 this._storeDir = storeDir; 902 this._telemetrySizeKey = telemetrySizeKey; 903 904 this._storePath = OS.Path.join(storeDir, "store.json.mozlz4"); 905 906 // Holds the read data from disk. 907 this._data = null; 908 909 // Maps days since UNIX epoch to a Map of event types to counts. 910 // This data structure is populated when the JSON file is loaded 911 // and is also updated when new events are added. 912 this._countsByDay = new Map(); 913} 914 915CrashStore.prototype = Object.freeze({ 916 // Maximum number of events to store per day. This establishes a 917 // ceiling on the per-type/per-day records that will be stored. 918 HIGH_WATER_DAILY_THRESHOLD: 100, 919 920 /** 921 * Reset all data. 922 */ 923 reset() { 924 this._data = { 925 v: 1, 926 crashes: new Map(), 927 corruptDate: null, 928 }; 929 this._countsByDay = new Map(); 930 }, 931 932 /** 933 * Load data from disk. 934 * 935 * @return Promise 936 */ 937 load() { 938 return (async () => { 939 // Loading replaces data. 940 this.reset(); 941 942 try { 943 let decoder = new TextDecoder(); 944 let data = await OS.File.read(this._storePath, {compression: "lz4"}); 945 data = JSON.parse(decoder.decode(data)); 946 947 if (data.corruptDate) { 948 this._data.corruptDate = new Date(data.corruptDate); 949 } 950 951 // actualCounts is used to validate that the derived counts by 952 // days stored in the payload matches up to actual data. 953 let actualCounts = new Map(); 954 955 // In the past, submissions were stored as separate crash records 956 // with an id of e.g. "someID-submission". If we find IDs ending 957 // with "-submission", we will need to convert the data to be stored 958 // as actual submissions. 959 // 960 // The old way of storing submissions was used from FF33 - FF34. We 961 // drop this old data on the floor. 962 for (let id in data.crashes) { 963 if (id.endsWith("-submission")) { 964 continue; 965 } 966 967 let crash = data.crashes[id]; 968 let denormalized = this._denormalize(crash); 969 970 denormalized.submissions = new Map(); 971 if (crash.submissions) { 972 for (let submissionID in crash.submissions) { 973 let submission = crash.submissions[submissionID]; 974 denormalized.submissions.set(submissionID, 975 this._denormalize(submission)); 976 } 977 } 978 979 this._data.crashes.set(id, denormalized); 980 981 let key = dateToDays(denormalized.crashDate) + "-" + denormalized.type; 982 actualCounts.set(key, (actualCounts.get(key) || 0) + 1); 983 984 // If we have an OOM size, count the crash as an OOM in addition to 985 // being a main process crash. 986 if (denormalized.metadata && 987 denormalized.metadata.OOMAllocationSize) { 988 let oomKey = key + "-oom"; 989 actualCounts.set(oomKey, (actualCounts.get(oomKey) || 0) + 1); 990 } 991 992 } 993 994 // The validation in this loop is arguably not necessary. We perform 995 // it as a defense against unknown bugs. 996 for (let dayKey in data.countsByDay) { 997 let day = parseInt(dayKey, 10); 998 for (let type in data.countsByDay[day]) { 999 this._ensureCountsForDay(day); 1000 1001 let count = data.countsByDay[day][type]; 1002 let key = day + "-" + type; 1003 1004 // If the payload says we have data for a given day but we 1005 // don't, the payload is wrong. Ignore it. 1006 if (!actualCounts.has(key)) { 1007 continue; 1008 } 1009 1010 // If we encountered more data in the payload than what the 1011 // data structure says, use the proper value. 1012 count = Math.max(count, actualCounts.get(key)); 1013 1014 this._countsByDay.get(day).set(type, count); 1015 } 1016 } 1017 } catch (ex) { 1018 // Missing files (first use) are allowed. 1019 if (!(ex instanceof OS.File.Error) || !ex.becauseNoSuchFile) { 1020 // If we can't load for any reason, mark a corrupt date in the instance 1021 // and swallow the error. 1022 // 1023 // The marking of a corrupted file is intentionally not persisted to 1024 // disk yet. Instead, we wait until the next save(). This is to give 1025 // non-permanent failures the opportunity to recover on their own. 1026 this._data.corruptDate = new Date(); 1027 } 1028 } 1029 })(); 1030 }, 1031 1032 /** 1033 * Save data to disk. 1034 * 1035 * @return Promise<null> 1036 */ 1037 save() { 1038 return (async () => { 1039 if (!this._data) { 1040 return; 1041 } 1042 1043 let normalized = { 1044 // The version should be incremented whenever the format 1045 // changes. 1046 v: 1, 1047 // Maps crash IDs to objects defining the crash. 1048 crashes: {}, 1049 // Maps days since UNIX epoch to objects mapping event types to 1050 // counts. This is a mirror of this._countsByDay. e.g. 1051 // { 1052 // 15000: { 1053 // "main-crash": 2, 1054 // "plugin-crash": 1 1055 // } 1056 // } 1057 countsByDay: {}, 1058 1059 // When the store was last corrupted. 1060 corruptDate: null, 1061 }; 1062 1063 if (this._data.corruptDate) { 1064 normalized.corruptDate = this._data.corruptDate.getTime(); 1065 } 1066 1067 for (let [id, crash] of this._data.crashes) { 1068 let c = this._normalize(crash); 1069 1070 c.submissions = {}; 1071 for (let [submissionID, submission] of crash.submissions) { 1072 c.submissions[submissionID] = this._normalize(submission); 1073 } 1074 1075 normalized.crashes[id] = c; 1076 } 1077 1078 for (let [day, m] of this._countsByDay) { 1079 normalized.countsByDay[day] = {}; 1080 for (let [type, count] of m) { 1081 normalized.countsByDay[day][type] = count; 1082 } 1083 } 1084 1085 let encoder = new TextEncoder(); 1086 let data = encoder.encode(JSON.stringify(normalized)); 1087 let size = await OS.File.writeAtomic(this._storePath, data, { 1088 tmpPath: this._storePath + ".tmp", 1089 compression: "lz4"}); 1090 if (this._telemetrySizeKey) { 1091 Services.telemetry.getHistogramById(this._telemetrySizeKey).add(size); 1092 } 1093 })(); 1094 }, 1095 1096 /** 1097 * Normalize an object into one fit for serialization. 1098 * 1099 * This function along with _denormalize() serve to hack around the 1100 * default handling of Date JSON serialization because Date serialization 1101 * is undefined by JSON. 1102 * 1103 * Fields ending with "Date" are assumed to contain Date instances. 1104 * We convert these to milliseconds since epoch on output and back to 1105 * Date on input. 1106 */ 1107 _normalize(o) { 1108 let normalized = {}; 1109 1110 for (let k in o) { 1111 let v = o[k]; 1112 if (v && k.endsWith("Date")) { 1113 normalized[k] = v.getTime(); 1114 } else { 1115 normalized[k] = v; 1116 } 1117 } 1118 1119 return normalized; 1120 }, 1121 1122 /** 1123 * Convert a serialized object back to its native form. 1124 */ 1125 _denormalize(o) { 1126 let n = {}; 1127 1128 for (let k in o) { 1129 let v = o[k]; 1130 if (v && k.endsWith("Date")) { 1131 n[k] = new Date(parseInt(v, 10)); 1132 } else { 1133 n[k] = v; 1134 } 1135 } 1136 1137 return n; 1138 }, 1139 1140 /** 1141 * Prune old crash data. 1142 * 1143 * Crashes without recent activity are pruned from the store so the 1144 * size of the store is not unbounded. If there is activity on a crash, 1145 * that activity will keep the crash and all its data around for longer. 1146 * 1147 * @param date 1148 * (Date) The cutoff at which data will be pruned. If an entry 1149 * doesn't have data newer than this, it will be pruned. 1150 */ 1151 pruneOldCrashes(date) { 1152 for (let crash of this.crashes) { 1153 let newest = crash.newestDate; 1154 if (!newest || newest.getTime() < date.getTime()) { 1155 this._data.crashes.delete(crash.id); 1156 } 1157 } 1158 }, 1159 1160 /** 1161 * Date the store was last corrupted and required a reset. 1162 * 1163 * May be null (no corruption has ever occurred) or a Date instance. 1164 */ 1165 get corruptDate() { 1166 return this._data.corruptDate; 1167 }, 1168 1169 /** 1170 * The number of distinct crashes tracked. 1171 */ 1172 get crashesCount() { 1173 return this._data.crashes.size; 1174 }, 1175 1176 /** 1177 * All crashes tracked. 1178 * 1179 * This is an array of CrashRecord. 1180 */ 1181 get crashes() { 1182 let crashes = []; 1183 for (let [, crash] of this._data.crashes) { 1184 crashes.push(new CrashRecord(crash)); 1185 } 1186 1187 return crashes; 1188 }, 1189 1190 /** 1191 * Obtain a particular crash from its ID. 1192 * 1193 * A CrashRecord will be returned if the crash exists. null will be returned 1194 * if the crash is unknown. 1195 */ 1196 getCrash(id) { 1197 for (let crash of this.crashes) { 1198 if (crash.id == id) { 1199 return crash; 1200 } 1201 } 1202 1203 return null; 1204 }, 1205 1206 _ensureCountsForDay(day) { 1207 if (!this._countsByDay.has(day)) { 1208 this._countsByDay.set(day, new Map()); 1209 } 1210 }, 1211 1212 /** 1213 * Ensure the crash record is present in storage. 1214 * 1215 * Returns the crash record if we're allowed to store it or null 1216 * if we've hit the high water mark. 1217 * 1218 * @param processType 1219 * (string) One of the PROCESS_TYPE constants. 1220 * @param crashType 1221 * (string) One of the CRASH_TYPE constants. 1222 * @param id 1223 * (string) The crash ID. 1224 * @param date 1225 * (Date) When this crash occurred. 1226 * @param metadata 1227 * (dictionary) Crash metadata, may be empty. 1228 * 1229 * @return null | object crash record 1230 */ 1231 _ensureCrashRecord(processType, crashType, id, date, metadata) { 1232 if (!id) { 1233 // Crashes are keyed on ID, so it's not really helpful to store crashes 1234 // without IDs. 1235 return null; 1236 } 1237 1238 let type = processType + "-" + crashType; 1239 1240 if (!this._data.crashes.has(id)) { 1241 let day = dateToDays(date); 1242 this._ensureCountsForDay(day); 1243 1244 let count = (this._countsByDay.get(day).get(type) || 0) + 1; 1245 this._countsByDay.get(day).set(type, count); 1246 1247 if (count > this.HIGH_WATER_DAILY_THRESHOLD && 1248 processType != CrashManager.prototype.PROCESS_TYPE_MAIN) { 1249 return null; 1250 } 1251 1252 // If we have an OOM size, count the crash as an OOM in addition to 1253 // being a main process crash. 1254 if (metadata && metadata.OOMAllocationSize) { 1255 let oomType = type + "-oom"; 1256 let oomCount = (this._countsByDay.get(day).get(oomType) || 0) + 1; 1257 this._countsByDay.get(day).set(oomType, oomCount); 1258 } 1259 1260 this._data.crashes.set(id, { 1261 id, 1262 remoteID: null, 1263 type, 1264 crashDate: date, 1265 submissions: new Map(), 1266 classifications: [], 1267 metadata, 1268 }); 1269 } 1270 1271 let crash = this._data.crashes.get(id); 1272 crash.type = type; 1273 crash.crashDate = date; 1274 1275 return crash; 1276 }, 1277 1278 /** 1279 * Record the occurrence of a crash. 1280 * 1281 * @param processType (string) One of the PROCESS_TYPE constants. 1282 * @param crashType (string) One of the CRASH_TYPE constants. 1283 * @param id (string) Crash ID. Likely a UUID. 1284 * @param date (Date) When the crash occurred. 1285 * @param metadata (dictionary) Crash metadata, may be empty. 1286 * 1287 * @return boolean True if the crash was recorded and false if not. 1288 */ 1289 addCrash(processType, crashType, id, date, metadata) { 1290 return !!this._ensureCrashRecord(processType, crashType, id, date, metadata); 1291 }, 1292 1293 /** 1294 * @return boolean True if the remote ID was recorded and false if not. 1295 */ 1296 setRemoteCrashID(crashID, remoteID) { 1297 let crash = this._data.crashes.get(crashID); 1298 if (!crash || !remoteID) { 1299 return false; 1300 } 1301 1302 crash.remoteID = remoteID; 1303 return true; 1304 }, 1305 1306 getCrashesOfType(processType, crashType) { 1307 let crashes = []; 1308 for (let crash of this.crashes) { 1309 if (crash.isOfType(processType, crashType)) { 1310 crashes.push(crash); 1311 } 1312 } 1313 1314 return crashes; 1315 }, 1316 1317 /** 1318 * Ensure the submission record is present in storage. 1319 * @returns [submission, crash] 1320 */ 1321 _ensureSubmissionRecord(crashID, submissionID) { 1322 let crash = this._data.crashes.get(crashID); 1323 if (!crash || !submissionID) { 1324 return null; 1325 } 1326 1327 if (!crash.submissions.has(submissionID)) { 1328 crash.submissions.set(submissionID, { 1329 requestDate: null, 1330 responseDate: null, 1331 result: null, 1332 }); 1333 } 1334 1335 return [crash.submissions.get(submissionID), crash]; 1336 }, 1337 1338 /** 1339 * @return boolean True if the attempt was recorded. 1340 */ 1341 addSubmissionAttempt(crashID, submissionID, date) { 1342 let [submission, crash] = 1343 this._ensureSubmissionRecord(crashID, submissionID); 1344 if (!submission) { 1345 return false; 1346 } 1347 1348 submission.requestDate = date; 1349 Services.telemetry.getKeyedHistogramById("PROCESS_CRASH_SUBMIT_ATTEMPT") 1350 .add(crash.type, 1); 1351 return true; 1352 }, 1353 1354 /** 1355 * @return boolean True if the response was recorded. 1356 */ 1357 addSubmissionResult(crashID, submissionID, date, result) { 1358 let crash = this._data.crashes.get(crashID); 1359 if (!crash || !submissionID) { 1360 return false; 1361 } 1362 let submission = crash.submissions.get(submissionID); 1363 if (!submission) { 1364 return false; 1365 } 1366 1367 submission.responseDate = date; 1368 submission.result = result; 1369 Services.telemetry.getKeyedHistogramById("PROCESS_CRASH_SUBMIT_SUCCESS") 1370 .add(crash.type, result == "ok"); 1371 return true; 1372 }, 1373 1374 /** 1375 * @return boolean True if the classifications were set. 1376 */ 1377 setCrashClassifications(crashID, classifications) { 1378 let crash = this._data.crashes.get(crashID); 1379 if (!crash) { 1380 return false; 1381 } 1382 1383 crash.classifications = classifications; 1384 return true; 1385 }, 1386}); 1387 1388/** 1389 * Represents an individual crash with metadata. 1390 * 1391 * This is a wrapper around the low-level anonymous JS objects that define 1392 * crashes. It exposes a consistent and helpful API. 1393 * 1394 * Instances of this type should only be constructured inside this module, 1395 * not externally. The constructor is not considered a public API. 1396 * 1397 * @param o (object) 1398 * The crash's entry from the CrashStore. 1399 */ 1400function CrashRecord(o) { 1401 this._o = o; 1402} 1403 1404CrashRecord.prototype = Object.freeze({ 1405 get id() { 1406 return this._o.id; 1407 }, 1408 1409 get remoteID() { 1410 return this._o.remoteID; 1411 }, 1412 1413 get crashDate() { 1414 return this._o.crashDate; 1415 }, 1416 1417 /** 1418 * Obtain the newest date in this record. 1419 * 1420 * This is a convenience getter. The returned value is used to determine when 1421 * to expire a record. 1422 */ 1423 get newestDate() { 1424 // We currently only have 1 date, so this is easy. 1425 return this._o.crashDate; 1426 }, 1427 1428 get oldestDate() { 1429 return this._o.crashDate; 1430 }, 1431 1432 get type() { 1433 return this._o.type; 1434 }, 1435 1436 isOfType(processType, crashType) { 1437 return processType + "-" + crashType == this.type; 1438 }, 1439 1440 get submissions() { 1441 return this._o.submissions; 1442 }, 1443 1444 get classifications() { 1445 return this._o.classifications; 1446 }, 1447 1448 get metadata() { 1449 return this._o.metadata; 1450 }, 1451}); 1452 1453/** 1454 * Obtain the global CrashManager instance used by the running application. 1455 * 1456 * CrashManager is likely only ever instantiated once per application lifetime. 1457 * The main reason it's implemented as a reusable type is to facilitate testing. 1458 */ 1459XPCOMUtils.defineLazyGetter(this.CrashManager, "Singleton", function() { 1460 if (gCrashManager) { 1461 return gCrashManager; 1462 } 1463 1464 let crPath = OS.Path.join(OS.Constants.Path.userApplicationDataDir, 1465 "Crash Reports"); 1466 let storePath = OS.Path.join(OS.Constants.Path.profileDir, "crashes"); 1467 1468 gCrashManager = new CrashManager({ 1469 pendingDumpsDir: OS.Path.join(crPath, "pending"), 1470 submittedDumpsDir: OS.Path.join(crPath, "submitted"), 1471 eventsDirs: [OS.Path.join(crPath, "events"), OS.Path.join(storePath, "events")], 1472 storeDir: storePath, 1473 telemetryStoreSizeKey: "CRASH_STORE_COMPRESSED_BYTES", 1474 }); 1475 1476 // Automatically aggregate event files shortly after startup. This 1477 // ensures it happens with some frequency. 1478 // 1479 // There are performance considerations here. While this is doing 1480 // work and could negatively impact performance, the amount of work 1481 // is kept small per run by periodically aggregating event files. 1482 // Furthermore, well-behaving installs should not have much work 1483 // here to do. If there is a lot of work, that install has bigger 1484 // issues beyond reduced performance near startup. 1485 gCrashManager.scheduleMaintenance(AGGREGATE_STARTUP_DELAY_MS); 1486 1487 return gCrashManager; 1488}); 1489