/* This Source Code Form is subject to the terms of the Mozilla Public
 * License, v. 2.0. If a copy of the MPL was not distributed with this
 * file, You can obtain one at http://mozilla.org/MPL/2.0/. */

"use strict";

/*
 * This file currently contains a fairly general implementation of asynchronous
 * indexing with a very explicit message indexing implementation. As gloda
 * will eventually want to index more than just messages, the message-specific
 * things should ideally lose their special hold on this file. This will
 * benefit readability/size as well.
 */

const EXPORTED_SYMBOLS = ["GlodaMsgIndexer"];

const { MailServices } = ChromeUtils.import(
  "resource:///modules/MailServices.jsm"
);
const { MailUtils } = ChromeUtils.import("resource:///modules/MailUtils.jsm");

const { GlodaDatastore } = ChromeUtils.import(
  "resource:///modules/gloda/GlodaDatastore.jsm"
);
const { GlodaContact, GlodaFolder } = ChromeUtils.import(
  "resource:///modules/gloda/GlodaDataModel.jsm"
);
const { Gloda } = ChromeUtils.import("resource:///modules/gloda/Gloda.jsm");
const { GlodaCollectionManager } = ChromeUtils.import(
  "resource:///modules/gloda/Collection.jsm"
);
const { GlodaIndexer, IndexingJob } = ChromeUtils.import(
  "resource:///modules/gloda/GlodaIndexer.jsm"
);
const { MsgHdrToMimeMessage } = ChromeUtils.import(
  "resource:///modules/gloda/MimeMessage.jsm"
);

// Cr does not have mailnews error codes!
// (Compared against the exception result when entering a local folder whose
// summary still needs to be reparsed.)
var NS_MSG_ERROR_FOLDER_SUMMARY_OUT_OF_DATE = 0x80550005;

/**
 * Message header property holding the gloda id of the message; read and
 * written as a uint32 header property.
 */
var GLODA_MESSAGE_ID_PROPERTY = "gloda-id";
/**
 * Message header property to track dirty status; one of
 * |GlodaMsgIndexer.kMessageClean|, |GlodaMsgIndexer.kMessageDirty|,
 * |GlodaMsgIndexer.kMessageFilthy|.
47 */ 48var GLODA_DIRTY_PROPERTY = "gloda-dirty"; 49 50/** 51 * The sentinel GLODA_MESSAGE_ID_PROPERTY value indicating that a message fails 52 * to index and we should not bother trying again, at least not until a new 53 * release is made. 54 * 55 * This should ideally just flip between 1 and 2, with GLODA_OLD_BAD_MESSAGE_ID 56 * flipping in the other direction. If we start having more trailing badness, 57 * _indexerGetEnumerator and GLODA_OLD_BAD_MESSAGE_ID will need to be altered. 58 * 59 * When flipping this, be sure to update glodaTestHelper.js's copy. 60 */ 61var GLODA_BAD_MESSAGE_ID = 2; 62/** 63 * The gloda id we used to use to mark messages as bad, but now should be 64 * treated as eligible for indexing. This is only ever used for consideration 65 * when creating msg header enumerators with `_indexerGetEnumerator` which 66 * means we only will re-index such messages in an indexing sweep. Accordingly 67 * event-driven indexing will still treat such messages as unindexed (and 68 * unindexable) until an indexing sweep picks them up. 69 */ 70var GLODA_OLD_BAD_MESSAGE_ID = 1; 71var GLODA_FIRST_VALID_MESSAGE_ID = 32; 72 73var JUNK_SCORE_PROPERTY = "junkscore"; 74var JUNK_SPAM_SCORE_STR = Ci.nsIJunkMailPlugin.IS_SPAM_SCORE.toString(); 75 76/** 77 * The processing flags that tell us that a message header has not yet been 78 * reported to us via msgsClassified. If it has one of these flags, it is 79 * still being processed. 80 */ 81var NOT_YET_REPORTED_PROCESSING_FLAGS = 82 Ci.nsMsgProcessingFlags.NotReportedClassified | 83 Ci.nsMsgProcessingFlags.ClassifyJunk; 84 85// for list comprehension fun 86function* range(begin, end) { 87 for (let i = begin; i < end; ++i) { 88 yield i; 89 } 90} 91 92/** 93 * We do not set properties on the messages until we perform a DB commit; this 94 * helper class tracks messages that we have indexed but are not yet marked 95 * as such on their header. 96 */ 97var PendingCommitTracker = { 98 /** 99 * Maps message URIs to their gloda ids. 
100 * 101 * I am not entirely sure why I chose the URI for the key rather than 102 * gloda folder ID + message key. Most likely it was to simplify debugging 103 * since the gloda folder ID is opaque while the URI is very informative. It 104 * is also possible I was afraid of IMAP folder renaming triggering a UID 105 * renumbering? 106 */ 107 _indexedMessagesPendingCommitByKey: {}, 108 /** 109 * Map from the pending commit gloda id to a tuple of [the corresponding 110 * message header, dirtyState]. 111 */ 112 _indexedMessagesPendingCommitByGlodaId: {}, 113 /** 114 * Do we have a post-commit handler registered with this transaction yet? 115 */ 116 _pendingCommit: false, 117 118 /** 119 * The function gets called when the commit actually happens to flush our 120 * message id's. 121 * 122 * It is very possible that by the time this call happens we have left the 123 * folder and nulled out msgDatabase on the folder. Since nulling it out 124 * is what causes the commit, if we set the headers here without somehow 125 * forcing a commit, we will lose. Badly. 126 * Accordingly, we make a list of all the folders that the headers belong to 127 * as we iterate, make sure to re-attach their msgDatabase before forgetting 128 * the headers, then make sure to zero the msgDatabase again, triggering a 129 * commit. If there were a way to directly get the nsIMsgDatabase from the 130 * header we could do that and call commit directly. We don't track 131 * databases along with the headers since the headers can change because of 132 * moves and that would increase the number of moving parts. 133 */ 134 _commitCallback() { 135 let foldersByURI = {}; 136 let lastFolder = null; 137 138 for (let glodaId in PendingCommitTracker._indexedMessagesPendingCommitByGlodaId) { 139 let [ 140 msgHdr, 141 dirtyState, 142 ] = PendingCommitTracker._indexedMessagesPendingCommitByGlodaId[glodaId]; 143 // Mark this message as indexed. 
144 // It's conceivable the database could have gotten blown away, in which 145 // case the message headers are going to throw exceptions when we try 146 // and touch them. So we wrap this in a try block that complains about 147 // this unforeseen circumstance. (noteFolderDatabaseGettingBlownAway 148 // should have been called and avoided this situation in all known 149 // situations.) 150 try { 151 let curGlodaId = msgHdr.getUint32Property(GLODA_MESSAGE_ID_PROPERTY); 152 if (curGlodaId != glodaId) { 153 msgHdr.setUint32Property(GLODA_MESSAGE_ID_PROPERTY, glodaId); 154 } 155 let headerDirty = msgHdr.getUint32Property(GLODA_DIRTY_PROPERTY); 156 if (headerDirty != dirtyState) { 157 msgHdr.setUint32Property(GLODA_DIRTY_PROPERTY, dirtyState); 158 } 159 160 // Make sure this folder is in our foldersByURI map. 161 if (lastFolder == msgHdr.folder) { 162 continue; 163 } 164 lastFolder = msgHdr.folder; 165 let folderURI = lastFolder.URI; 166 if (!(folderURI in foldersByURI)) { 167 foldersByURI[folderURI] = lastFolder; 168 } 169 } catch (ex) { 170 GlodaMsgIndexer._log.error( 171 "Exception while attempting to mark message with gloda state after" + 172 "db commit", 173 ex 174 ); 175 } 176 } 177 178 // it is vitally important to do this before we forget about the headers! 179 for (let uri in foldersByURI) { 180 let folder = foldersByURI[uri]; 181 // This will not cause a parse. The database is in-memory since we have 182 // a header that belongs to it. This just causes the folder to 183 // re-acquire a reference from the database manager. 184 folder.msgDatabase; 185 // And this will cause a commit. (And must be done since we don't want 186 // to cause a leak.) 
187 folder.msgDatabase = null; 188 } 189 190 PendingCommitTracker._indexedMessagesPendingCommitByGlodaId = {}; 191 PendingCommitTracker._indexedMessagesPendingCommitByKey = {}; 192 193 PendingCommitTracker._pendingCommit = false; 194 }, 195 196 /** 197 * Track a message header that should be marked with the given gloda id when 198 * the database commits. 199 */ 200 track(aMsgHdr, aGlodaId) { 201 let pendingKey = aMsgHdr.folder.URI + "#" + aMsgHdr.messageKey; 202 this._indexedMessagesPendingCommitByKey[pendingKey] = aGlodaId; 203 this._indexedMessagesPendingCommitByGlodaId[aGlodaId] = [ 204 aMsgHdr, 205 GlodaMsgIndexer.kMessageClean, 206 ]; 207 208 if (!this._pendingCommit) { 209 GlodaDatastore.runPostCommit(this._commitCallback); 210 this._pendingCommit = true; 211 } 212 }, 213 214 /** 215 * Get the current state of a message header given that we cannot rely on just 216 * looking at the header's properties because we defer setting those 217 * until the SQLite commit happens. 218 * 219 * @return Tuple of [gloda id, dirty status]. 220 */ 221 getGlodaState(aMsgHdr) { 222 // If it's in the pending commit table, then the message is basically 223 // clean. Return that info. 224 let pendingKey = aMsgHdr.folder.URI + "#" + aMsgHdr.messageKey; 225 if (pendingKey in this._indexedMessagesPendingCommitByKey) { 226 let glodaId = 227 PendingCommitTracker._indexedMessagesPendingCommitByKey[pendingKey]; 228 return [glodaId, this._indexedMessagesPendingCommitByGlodaId[glodaId][1]]; 229 } 230 231 // Otherwise the header's concept of state is correct. 232 let glodaId = aMsgHdr.getUint32Property(GLODA_MESSAGE_ID_PROPERTY); 233 let glodaDirty = aMsgHdr.getUint32Property(GLODA_DIRTY_PROPERTY); 234 return [glodaId, glodaDirty]; 235 }, 236 237 /** 238 * Update our structure to reflect moved headers. Moves are currently 239 * treated as weakly interesting and do not require a reindexing 240 * although collections will get notified. 
So our job is to to fix-up 241 * the pending commit information if the message has a pending commit. 242 */ 243 noteMove(aOldHdr, aNewHdr) { 244 let oldKey = aOldHdr.folder.URI + "#" + aOldHdr.messageKey; 245 if (!(oldKey in this._indexedMessagesPendingCommitByKey)) { 246 return; 247 } 248 249 let glodaId = this._indexedMessagesPendingCommitByKey[oldKey]; 250 delete this._indexedMessagesPendingCommitByKey[oldKey]; 251 252 let newKey = aNewHdr.folder.URI + "#" + aNewHdr.messageKey; 253 this._indexedMessagesPendingCommitByKey[newKey] = glodaId; 254 255 // only clobber the header, not the dirty state 256 this._indexedMessagesPendingCommitByGlodaId[glodaId][0] = aNewHdr; 257 }, 258 259 /** 260 * A blind move is one where we have the source header but not the destination 261 * header. This happens for IMAP messages that do not involve offline fake 262 * headers. 263 * XXX Since IMAP moves will propagate the gloda-id/gloda-dirty bits for us, 264 * we could detect the other side of the move when it shows up as a 265 * msgsClassified event and restore the mapping information. Since the 266 * offline fake header case should now cover the bulk of IMAP move 267 * operations, we probably do not need to pursue this. 268 * 269 * We just re-dispatch to noteDirtyHeader because we can't do anything more 270 * clever. 271 */ 272 noteBlindMove(aOldHdr) { 273 this.noteDirtyHeader(aOldHdr); 274 }, 275 276 /** 277 * If a message is dirty we should stop tracking it for post-commit 278 * purposes. This is not because we don't want to write to its header 279 * when we commit as much as that we want to avoid |getHeaderGlodaState| 280 * reporting that the message is clean. We could complicate our state 281 * by storing that information, but this is easier and ends up the same 282 * in the end. 
283 */ 284 noteDirtyHeader(aMsgHdr) { 285 let pendingKey = aMsgHdr.folder.URI + "#" + aMsgHdr.messageKey; 286 if (!(pendingKey in this._indexedMessagesPendingCommitByKey)) { 287 return; 288 } 289 290 // (It is important that we get the gloda id from our own structure!) 291 let glodaId = this._indexedMessagesPendingCommitByKey[pendingKey]; 292 this._indexedMessagesPendingCommitByGlodaId[glodaId][1] = 293 GlodaMsgIndexer.kMessageDirty; 294 }, 295 296 /** 297 * Sometimes a folder database gets blown away. This happens for one of two 298 * expected reasons right now: 299 * - Folder compaction. 300 * - Explicit reindexing of a folder via the folder properties "rebuild index" 301 * button. 302 * 303 * When this happens, we are basically out of luck and need to discard 304 * everything about the folder. The good news is that the folder compaction 305 * pass is clever enough to re-establish the linkages that are being lost 306 * when we drop these things on the floor. Reindexing of a folder is not 307 * clever enough to deal with this but is an exceptional case of last resort 308 * (the user should not normally be performing a reindex as part of daily 309 * operation), so we accept that messages may be redundantly indexed. 310 */ 311 noteFolderDatabaseGettingBlownAway(aMsgFolder) { 312 let uri = aMsgFolder.URI + "#"; 313 for (let key of Object.keys(this._indexedMessagesPendingCommitByKey)) { 314 // this is not as efficient as it could be, but compaction is relatively 315 // rare and the number of pending headers is generally going to be 316 // small. 317 if (key.indexOf(uri) == 0) { 318 delete this._indexedMessagesPendingCommitByKey[key]; 319 } 320 } 321 }, 322}; 323 324/** 325 * This callback handles processing the asynchronous query results of 326 * |GlodaMsgIndexer.getMessagesByMessageID|. 
327 */ 328function MessagesByMessageIdCallback( 329 aMsgIDToIndex, 330 aResults, 331 aCallback, 332 aCallbackThis 333) { 334 this.msgIDToIndex = aMsgIDToIndex; 335 this.results = aResults; 336 this.callback = aCallback; 337 this.callbackThis = aCallbackThis; 338} 339 340MessagesByMessageIdCallback.prototype = { 341 _log: console.createInstance({ 342 prefix: "gloda.index_msg.mbm", 343 maxLogLevel: "Warn", 344 maxLogLevelPref: "gloda.loglevel", 345 }), 346 347 onItemsAdded(aItems, aCollection) { 348 // just outright bail if we are shutdown 349 if (GlodaDatastore.datastoreIsShutdown) { 350 return; 351 } 352 353 this._log.debug("getting results..."); 354 for (let message of aItems) { 355 this.results[this.msgIDToIndex[message.headerMessageID]].push(message); 356 } 357 }, 358 onItemsModified() {}, 359 onItemsRemoved() {}, 360 onQueryCompleted(aCollection) { 361 // just outright bail if we are shutdown 362 if (GlodaDatastore.datastoreIsShutdown) { 363 return; 364 } 365 366 this._log.debug("query completed, notifying... " + this.results); 367 368 this.callback.call(this.callbackThis, this.results); 369 }, 370}; 371 372/** 373 * The message indexer! 374 * 375 * === Message Indexing Strategy 376 * To these ends, we implement things like so: 377 * 378 * Message State Tracking 379 * - We store a property on all indexed headers indicating their gloda message 380 * id. This allows us to tell whether a message is indexed from the header, 381 * without having to consult the SQL database. 382 * - When we receive an event that indicates that a message's meta-data has 383 * changed and gloda needs to re-index the message, we set a property on the 384 * header that indicates the message is dirty. This property can indicate 385 * that the message needs to be re-indexed but the gloda-id is valid (dirty) 386 * or that the message's gloda-id is invalid (filthy) because the gloda 387 * database has been blown away. 
 * - We track whether a folder is up-to-date on our GlodaFolder representation
 *   using a concept of dirtiness, just like messages. Like messages, a folder
 *   can be dirty or filthy. A dirty folder has at least one dirty message in
 *   it which means we should scan the folder. A filthy folder means that
 *   every message in the folder should be considered filthy. Folders start
 *   out filthy when Gloda is first told about them indicating we cannot
 *   trust any of the gloda-id's in the folders. Filthy folders are downgraded
 *   to dirty folders after we mark all of the headers with gloda-id's filthy.
 *
 * Indexing Message Control
 * - We index the headers of all IMAP messages. We index the bodies of all IMAP
 *   messages that are offline. We index all local messages. We plan to avoid
 *   indexing news messages.
 * - We would like a way to express desires about indexing that either don't
 *   confound offline storage with indexing, or actually allow some choice.
 *
 * Indexing Messages
 * - We have two major modes of indexing: sweep and event-driven. When we
 *   start up we kick off an indexing sweep. We use event-driven indexing
 *   as we receive events for eligible messages, but if we get too many
 *   events we start dropping them on the floor and just flag that an indexing
 *   sweep is required.
 * - The sweep initiates folder indexing jobs based on the priorities assigned
 *   to folders. Folder indexing uses a filtered message enumerator to find
 *   messages that need to be indexed, minimizing wasteful exposure of message
 *   headers to XPConnect that we would not end up indexing.
 * - For local folders, we use GetDatabaseWithReparse to ensure that the .msf
 *   file exists. For IMAP folders, we simply use GetDatabase because we know
 *   the auto-sync logic will make sure that the folder is up-to-date and we
 *   want to avoid creating problems through use of updateFolder.
 *
 * Junk Mail
 * - We do not index junk. We do not index messages until the junk/non-junk
 *   determination has been made. If a message gets marked as junk, we act like
 *   it was deleted.
 * - We know when a message is actively queued for junk processing thanks to
 *   folder processing flags. nsMsgDBFolder::CallFilterPlugins does this
 *   prior to initiating spam processing. Unfortunately, this method does not
 *   get called until after we receive the notification about the existence of
 *   the header. How long after can vary on different factors. The longest
 *   delay is in the IMAP case where there is a filter that requires the
 *   message body to be present; the method does not get called until all the
 *   bodies are downloaded.
 *
 */
var GlodaMsgIndexer = {
  /**
   * A partial attempt to generalize to support multiple databases. Each
   * database would have its own datastore, which would have its own indexer.
   * But we rather inter-mingle our use of this field with the singleton
   * global GlodaDatastore.
   */
  _datastore: GlodaDatastore,
  /** Logger for this indexer; verbosity follows the "gloda.loglevel" pref. */
  _log: console.createInstance({
    prefix: "gloda.index_msg",
    maxLogLevel: "Warn",
    maxLogLevelPref: "gloda.loglevel",
  }),

  _junkService: MailServices.junk,

  /** Name identifying this indexer. */
  name: "index_msg",
  /**
   * Are we enabled, read: are we processing change events?
   */
  _enabled: false,
  get enabled() {
    return this._enabled;
  },

  /**
   * Start event-driven indexing: wire up our listener helpers, register them
   * with the mail session and the folder notification service, mark ourselves
   * enabled and give schema migration a chance to run.
   */
  enable() {
    // initialize our listeners' this pointers
    this._databaseAnnouncerListener.indexer = this;
    this._msgFolderListener.indexer = this;

    // register for:
    // - folder loaded events, so we know when getDatabaseWithReparse has
    //   finished updating the index/what not (if it wasn't immediately
    //   available)
    // - property changes (so we know when a message's read/starred state have
    //   changed.)
    this._folderListener._init(this);
    MailServices.mailSession.AddFolderListener(
      this._folderListener,
      Ci.nsIFolderListener.intPropertyChanged |
        Ci.nsIFolderListener.propertyFlagChanged |
        Ci.nsIFolderListener.event
    );

    MailServices.mfn.addListener(
      this._msgFolderListener,
      // note: intentionally no msgAdded or msgUnincorporatedMoved.
      Ci.nsIMsgFolderNotificationService.msgsClassified |
        Ci.nsIMsgFolderNotificationService.msgsJunkStatusChanged |
        Ci.nsIMsgFolderNotificationService.msgsDeleted |
        Ci.nsIMsgFolderNotificationService.msgsMoveCopyCompleted |
        Ci.nsIMsgFolderNotificationService.msgKeyChanged |
        Ci.nsIMsgFolderNotificationService.folderAdded |
        Ci.nsIMsgFolderNotificationService.folderDeleted |
        Ci.nsIMsgFolderNotificationService.folderMoveCopyCompleted |
        Ci.nsIMsgFolderNotificationService.folderRenamed |
        Ci.nsIMsgFolderNotificationService.folderCompactStart |
        Ci.nsIMsgFolderNotificationService.folderCompactFinish |
        Ci.nsIMsgFolderNotificationService.folderReindexTriggered
    );

    this._enabled = true;

    this._considerSchemaMigration();

    this._log.info("Event-Driven Indexing is now " + this._enabled);
  },
  /**
   * Stop event-driven indexing: unregister the listeners added by |enable|,
   * leave the folder we are currently indexing (if any) and mark ourselves
   * disabled.
   */
  disable() {
    // remove FolderLoaded notification listener
    MailServices.mailSession.RemoveFolderListener(this._folderListener);

    MailServices.mfn.removeListener(this._msgFolderListener);

    this._indexerLeaveFolder(); // nop if we aren't "in" a folder

    this._enabled = false;

    this._log.info("Event-Driven Indexing is now " + this._enabled);
  },

  /**
   * Indicates that we have pending deletions to process, meaning that there
   * are gloda message rows flagged for deletion. If this value is a boolean,
   * it means the value is known reliably. If this value is null, it means
   * that we don't know, likely because we have started up and have not checked
   * the database.
   */
  pendingDeletions: null,

  /**
   * The message (or folder state) is believed up-to-date.
   */
  kMessageClean: 0,
  /**
   * The message (or folder) is known to not be up-to-date. In the case of
   * folders, this means that some of the messages in the folder may be dirty.
   * However, because of the way our indexing works, it is possible there may
   * actually be no dirty messages in a folder. (We attempt to process
   * messages in an event-driven fashion for a finite number of messages, but
   * because we can quit without completing processing of the queue, we need to
   * mark the folder dirty, just-in-case.) (We could do some extra leg-work
   * and do a better job of marking the folder clean again.)
   */
  kMessageDirty: 1,
  /**
   * We have not indexed the folder at all, but messages in the folder think
   * they are indexed. We downgrade the folder to just kMessageDirty after
   * marking all the messages in the folder as dirty. We do this so that if we
   * have to stop indexing the folder we can still build on our progress next
   * time we enter the folder.
   * We mark all folders filthy when (re-)creating the database because there
   * may be previous state left over from an earlier database.
   */
  kMessageFilthy: 2,

  /**
   * A message addition job yet to be (completely) processed. Since message
   * addition events come to us one-by-one, in order to aggregate them into a
   * job, we need something like this. It's up to the indexing loop to
   * decide when to null this out; it can either do it when it first starts
   * processing it, or when it has processed the last thing. It's really a
   * question of whether we want retrograde motion in the folder progress bar
   * or the message progress bar.
   */
  _pendingAddJob: null,

  /**
   * The number of messages that we should queue for processing before letting
   * them fall on the floor and relying on our folder-walking logic to ensure
   * that the messages are indexed.
   * The reason we allow for queueing messages in an event-driven fashion is
   * that once we have reached a steady-state, it is preferable to be able to
   * deal with new messages and modified meta-data in a prompt fashion rather
   * than having to (potentially) walk every folder in the system just to find
   * the message that the user changed the tag on.
   */
  _indexMaxEventQueueMessages: 20,

  /**
   * Unit testing hook to get us to emit additional logging that verges on
   * inane for general usage but is helpful in unit test output to get a lay
   * of the land and for paranoia reasons.
   */
  _unitTestSuperVerbose: false,

  /** The GlodaFolder corresponding to the folder we are indexing. */
  _indexingGlodaFolder: null,
  /** The nsIMsgFolder we are currently indexing. */
  _indexingFolder: null,
  /** The nsIMsgDatabase we are currently indexing. */
  _indexingDatabase: null,
  /**
   * The iterator we are using to iterate over the headers in
   * this._indexingDatabase.
   *
   * NOTE(review): the folder enter/leave/enumerator helpers in this file
   * store the enumerator on |_indexingEnumerator| rather than on this field;
   * confirm whether this declaration is still used anywhere.
   */
  _indexingIterator: null,

  /** folder whose entry we are pending on */
  _pendingFolderEntry: null,

  // copy-down the work constants from Gloda
  kWorkSync: Gloda.kWorkSync,
  kWorkAsync: Gloda.kWorkAsync,
  kWorkDone: Gloda.kWorkDone,
  kWorkPause: Gloda.kWorkPause,
  kWorkDoneWithResult: Gloda.kWorkDoneWithResult,

  /**
   * Async common logic that we want to deal with the given folder ID. Besides
   * cutting down on duplicate code, this ensures that we are listening on
   * the folder in case it tries to go away when we are using it.
604 * 605 * @return true when the folder was successfully entered, false when we need 606 * to pend on notification of updating of the folder (due to re-parsing 607 * or what have you). In the event of an actual problem, an exception 608 * will escape. 609 */ 610 _indexerEnterFolder(aFolderID) { 611 // leave the folder if we haven't explicitly left it. 612 if (this._indexingFolder !== null) { 613 this._indexerLeaveFolder(); 614 } 615 616 this._indexingGlodaFolder = GlodaDatastore._mapFolderID(aFolderID); 617 this._indexingFolder = this._indexingGlodaFolder.getXPCOMFolder( 618 this._indexingGlodaFolder.kActivityIndexing 619 ); 620 621 if (this._indexingFolder) { 622 this._log.debug("Entering folder: " + this._indexingFolder.URI); 623 } 624 625 try { 626 // The msf may need to be created or otherwise updated for local folders. 627 // This may require yielding until such time as the msf has been created. 628 try { 629 if (this._indexingFolder instanceof Ci.nsIMsgLocalMailFolder) { 630 this._indexingDatabase = this._indexingFolder.getDatabaseWithReparse( 631 null, 632 null 633 ); 634 } 635 // we need do nothing special for IMAP, news, or other 636 } catch (e) { 637 // getDatabaseWithReparse can return either NS_ERROR_NOT_INITIALIZED or 638 // NS_MSG_ERROR_FOLDER_SUMMARY_OUT_OF_DATE if the net result is that it 639 // is going to send us a notification when the reparse has completed. 640 // (note that although internally NS_MSG_ERROR_FOLDER_SUMMARY_MISSING 641 // might get flung around, it won't make it out to us, and will instead 642 // be permuted into an NS_ERROR_NOT_INITIALIZED.) 643 if ( 644 e.result == Cr.NS_ERROR_NOT_INITIALIZED || 645 e.result == NS_MSG_ERROR_FOLDER_SUMMARY_OUT_OF_DATE 646 ) { 647 // this means that we need to pend on the update; the listener for 648 // FolderLoaded events will call _indexerCompletePendingFolderEntry. 
649 this._log.debug("Pending on folder load..."); 650 this._pendingFolderEntry = this._indexingFolder; 651 return this.kWorkAsync; 652 } 653 throw e; 654 } 655 // we get an nsIMsgDatabase out of this (unsurprisingly) which 656 // explicitly inherits from nsIDBChangeAnnouncer, which has the 657 // AddListener call we want. 658 if (this._indexingDatabase == null) { 659 this._indexingDatabase = this._indexingFolder.msgDatabase; 660 } 661 this._indexingDatabase.AddListener(this._databaseAnnouncerListener); 662 } catch (ex) { 663 this._log.error( 664 "Problem entering folder: " + 665 (this._indexingFolder ? this._indexingFolder.prettyName : "unknown") + 666 ", skipping. Error was: " + 667 ex.fileName + 668 ":" + 669 ex.lineNumber + 670 ": " + 671 ex 672 ); 673 this._indexingGlodaFolder.indexing = false; 674 this._indexingFolder = null; 675 this._indexingGlodaFolder = null; 676 this._indexingDatabase = null; 677 this._indexingEnumerator = null; 678 679 // re-throw, we just wanted to make sure this junk is cleaned up and 680 // get localized error logging... 681 throw ex; 682 } 683 684 return this.kWorkSync; 685 }, 686 687 /** 688 * If the folder was still parsing/updating when we tried to enter, then this 689 * handler will get called by the listener who got the FolderLoaded message. 690 * All we need to do is get the database reference, register a listener on 691 * the db, and retrieve an iterator if desired. 692 */ 693 _indexerCompletePendingFolderEntry() { 694 this._indexingDatabase = this._indexingFolder.msgDatabase; 695 this._indexingDatabase.AddListener(this._databaseAnnouncerListener); 696 this._log.debug("...Folder Loaded!"); 697 698 // the load is no longer pending; we certainly don't want more notifications 699 this._pendingFolderEntry = null; 700 // indexerEnterFolder returned kWorkAsync, which means we need to notify 701 // the callback driver to get things going again. 
702 GlodaIndexer.callbackDriver(); 703 }, 704 705 /** 706 * Enumerate all messages in the folder. 707 */ 708 kEnumAllMsgs: 0, 709 /** 710 * Enumerate messages that look like they need to be indexed. 711 */ 712 kEnumMsgsToIndex: 1, 713 /** 714 * Enumerate messages that are already indexed. 715 */ 716 kEnumIndexedMsgs: 2, 717 718 /** 719 * Synchronous helper to get an enumerator for the current folder (as found 720 * in |_indexingFolder|. 721 * 722 * @param aEnumKind One of |kEnumAllMsgs|, |kEnumMsgsToIndex|, or 723 * |kEnumIndexedMsgs|. 724 * @param [aAllowPreBadIds=false] Only valid for |kEnumIndexedMsgs|, tells us 725 * that we should treat message with any gloda-id as dirty, not just 726 * messages that have non-bad message id's. 727 */ 728 _indexerGetEnumerator(aEnumKind, aAllowPreBadIds) { 729 if (aEnumKind == this.kEnumMsgsToIndex) { 730 // We need to create search terms for messages to index. Messages should 731 // be indexed if they're indexable (local or offline and not expunged) 732 // and either: haven't been indexed, are dirty, or are marked with with 733 // a former GLODA_BAD_MESSAGE_ID that is no longer our bad marker. (Our 734 // bad marker can change on minor schema revs so that we can try and 735 // reindex those messages exactly once and without needing to go through 736 // a pass to mark them as needing one more try.) 
737 // The basic search expression is: 738 // ((GLODA_MESSAGE_ID_PROPERTY Is 0) || 739 // (GLODA_MESSAGE_ID_PROPERTY Is GLODA_OLD_BAD_MESSAGE_ID) || 740 // (GLODA_DIRTY_PROPERTY Isnt 0)) && 741 // (JUNK_SCORE_PROPERTY Isnt 100) 742 // If the folder !isLocal we add the terms: 743 // - if the folder is offline -- && (Status Is nsMsgMessageFlags.Offline) 744 // - && (Status Isnt nsMsgMessageFlags.Expunged) 745 746 let searchSession = Cc[ 747 "@mozilla.org/messenger/searchSession;1" 748 ].createInstance(Ci.nsIMsgSearchSession); 749 let searchTerms = []; 750 let isLocal = this._indexingFolder instanceof Ci.nsIMsgLocalMailFolder; 751 752 searchSession.addScopeTerm( 753 Ci.nsMsgSearchScope.offlineMail, 754 this._indexingFolder 755 ); 756 let nsMsgSearchAttrib = Ci.nsMsgSearchAttrib; 757 let nsMsgSearchOp = Ci.nsMsgSearchOp; 758 759 // first term: (GLODA_MESSAGE_ID_PROPERTY Is 0 760 let searchTerm = searchSession.createTerm(); 761 searchTerm.booleanAnd = false; // actually don't care here 762 searchTerm.beginsGrouping = true; 763 searchTerm.attrib = nsMsgSearchAttrib.Uint32HdrProperty; 764 searchTerm.op = nsMsgSearchOp.Is; 765 let value = searchTerm.value; 766 value.attrib = searchTerm.attrib; 767 value.status = 0; 768 searchTerm.value = value; 769 searchTerm.hdrProperty = GLODA_MESSAGE_ID_PROPERTY; 770 searchTerms.push(searchTerm); 771 772 // second term: || GLODA_MESSAGE_ID_PROPERTY Is GLODA_OLD_BAD_MESSAGE_ID 773 searchTerm = searchSession.createTerm(); 774 searchTerm.booleanAnd = false; // OR 775 searchTerm.attrib = nsMsgSearchAttrib.Uint32HdrProperty; 776 searchTerm.op = nsMsgSearchOp.Is; 777 value = searchTerm.value; 778 value.attrib = searchTerm.attrib; 779 value.status = GLODA_OLD_BAD_MESSAGE_ID; 780 searchTerm.value = value; 781 searchTerm.hdrProperty = GLODA_MESSAGE_ID_PROPERTY; 782 searchTerms.push(searchTerm); 783 784 // third term: || GLODA_DIRTY_PROPERTY Isnt 0 ) 785 searchTerm = searchSession.createTerm(); 786 searchTerm.booleanAnd = false; 787 
searchTerm.endsGrouping = true; 788 searchTerm.attrib = nsMsgSearchAttrib.Uint32HdrProperty; 789 searchTerm.op = nsMsgSearchOp.Isnt; 790 value = searchTerm.value; 791 value.attrib = searchTerm.attrib; 792 value.status = 0; 793 searchTerm.value = value; 794 searchTerm.hdrProperty = GLODA_DIRTY_PROPERTY; 795 searchTerms.push(searchTerm); 796 797 // JUNK_SCORE_PROPERTY Isnt 100 798 // For symmetry with our event-driven stuff, we just directly deal with 799 // the header property. 800 searchTerm = searchSession.createTerm(); 801 searchTerm.booleanAnd = true; 802 searchTerm.attrib = nsMsgSearchAttrib.HdrProperty; 803 searchTerm.op = nsMsgSearchOp.Isnt; 804 value = searchTerm.value; 805 value.attrib = searchTerm.attrib; 806 value.str = JUNK_SPAM_SCORE_STR; 807 searchTerm.value = value; 808 searchTerm.hdrProperty = JUNK_SCORE_PROPERTY; 809 searchTerms.push(searchTerm); 810 811 if (!isLocal) { 812 // If the folder is offline, then the message should be too 813 if (this._indexingFolder.getFlag(Ci.nsMsgFolderFlags.Offline)) { 814 // third term: && Status Is nsMsgMessageFlags.Offline 815 searchTerm = searchSession.createTerm(); 816 searchTerm.booleanAnd = true; 817 searchTerm.attrib = nsMsgSearchAttrib.MsgStatus; 818 searchTerm.op = nsMsgSearchOp.Is; 819 value = searchTerm.value; 820 value.attrib = searchTerm.attrib; 821 value.status = Ci.nsMsgMessageFlags.Offline; 822 searchTerm.value = value; 823 searchTerms.push(searchTerm); 824 } 825 826 // fourth term: && Status Isnt nsMsgMessageFlags.Expunged 827 searchTerm = searchSession.createTerm(); 828 searchTerm.booleanAnd = true; 829 searchTerm.attrib = nsMsgSearchAttrib.MsgStatus; 830 searchTerm.op = nsMsgSearchOp.Isnt; 831 value = searchTerm.value; 832 value.attrib = searchTerm.attrib; 833 value.status = Ci.nsMsgMessageFlags.Expunged; 834 searchTerm.value = value; 835 searchTerms.push(searchTerm); 836 } 837 838 this._indexingEnumerator = this._indexingDatabase.getFilterEnumerator( 839 searchTerms, 840 true 841 ); 842 } else if 
(aEnumKind == this.kEnumIndexedMsgs) { 843 // Enumerate only messages that are already indexed. This comes out to: 844 // ((GLODA_MESSAGE_ID_PROPERTY > GLODA_FIRST_VALID_MESSAGE_ID-1) && 845 // (GLODA_DIRTY_PROPERTY Isnt kMessageFilthy)) 846 // In English, a message is indexed if (by clause): 847 // 1) The message has a gloda-id and that gloda-id is in the valid range 848 // (and not in the bad message marker range). 849 // 2) The message has not been marked filthy (which invalidates the 850 // gloda-id.) We also assume that the folder would not have been 851 // entered at all if it was marked filthy. 852 let searchSession = Cc[ 853 "@mozilla.org/messenger/searchSession;1" 854 ].createInstance(Ci.nsIMsgSearchSession); 855 let searchTerms = []; 856 857 searchSession.addScopeTerm( 858 Ci.nsMsgSearchScope.offlineMail, 859 this._indexingFolder 860 ); 861 let nsMsgSearchAttrib = Ci.nsMsgSearchAttrib; 862 let nsMsgSearchOp = Ci.nsMsgSearchOp; 863 864 // first term: (GLODA_MESSAGE_ID_PROPERTY > GLODA_FIRST_VALID_MESSAGE_ID-1 865 let searchTerm = searchSession.createTerm(); 866 searchTerm.booleanAnd = false; // actually don't care here 867 searchTerm.beginsGrouping = true; 868 searchTerm.attrib = nsMsgSearchAttrib.Uint32HdrProperty; 869 // use != 0 if we're allow pre-bad ids. 870 searchTerm.op = aAllowPreBadIds 871 ? nsMsgSearchOp.Isnt 872 : nsMsgSearchOp.IsGreaterThan; 873 let value = searchTerm.value; 874 value.attrib = searchTerm.attrib; 875 value.status = aAllowPreBadIds ? 
0 : GLODA_FIRST_VALID_MESSAGE_ID - 1; 876 searchTerm.value = value; 877 searchTerm.hdrProperty = GLODA_MESSAGE_ID_PROPERTY; 878 searchTerms.push(searchTerm); 879 880 // second term: && GLODA_DIRTY_PROPERTY Isnt kMessageFilthy) 881 searchTerm = searchSession.createTerm(); 882 searchTerm.booleanAnd = true; 883 searchTerm.endsGrouping = true; 884 searchTerm.attrib = nsMsgSearchAttrib.Uint32HdrProperty; 885 searchTerm.op = nsMsgSearchOp.Isnt; 886 value = searchTerm.value; 887 value.attrib = searchTerm.attrib; 888 value.status = this.kMessageFilthy; 889 searchTerm.value = value; 890 searchTerm.hdrProperty = GLODA_DIRTY_PROPERTY; 891 searchTerms.push(searchTerm); 892 893 // The use-case of already indexed messages does not want them reversed; 894 // we care about seeing the message keys in order. 895 this._indexingEnumerator = this._indexingDatabase.getFilterEnumerator( 896 searchTerms, 897 false 898 ); 899 } else if (aEnumKind == this.kEnumAllMsgs) { 900 this._indexingEnumerator = this._indexingDatabase.ReverseEnumerateMessages(); 901 } else { 902 throw new Error("Unknown enumerator type requested:" + aEnumKind); 903 } 904 }, 905 906 _indexerLeaveFolder() { 907 if (this._indexingFolder !== null) { 908 if (this._indexingDatabase) { 909 this._indexingDatabase.Commit(Ci.nsMsgDBCommitType.kLargeCommit); 910 // remove our listener! 911 this._indexingDatabase.RemoveListener(this._databaseAnnouncerListener); 912 } 913 // let the gloda folder know we are done indexing 914 this._indexingGlodaFolder.indexing = false; 915 // null everyone out 916 this._indexingFolder = null; 917 this._indexingGlodaFolder = null; 918 this._indexingDatabase = null; 919 this._indexingEnumerator = null; 920 } 921 }, 922 923 /** 924 * Event fed to us by our nsIFolderListener when a folder is loaded. We use 925 * this event to know when a folder we were trying to open to index is 926 * actually ready to be indexed. (The summary may have not existed, may have 927 * been out of date, or otherwise.) 
928 * 929 * @param aFolder An nsIMsgFolder, already QI'd. 930 */ 931 _onFolderLoaded(aFolder) { 932 if ( 933 this._pendingFolderEntry !== null && 934 aFolder.URI == this._pendingFolderEntry.URI 935 ) { 936 this._indexerCompletePendingFolderEntry(); 937 } 938 }, 939 940 // it's a getter so we can reference 'this'. we could memoize. 941 get workers() { 942 return [ 943 [ 944 "folderSweep", 945 { 946 worker: this._worker_indexingSweep, 947 jobCanceled: this._cleanup_indexingSweep, 948 cleanup: this._cleanup_indexingSweep, 949 }, 950 ], 951 [ 952 "folder", 953 { 954 worker: this._worker_folderIndex, 955 recover: this._recover_indexMessage, 956 cleanup: this._cleanup_indexing, 957 }, 958 ], 959 [ 960 "folderCompact", 961 { 962 worker: this._worker_folderCompactionPass, 963 // compaction enters the folder so needs to know how to leave 964 cleanup: this._cleanup_indexing, 965 }, 966 ], 967 [ 968 "message", 969 { 970 worker: this._worker_messageIndex, 971 onSchedule: this._schedule_messageIndex, 972 jobCanceled: this._canceled_messageIndex, 973 recover: this._recover_indexMessage, 974 cleanup: this._cleanup_indexing, 975 }, 976 ], 977 [ 978 "delete", 979 { 980 worker: this._worker_processDeletes, 981 }, 982 ], 983 984 [ 985 "fixMissingContacts", 986 { 987 worker: this._worker_fixMissingContacts, 988 }, 989 ], 990 ]; 991 }, 992 993 _schemaMigrationInitiated: false, 994 _considerSchemaMigration() { 995 if ( 996 !this._schemaMigrationInitiated && 997 GlodaDatastore._actualSchemaVersion === 26 998 ) { 999 let job = new IndexingJob("fixMissingContacts", null); 1000 GlodaIndexer.indexJob(job); 1001 this._schemaMigrationInitiated = true; 1002 } 1003 }, 1004 1005 initialSweep() { 1006 this.indexingSweepNeeded = true; 1007 }, 1008 1009 _indexingSweepActive: false, 1010 /** 1011 * Indicate that an indexing sweep is desired. 
We kick-off an indexing 1012 * sweep at start-up and whenever we receive an event-based notification 1013 * that we either can't process as an event or that we normally handle 1014 * during the sweep pass anyways. 1015 */ 1016 set indexingSweepNeeded(aNeeded) { 1017 if (!this._indexingSweepActive && aNeeded) { 1018 let job = new IndexingJob("folderSweep", null); 1019 job.mappedFolders = false; 1020 GlodaIndexer.indexJob(job); 1021 this._indexingSweepActive = true; 1022 } 1023 }, 1024 1025 /** 1026 * Performs the folder sweep, locating folders that should be indexed, and 1027 * creating a folder indexing job for them, and rescheduling itself for 1028 * execution after that job is completed. Once it indexes all the folders, 1029 * if we believe we have deletions to process (or just don't know), it kicks 1030 * off a deletion processing job. 1031 * 1032 * Folder traversal logic is based off the spotlight/vista indexer code; we 1033 * retrieve the list of servers and folders each time want to find a new 1034 * folder to index. This avoids needing to maintain a perfect model of the 1035 * folder hierarchy at all times. (We may eventually want to do that, but 1036 * this is sufficient and safe for now.) Although our use of dirty flags on 1037 * the folders allows us to avoid tracking the 'last folder' we processed, 1038 * we do so to avoid getting 'trapped' in a folder with a high rate of 1039 * changes. 1040 */ 1041 *_worker_indexingSweep(aJob) { 1042 if (!aJob.mappedFolders) { 1043 // Walk the folders and make sure all the folders we would want to index 1044 // are mapped. Build up a list of GlodaFolders as we go, so that we can 1045 // sort them by their indexing priority. 
1046 let foldersToProcess = (aJob.foldersToProcess = []); 1047 1048 for (let folder of MailServices.accounts.allFolders) { 1049 if (this.shouldIndexFolder(folder)) { 1050 foldersToProcess.push(Gloda.getFolderForFolder(folder)); 1051 } 1052 } 1053 1054 // sort the folders by priority (descending) 1055 foldersToProcess.sort(function(a, b) { 1056 return b.indexingPriority - a.indexingPriority; 1057 }); 1058 1059 aJob.mappedFolders = true; 1060 } 1061 1062 // -- process the folders (in sorted order) 1063 while (aJob.foldersToProcess.length) { 1064 let glodaFolder = aJob.foldersToProcess.shift(); 1065 // ignore folders that: 1066 // - have been deleted out of existence! 1067 // - are not dirty/have not been compacted 1068 // - are actively being compacted 1069 if ( 1070 glodaFolder._deleted || 1071 (!glodaFolder.dirtyStatus && !glodaFolder.compacted) || 1072 glodaFolder.compacting 1073 ) { 1074 continue; 1075 } 1076 1077 // If the folder is marked as compacted, give it a compaction job. 1078 if (glodaFolder.compacted) { 1079 GlodaIndexer.indexJob(new IndexingJob("folderCompact", glodaFolder.id)); 1080 } 1081 1082 // add a job for the folder indexing if it was dirty 1083 if (glodaFolder.dirtyStatus) { 1084 GlodaIndexer.indexJob(new IndexingJob("folder", glodaFolder.id)); 1085 } 1086 1087 // re-schedule this job (although this worker will die) 1088 GlodaIndexer.indexJob(aJob); 1089 yield this.kWorkDone; 1090 } 1091 1092 // consider deletion 1093 if (this.pendingDeletions || this.pendingDeletions === null) { 1094 GlodaIndexer.indexJob(new IndexingJob("delete", null)); 1095 } 1096 1097 // we don't have any more work to do... 1098 this._indexingSweepActive = false; 1099 yield this.kWorkDone; 1100 }, 1101 1102 /** 1103 * The only state we need to cleanup is that there is no longer an active 1104 * indexing sweep. 
1105 */ 1106 _cleanup_indexingSweep(aJob) { 1107 this._indexingSweepActive = false; 1108 }, 1109 1110 /** 1111 * The number of headers to look at before yielding with kWorkSync. This 1112 * is for time-slicing purposes so we still yield to the UI periodically. 1113 */ 1114 HEADER_CHECK_SYNC_BLOCK_SIZE: 25, 1115 1116 FOLDER_COMPACTION_PASS_BATCH_SIZE: 512, 1117 /** 1118 * Special indexing pass for (local) folders than have been compacted. The 1119 * compaction can cause message keys to change because message keys in local 1120 * folders are simply offsets into the mbox file. Accordingly, we need to 1121 * update the gloda records/objects to point them at the new message key. 1122 * 1123 * Our general algorithm is to perform two traversals in parallel. The first 1124 * is a straightforward enumeration of the message headers in the folder that 1125 * apparently have been already indexed. These provide us with the message 1126 * key and the "gloda-id" property. 1127 * The second is a list of tuples containing a gloda message id, its current 1128 * message key per the gloda database, and the message-id header. We re-fill 1129 * the list with batches on-demand. This allows us to both avoid dispatching 1130 * needless UPDATEs as well as deal with messages that were tracked by the 1131 * PendingCommitTracker but were discarded by the compaction notification. 1132 * 1133 * We end up processing two streams of gloda-id's and some extra info. In 1134 * the normal case we expect these two streams to line up exactly and all 1135 * we need to do is update the message key if it has changed. 1136 * 1137 * There are a few exceptional cases where things do not line up: 1138 * 1) The gloda database knows about a message that the enumerator does not 1139 * know about... 1140 * a) This message exists in the folder (identified using its message-id 1141 * header). This means the message got indexed but PendingCommitTracker 1142 * had to forget about the info when the compaction happened. 
We 1143 * re-establish the link and track the message in PendingCommitTracker 1144 * again. 1145 * b) The message does not exist in the folder. This means the message got 1146 * indexed, PendingCommitTracker had to forget about the info, and 1147 * then the message either got moved or deleted before now. We mark 1148 * the message as deleted; this allows the gloda message to be reused 1149 * if the move target has not yet been indexed or purged if it already 1150 * has been and the gloda message is a duplicate. And obviously, if the 1151 * event that happened was actually a delete, then the delete is the 1152 * right thing to do. 1153 * 2) The enumerator knows about a message that the gloda database does not 1154 * know about. This is unexpected and should not happen. We log a 1155 * warning. We are able to differentiate this case from case #1a by 1156 * retrieving the message header associated with the next gloda message 1157 * (using the message-id header per 1a again). If the gloda message's 1158 * message key is after the enumerator's message key then we know this is 1159 * case #2. (It implies an insertion in the enumerator stream which is how 1160 * we define the unexpected case.) 1161 * 1162 * Besides updating the database rows, we also need to make sure that 1163 * in-memory representations are updated. Immediately after dispatching 1164 * UPDATE changes to the database we use the same set of data to walk the 1165 * live collections and update any affected messages. We are then able to 1166 * discard the information. Although this means that we will have to 1167 * potentially walk the live collections multiple times, unless something 1168 * has gone horribly wrong, the number of collections should be reasonable 1169 * and the lookups are cheap. We bias batch sizes accordingly. 
1170 * 1171 * Because we operate based on chunks we need to make sure that when we 1172 * actually deal with multiple chunks that we don't step on our own feet with 1173 * our database updates. Since compaction of message key K results in a new 1174 * message key K' such that K' <= K, we can reliably issue database 1175 * updates for all values <= K. Which means our feet are safe no matter 1176 * when we issue the update command. For maximum cache benefit, we issue 1177 * our updates prior to our new query since they should still be maximally 1178 * hot at that point. 1179 */ 1180 *_worker_folderCompactionPass(aJob, aCallbackHandle) { 1181 yield this._indexerEnterFolder(aJob.id); 1182 1183 // It's conceivable that with a folder sweep we might end up trying to 1184 // compact a folder twice. Bail early in this case. 1185 if (!this._indexingGlodaFolder.compacted) { 1186 yield this.kWorkDone; 1187 } 1188 1189 // this is a forward enumeration (sometimes we reverse enumerate; not here) 1190 this._indexerGetEnumerator(this.kEnumIndexedMsgs); 1191 1192 const HEADER_CHECK_SYNC_BLOCK_SIZE = this.HEADER_CHECK_SYNC_BLOCK_SIZE; 1193 const FOLDER_COMPACTION_PASS_BATCH_SIZE = this 1194 .FOLDER_COMPACTION_PASS_BATCH_SIZE; 1195 1196 // Tuples of [gloda id, message key, message-id header] from 1197 // folderCompactionPassBlockFetch 1198 let glodaIdsMsgKeysHeaderIds = []; 1199 // Unpack each tuple from glodaIdsMsgKeysHeaderIds into these guys. 1200 // (Initialize oldMessageKey because we use it to kickstart our query.) 
1201 let oldGlodaId, 1202 oldMessageKey = -1, 1203 oldHeaderMessageId; 1204 // parallel lists of gloda ids and message keys to pass to 1205 // GlodaDatastore.updateMessageLocations 1206 let updateGlodaIds = []; 1207 let updateMessageKeys = []; 1208 // list of gloda id's to mark deleted 1209 let deleteGlodaIds = []; 1210 1211 // for GC reasons we need to track the number of headers seen 1212 let numHeadersSeen = 0; 1213 1214 // We are consuming two lists; our loop structure has to reflect that. 1215 let headerIter = this._indexingEnumerator[Symbol.iterator](); 1216 let mayHaveMoreGlodaMessages = true; 1217 let keepIterHeader = false; 1218 let keepGlodaTuple = false; 1219 let msgHdr = null; 1220 while (headerIter || mayHaveMoreGlodaMessages) { 1221 let glodaId; 1222 if (headerIter) { 1223 if (!keepIterHeader) { 1224 let result = headerIter.next(); 1225 if (result.done) { 1226 headerIter = null; 1227 msgHdr = null; 1228 // do the loop check again 1229 continue; 1230 } 1231 msgHdr = result.value; 1232 } else { 1233 keepIterHeader = false; 1234 } 1235 } 1236 1237 if (msgHdr) { 1238 numHeadersSeen++; 1239 if (numHeadersSeen % HEADER_CHECK_SYNC_BLOCK_SIZE == 0) { 1240 yield this.kWorkSync; 1241 } 1242 1243 // There is no need to check with PendingCommitTracker. If a message 1244 // somehow got indexed between the time the compaction killed 1245 // everything and the time we run, that is a bug. 1246 glodaId = msgHdr.getUint32Property(GLODA_MESSAGE_ID_PROPERTY); 1247 // (there is also no need to check for gloda dirty since the enumerator 1248 // filtered that for us.) 1249 } 1250 1251 // get more [gloda id, message key, message-id header] tuples if out 1252 if (!glodaIdsMsgKeysHeaderIds.length && mayHaveMoreGlodaMessages) { 1253 // Since we operate on blocks, getting a new block implies we should 1254 // flush the last block if applicable. 
1255 if (updateGlodaIds.length) { 1256 GlodaDatastore.updateMessageLocations( 1257 updateGlodaIds, 1258 updateMessageKeys, 1259 aJob.id, 1260 true 1261 ); 1262 updateGlodaIds = []; 1263 updateMessageKeys = []; 1264 } 1265 1266 if (deleteGlodaIds.length) { 1267 GlodaDatastore.markMessagesDeletedByIDs(deleteGlodaIds); 1268 deleteGlodaIds = []; 1269 } 1270 1271 GlodaDatastore.folderCompactionPassBlockFetch( 1272 aJob.id, 1273 oldMessageKey + 1, 1274 FOLDER_COMPACTION_PASS_BATCH_SIZE, 1275 aCallbackHandle.wrappedCallback 1276 ); 1277 glodaIdsMsgKeysHeaderIds = yield this.kWorkAsync; 1278 // Reverse so we can use pop instead of shift and I don't need to be 1279 // paranoid about performance. 1280 glodaIdsMsgKeysHeaderIds.reverse(); 1281 1282 if (!glodaIdsMsgKeysHeaderIds.length) { 1283 mayHaveMoreGlodaMessages = false; 1284 1285 // We shouldn't be in the loop anymore if headerIter is dead now. 1286 if (!headerIter) { 1287 break; 1288 } 1289 } 1290 } 1291 1292 if (!keepGlodaTuple) { 1293 if (mayHaveMoreGlodaMessages) { 1294 [ 1295 oldGlodaId, 1296 oldMessageKey, 1297 oldHeaderMessageId, 1298 ] = glodaIdsMsgKeysHeaderIds.pop(); 1299 } else { 1300 oldGlodaId = oldMessageKey = oldHeaderMessageId = null; 1301 } 1302 } else { 1303 keepGlodaTuple = false; 1304 } 1305 1306 // -- normal expected case 1307 if (glodaId == oldGlodaId) { 1308 // only need to do something if the key is not right 1309 if (msgHdr.messageKey != oldMessageKey) { 1310 updateGlodaIds.push(glodaId); 1311 updateMessageKeys.push(msgHdr.messageKey); 1312 } 1313 } else { 1314 // -- exceptional cases 1315 // This should always return a value unless something is very wrong. 1316 // We do not want to catch the exception if one happens. 1317 let idBasedHeader = oldHeaderMessageId 1318 ? this._indexingDatabase.getMsgHdrForMessageID(oldHeaderMessageId) 1319 : false; 1320 // - Case 1b. 1321 // We want to mark the message as deleted. 
1322 if (idBasedHeader == null) { 1323 deleteGlodaIds.push(oldGlodaId); 1324 } else if ( 1325 idBasedHeader && 1326 ((msgHdr && idBasedHeader.messageKey < msgHdr.messageKey) || !msgHdr) 1327 ) { 1328 // - Case 1a 1329 // The expected case is that the message referenced by the gloda 1330 // database precedes the header the enumerator told us about. This 1331 // is expected because if PendingCommitTracker did not mark the 1332 // message as indexed/clean then the enumerator would not tell us 1333 // about it. 1334 // Also, if we ran out of headers from the enumerator, this is a dead 1335 // giveaway that this is the expected case. 1336 // tell the pending commit tracker about the gloda database one 1337 PendingCommitTracker.track(idBasedHeader, oldGlodaId); 1338 // and we might need to update the message key too 1339 if (idBasedHeader.messageKey != oldMessageKey) { 1340 updateGlodaIds.push(oldGlodaId); 1341 updateMessageKeys.push(idBasedHeader.messageKey); 1342 } 1343 // Take another pass through the loop so that we check the 1344 // enumerator header against the next message in the gloda 1345 // database. 1346 keepIterHeader = true; 1347 } else if (msgHdr) { 1348 // - Case 2 1349 // Whereas if the message referenced by gloda has a message key 1350 // greater than the one returned by the enumerator, then we have a 1351 // header claiming to be indexed by gloda that gloda does not 1352 // actually know about. This is exceptional and gets a warning. 1353 this._log.warn( 1354 "Observed header that claims to be gloda indexed " + 1355 "but that gloda has never heard of during " + 1356 "compaction." + 1357 " In folder: " + 1358 msgHdr.folder.URI + 1359 " sketchy key: " + 1360 msgHdr.messageKey + 1361 " subject: " + 1362 msgHdr.mime2DecodedSubject 1363 ); 1364 // Keep this tuple around for the next enumerator provided header 1365 keepGlodaTuple = true; 1366 } 1367 } 1368 } 1369 // If we don't flush the update, no one will! 
1370 if (updateGlodaIds.length) { 1371 GlodaDatastore.updateMessageLocations( 1372 updateGlodaIds, 1373 updateMessageKeys, 1374 aJob.id, 1375 true 1376 ); 1377 } 1378 if (deleteGlodaIds.length) { 1379 GlodaDatastore.markMessagesDeletedByIDs(deleteGlodaIds); 1380 } 1381 1382 this._indexingGlodaFolder._setCompactedState(false); 1383 1384 this._indexerLeaveFolder(); 1385 yield this.kWorkDone; 1386 }, 1387 1388 /** 1389 * Index the contents of a folder. 1390 */ 1391 *_worker_folderIndex(aJob, aCallbackHandle) { 1392 yield this._indexerEnterFolder(aJob.id); 1393 1394 if (!this.shouldIndexFolder(this._indexingFolder)) { 1395 aJob.safelyInvokeCallback(true); 1396 yield this.kWorkDone; 1397 } 1398 1399 // Make sure listeners get notified about this job. 1400 GlodaIndexer._notifyListeners(); 1401 1402 // there is of course a cost to all this header investigation even if we 1403 // don't do something. so we will yield with kWorkSync for every block. 1404 const HEADER_CHECK_SYNC_BLOCK_SIZE = this.HEADER_CHECK_SYNC_BLOCK_SIZE; 1405 1406 // we can safely presume if we are here that this folder has been selected 1407 // for offline processing... 1408 1409 // -- Filthy Folder 1410 // A filthy folder may have misleading properties on the message that claim 1411 // the message is indexed. They are misleading because the database, for 1412 // whatever reason, does not have the messages (accurately) indexed. 1413 // We need to walk all the messages and mark them filthy if they have a 1414 // dirty property. Once we have done this, we can downgrade the folder's 1415 // dirty status to plain dirty. We do this rather than trying to process 1416 // everyone in one go in a filthy context because if we have to terminate 1417 // indexing before we quit, we don't want to have to re-index messages next 1418 // time. (This could even lead to never completing indexing in a 1419 // pathological situation.) 
1420 let glodaFolder = GlodaDatastore._mapFolder(this._indexingFolder); 1421 if (glodaFolder.dirtyStatus == glodaFolder.kFolderFilthy) { 1422 this._indexerGetEnumerator(this.kEnumIndexedMsgs, true); 1423 let count = 0; 1424 for (let msgHdr of this._indexingEnumerator) { 1425 // we still need to avoid locking up the UI, pause periodically... 1426 if (++count % HEADER_CHECK_SYNC_BLOCK_SIZE == 0) { 1427 yield this.kWorkSync; 1428 } 1429 1430 let glodaMessageId = msgHdr.getUint32Property( 1431 GLODA_MESSAGE_ID_PROPERTY 1432 ); 1433 // if it has a gloda message id, we need to mark it filthy 1434 if (glodaMessageId != 0) { 1435 msgHdr.setUint32Property(GLODA_DIRTY_PROPERTY, this.kMessageFilthy); 1436 } 1437 // if it doesn't have a gloda message id, we will definitely index it, 1438 // so no action is required. 1439 } 1440 // Commit the filthy status changes to the message database. 1441 this._indexingDatabase.Commit(Ci.nsMsgDBCommitType.kLargeCommit); 1442 1443 // this will automatically persist to the database 1444 glodaFolder._downgradeDirtyStatus(glodaFolder.kFolderDirty); 1445 } 1446 1447 // Figure out whether we're supposed to index _everything_ or just what 1448 // has not yet been indexed. 1449 let force = "force" in aJob && aJob.force; 1450 let enumeratorType = force ? this.kEnumAllMsgs : this.kEnumMsgsToIndex; 1451 1452 // Pass 1: count the number of messages to index. 1453 // We do this in order to be able to report to the user what we're doing. 1454 // TODO: give up after reaching a certain number of messages in folders 1455 // with ridiculous numbers of messages and make the interface just say 1456 // something like "over N messages to go." 1457 1458 this._indexerGetEnumerator(enumeratorType); 1459 1460 let numMessagesToIndex = 0; 1461 // eslint-disable-next-line no-unused-vars 1462 for (let ignore of this._indexingEnumerator) { 1463 // We're only counting, so do bigger chunks on this pass. 
1464 ++numMessagesToIndex; 1465 if (numMessagesToIndex % (HEADER_CHECK_SYNC_BLOCK_SIZE * 8) == 0) { 1466 yield this.kWorkSync; 1467 } 1468 } 1469 1470 aJob.goal = numMessagesToIndex; 1471 1472 if (numMessagesToIndex > 0) { 1473 // We used up the iterator, get a new one. 1474 this._indexerGetEnumerator(enumeratorType); 1475 1476 // Pass 2: index the messages. 1477 let count = 0; 1478 for (let msgHdr of this._indexingEnumerator) { 1479 // per above, we want to periodically release control while doing all 1480 // this header traversal/investigation. 1481 if (++count % HEADER_CHECK_SYNC_BLOCK_SIZE == 0) { 1482 yield this.kWorkSync; 1483 } 1484 1485 // To keep our counts more accurate, increment the offset before 1486 // potentially skipping any messages. 1487 ++aJob.offset; 1488 1489 // Skip messages that have not yet been reported to us as existing via 1490 // msgsClassified. 1491 if ( 1492 this._indexingFolder.getProcessingFlags(msgHdr.messageKey) & 1493 NOT_YET_REPORTED_PROCESSING_FLAGS 1494 ) { 1495 continue; 1496 } 1497 1498 // Because the gloda id could be in-flight, we need to double-check the 1499 // enumerator here since it can't know about our in-memory stuff. 1500 let [glodaId, glodaDirty] = PendingCommitTracker.getGlodaState(msgHdr); 1501 // if the message seems valid and we are not forcing indexing, skip it. 1502 // (that means good gloda id and not dirty) 1503 if ( 1504 !force && 1505 glodaId >= GLODA_FIRST_VALID_MESSAGE_ID && 1506 glodaDirty == this.kMessageClean 1507 ) { 1508 continue; 1509 } 1510 1511 this._log.debug(">>> calling _indexMessage"); 1512 yield aCallbackHandle.pushAndGo( 1513 this._indexMessage(msgHdr, aCallbackHandle), 1514 { what: "indexMessage", msgHdr } 1515 ); 1516 GlodaIndexer._indexedMessageCount++; 1517 this._log.debug("<<< back from _indexMessage"); 1518 } 1519 } 1520 1521 // This will trigger an (async) db update which cannot hit the disk prior to 1522 // the actual database records that constitute the clean state. 
1523 // XXX There is the slight possibility that, in the event of a crash, this 1524 // will hit the disk but the gloda-id properties on the headers will not 1525 // get set. This should ideally be resolved by detecting a non-clean 1526 // shutdown and marking all folders as dirty. 1527 glodaFolder._downgradeDirtyStatus(glodaFolder.kFolderClean); 1528 1529 // by definition, it's not likely we'll visit this folder again anytime soon 1530 this._indexerLeaveFolder(); 1531 1532 aJob.safelyInvokeCallback(true); 1533 1534 yield this.kWorkDone; 1535 }, 1536 1537 /** 1538 * Invoked when a "message" job is scheduled so that we can clear 1539 * _pendingAddJob if that is the job. We do this so that work items are not 1540 * added to _pendingAddJob while it is being processed. 1541 */ 1542 _schedule_messageIndex(aJob, aCallbackHandle) { 1543 // we do not want new work items to be added as we are processing, so 1544 // clear _pendingAddJob. A new job will be created as needed. 1545 if (aJob === this._pendingAddJob) { 1546 this._pendingAddJob = null; 1547 } 1548 // update our goal from the items length 1549 aJob.goal = aJob.items.length; 1550 }, 1551 /** 1552 * If the job gets canceled, we need to make sure that we clear out pending 1553 * add job or our state will get wonky. 1554 */ 1555 _canceled_messageIndex(aJob) { 1556 if (aJob === this._pendingAddJob) { 1557 this._pendingAddJob = null; 1558 } 1559 }, 1560 1561 /** 1562 * Index a specific list of messages that we know to index from 1563 * event-notification hints. 1564 */ 1565 *_worker_messageIndex(aJob, aCallbackHandle) { 1566 // if we are already in the correct folder, our "get in the folder" clause 1567 // will not execute, so we need to make sure this value is accurate in 1568 // that case. (and we want to avoid multiple checks...) 
1569 for (; aJob.offset < aJob.items.length; aJob.offset++) { 1570 let item = aJob.items[aJob.offset]; 1571 // item is either [folder ID, message key] or 1572 // [folder ID, message ID] 1573 1574 let glodaFolderId = item[0]; 1575 // If the folder has been deleted since we queued, skip this message 1576 if (!GlodaDatastore._folderIdKnown(glodaFolderId)) { 1577 continue; 1578 } 1579 let glodaFolder = GlodaDatastore._mapFolderID(glodaFolderId); 1580 1581 // Stay out of folders that: 1582 // - are compacting / compacted and not yet processed 1583 // - got deleted (this would be redundant if we had a stance on id nukage) 1584 // (these things could have changed since we queued the event) 1585 if ( 1586 glodaFolder.compacting || 1587 glodaFolder.compacted || 1588 glodaFolder._deleted 1589 ) { 1590 continue; 1591 } 1592 1593 // get in the folder 1594 if (this._indexingGlodaFolder != glodaFolder) { 1595 yield this._indexerEnterFolder(glodaFolderId); 1596 1597 // Now that we have the real nsIMsgFolder, sanity-check that we should 1598 // be indexing it. (There are some checks that require the 1599 // nsIMsgFolder.) 1600 if (!this.shouldIndexFolder(this._indexingFolder)) { 1601 continue; 1602 } 1603 } 1604 1605 let msgHdr; 1606 // GetMessageHeader can be affected by the use cache, so we need to check 1607 // ContainsKey first to see if the header is really actually there. 1608 if (typeof item[1] == "number") { 1609 msgHdr = 1610 this._indexingDatabase.ContainsKey(item[1]) && 1611 this._indexingFolder.GetMessageHeader(item[1]); 1612 } else { 1613 // Same deal as in move processing. 1614 // TODO fixme to not assume singular message-id's. 
1615 msgHdr = this._indexingDatabase.getMsgHdrForMessageID(item[1]); 1616 } 1617 1618 if (msgHdr) { 1619 yield aCallbackHandle.pushAndGo( 1620 this._indexMessage(msgHdr, aCallbackHandle), 1621 { what: "indexMessage", msgHdr } 1622 ); 1623 } else { 1624 yield this.kWorkSync; 1625 } 1626 } 1627 1628 // There is no real reason to stay 'in' the folder. If we are going to get 1629 // more events from the folder, its database would have to be open for us 1630 // to get the events, so it's not like we're creating an efficiency 1631 // problem where we unload a folder just to load it again in 2 seconds. 1632 // (Well, at least assuming the views are good about holding onto the 1633 // database references even though they go out of their way to avoid 1634 // holding onto message header references.) 1635 this._indexerLeaveFolder(); 1636 1637 yield this.kWorkDone; 1638 }, 1639 1640 /** 1641 * Recover from a "folder" or "message" job failing inside a call to 1642 * |_indexMessage|, marking the message bad. If we were not in an 1643 * |_indexMessage| call, then fail to recover. 1644 * 1645 * @param aJob The job that was being worked. We ignore this for now. 1646 * @param aContextStack The callbackHandle mechanism's context stack. When we 1647 * invoke pushAndGo for _indexMessage we put something in so we can 1648 * detect when it is on the async stack. 1649 * @param aException The exception that is necessitating we attempt to 1650 * recover. 1651 * 1652 * @return 1 if we were able to recover (because we want the call stack 1653 * popped down to our worker), false if we can't. 1654 */ 1655 _recover_indexMessage(aJob, aContextStack, aException) { 1656 // See if indexMessage is on the stack... 1657 if ( 1658 aContextStack.length >= 2 && 1659 aContextStack[1] && 1660 "what" in aContextStack[1] && 1661 aContextStack[1].what == "indexMessage" 1662 ) { 1663 // it is, so this is probably recoverable. 
1664 1665 this._log.debug( 1666 "Exception while indexing message, marking it bad (gloda id of 1)." 1667 ); 1668 1669 // -- Mark the message as bad 1670 let msgHdr = aContextStack[1].msgHdr; 1671 // (In the worst case, the header is no longer valid, which will result in 1672 // exceptions. We need to be prepared for that.) 1673 try { 1674 msgHdr.setUint32Property( 1675 GLODA_MESSAGE_ID_PROPERTY, 1676 GLODA_BAD_MESSAGE_ID 1677 ); 1678 // clear the dirty bit if it has one 1679 if (msgHdr.getUint32Property(GLODA_DIRTY_PROPERTY)) { 1680 msgHdr.setUint32Property(GLODA_DIRTY_PROPERTY, 0); 1681 } 1682 } catch (ex) { 1683 // If we are indexing a folder and the message header is no longer 1684 // valid, then it's quite likely the whole folder is no longer valid. 1685 // But since in the event-driven message indexing case we could have 1686 // other valid things to look at, let's try and recover. The folder 1687 // indexing case will come back to us shortly and we will indicate 1688 // recovery is not possible at that point. 1689 // So do nothing here since by popping the indexing of the specific 1690 // message out of existence we are recovering. 1691 } 1692 return 1; 1693 } 1694 return false; 1695 }, 1696 1697 /** 1698 * Cleanup after an aborted "folder" or "message" job. 1699 */ 1700 _cleanup_indexing(aJob) { 1701 this._indexerLeaveFolder(); 1702 aJob.safelyInvokeCallback(false); 1703 }, 1704 1705 /** 1706 * Maximum number of deleted messages to process at a time. Arbitrary; there 1707 * are no real known performance constraints at this point. 1708 */ 1709 DELETED_MESSAGE_BLOCK_SIZE: 32, 1710 1711 /** 1712 * Process pending deletes... 1713 */ 1714 *_worker_processDeletes(aJob, aCallbackHandle) { 1715 // Count the number of messages we will eventually process. People freak 1716 // out when the number is constantly increasing because they think gloda 1717 // has gone rogue. 
(Note: new deletions can still accumulate during 1718 // our execution, so we may 'expand' our count a little still.) 1719 this._datastore.countDeletedMessages(aCallbackHandle.wrappedCallback); 1720 aJob.goal = yield this.kWorkAsync; 1721 this._log.debug( 1722 "There are currently " + 1723 aJob.goal + 1724 " messages awaiting" + 1725 " deletion processing." 1726 ); 1727 1728 // get a block of messages to delete. 1729 let query = Gloda.newQuery(Gloda.NOUN_MESSAGE, { 1730 noDbQueryValidityConstraints: true, 1731 }); 1732 query._deleted(1); 1733 query.limit(this.DELETED_MESSAGE_BLOCK_SIZE); 1734 let deletedCollection = query.getCollection(aCallbackHandle); 1735 yield this.kWorkAsync; 1736 1737 while (deletedCollection.items.length) { 1738 for (let message of deletedCollection.items) { 1739 // If it turns out our count is wrong (because some new deletions 1740 // happened since we entered this worker), let's issue a new count 1741 // and use that to accurately update our goal. 1742 if (aJob.offset >= aJob.goal) { 1743 this._datastore.countDeletedMessages(aCallbackHandle.wrappedCallback); 1744 aJob.goal += yield this.kWorkAsync; 1745 } 1746 1747 yield aCallbackHandle.pushAndGo( 1748 this._deleteMessage(message, aCallbackHandle) 1749 ); 1750 aJob.offset++; 1751 yield this.kWorkSync; 1752 } 1753 1754 deletedCollection = query.getCollection(aCallbackHandle); 1755 yield this.kWorkAsync; 1756 } 1757 this.pendingDeletions = false; 1758 1759 yield this.kWorkDone; 1760 }, 1761 1762 *_worker_fixMissingContacts(aJob, aCallbackHandle) { 1763 let identityContactInfos = []; 1764 1765 // -- asynchronously get a list of all identities without contacts 1766 // The upper bound on the number of messed up contacts is the number of 1767 // contacts in the user's address book. This should be small enough 1768 // (and the data size small enough) that this won't explode thunderbird. 
1769 let queryStmt = GlodaDatastore._createAsyncStatement( 1770 "SELECT identities.id, identities.contactID, identities.value " + 1771 "FROM identities " + 1772 "LEFT JOIN contacts ON identities.contactID = contacts.id " + 1773 "WHERE identities.kind = 'email' AND contacts.id IS NULL", 1774 true 1775 ); 1776 queryStmt.executeAsync({ 1777 handleResult(aResultSet) { 1778 let row; 1779 while ((row = aResultSet.getNextRow())) { 1780 identityContactInfos.push({ 1781 identityId: row.getInt64(0), 1782 contactId: row.getInt64(1), 1783 email: row.getString(2), 1784 }); 1785 } 1786 }, 1787 handleError(aError) {}, 1788 handleCompletion(aReason) { 1789 GlodaDatastore._asyncCompleted(); 1790 aCallbackHandle.wrappedCallback(); 1791 }, 1792 }); 1793 queryStmt.finalize(); 1794 GlodaDatastore._pendingAsyncStatements++; 1795 yield this.kWorkAsync; 1796 1797 // -- perform fixes only if there were missing contacts 1798 if (identityContactInfos.length) { 1799 const yieldEvery = 64; 1800 // - create the missing contacts 1801 for (let i = 0; i < identityContactInfos.length; i++) { 1802 if (i % yieldEvery === 0) { 1803 yield this.kWorkSync; 1804 } 1805 1806 let info = identityContactInfos[i], 1807 card = MailServices.ab.cardForEmailAddress(info.email), 1808 contact = new GlodaContact( 1809 GlodaDatastore, 1810 info.contactId, 1811 null, 1812 null, 1813 card ? card.displayName || info.email : info.email, 1814 0, 1815 0 1816 ); 1817 GlodaDatastore.insertContact(contact); 1818 1819 // update the in-memory rep of the identity to know about the contact 1820 // if there is one. 1821 let identity = GlodaCollectionManager.cacheLookupOne( 1822 Gloda.NOUN_IDENTITY, 1823 info.identityId, 1824 false 1825 ); 1826 if (identity) { 1827 // Unfortunately, although this fixes the (reachable) Identity and 1828 // exposes the Contact, it does not make the Contact reachable from 1829 // the collection manager. 
This will make explicit queries that look 1830 // up the contact potentially see the case where 1831 // contact.identities[0].contact !== contact. Alternately, that 1832 // may not happen and instead the "contact" object we created above 1833 // may become unlinked. (I'd have to trace some logic I don't feel 1834 // like tracing.) Either way, The potential fallout is minimal 1835 // since the object identity invariant will just lapse and popularity 1836 // on the contact may become stale, and neither of those meaningfully 1837 // affect the operation of anything in Thunderbird. 1838 // If we really cared, we could find all the dominant collections 1839 // that reference the identity and update their corresponding 1840 // contact collection to make it reachable. That use-case does not 1841 // exist outside of here, which is why we're punting. 1842 identity._contact = contact; 1843 contact._identities = [identity]; 1844 } 1845 1846 // NOTE: If the addressbook indexer did anything useful other than 1847 // adapting to name changes, we could schedule indexing of the cards at 1848 // this time. However, as of this writing, it doesn't, and this task 1849 // is a one-off relevant only to the time of this writing. 1850 } 1851 1852 // - mark all folders as dirty, initiate indexing sweep 1853 this.dirtyAllKnownFolders(); 1854 this.indexingSweepNeeded = true; 1855 } 1856 1857 // -- mark the schema upgrade, be done 1858 GlodaDatastore._updateSchemaVersion(GlodaDatastore._schemaVersion); 1859 yield this.kWorkDone; 1860 }, 1861 1862 /** 1863 * Determine whether a folder is suitable for indexing. 1864 * 1865 * @param aMsgFolder An nsIMsgFolder you want to see if we should index. 1866 * 1867 * @returns true if we want to index messages in this type of folder, false if 1868 * we do not. 1869 */ 1870 shouldIndexFolder(aMsgFolder) { 1871 let folderFlags = aMsgFolder.flags; 1872 // Completely ignore non-mail and virtual folders. 
They should never even 1873 // get to be GlodaFolder instances. 1874 if ( 1875 !(folderFlags & Ci.nsMsgFolderFlags.Mail) || 1876 folderFlags & Ci.nsMsgFolderFlags.Virtual 1877 ) { 1878 return false; 1879 } 1880 1881 // Some folders do not really exist; we can detect this by getStringProperty 1882 // exploding when we call it. This is primarily a concern because 1883 // _mapFolder calls said exploding method, but we also don't want to 1884 // even think about indexing folders that don't exist. (Such folders are 1885 // likely the result of a messed up profile.) 1886 try { 1887 // flags is used because it should always be in the cache avoiding a miss 1888 // which would compel an msf open. 1889 aMsgFolder.getStringProperty("flags"); 1890 } catch (ex) { 1891 return false; 1892 } 1893 1894 // Now see what our gloda folder information has to say about the folder. 1895 let glodaFolder = GlodaDatastore._mapFolder(aMsgFolder); 1896 return glodaFolder.indexingPriority != glodaFolder.kIndexingNeverPriority; 1897 }, 1898 1899 /** 1900 * Sets the indexing priority for this folder and persists it both to Gloda, 1901 * and, for backup purposes, to the nsIMsgFolder via string property as well. 1902 * 1903 * Setting this priority may cause the indexer to either reindex this folder, 1904 * or remove this folder from the existing index. 
1905 * 1906 * @param {nsIMsgFolder} aFolder 1907 * @param {Number} aPriority (one of the priority constants from GlodaFolder) 1908 */ 1909 setFolderIndexingPriority(aFolder, aPriority) { 1910 let glodaFolder = GlodaDatastore._mapFolder(aFolder); 1911 1912 // if there's been no change, we're done 1913 if (aPriority == glodaFolder.indexingPriority) { 1914 return; 1915 } 1916 1917 // save off the old priority, and set the new one 1918 let previousPrio = glodaFolder.indexingPriority; 1919 glodaFolder._indexingPriority = aPriority; 1920 1921 // persist the new priority 1922 GlodaDatastore.updateFolderIndexingPriority(glodaFolder); 1923 aFolder.setStringProperty("indexingPriority", Number(aPriority).toString()); 1924 1925 // if we've been told never to index this folder... 1926 if (aPriority == glodaFolder.kIndexingNeverPriority) { 1927 // stop doing so 1928 if (this._indexingFolder == aFolder) { 1929 GlodaIndexer.killActiveJob(); 1930 } 1931 1932 // mark all existing messages as deleted 1933 GlodaDatastore.markMessagesDeletedByFolderID(glodaFolder.id); 1934 1935 // re-index 1936 GlodaMsgIndexer.indexingSweepNeeded = true; 1937 } else if (previousPrio == glodaFolder.kIndexingNeverPriority) { 1938 // there's no existing index, but the user now wants one 1939 glodaFolder._dirtyStatus = glodaFolder.kFolderFilthy; 1940 GlodaDatastore.updateFolderDirtyStatus(glodaFolder); 1941 GlodaMsgIndexer.indexingSweepNeeded = true; 1942 } 1943 }, 1944 1945 /** 1946 * Resets the indexing priority on the given folder to whatever the default 1947 * is for folders of that type. 
1948 * 1949 * @note Calls setFolderIndexingPriority under the hood, so has identical 1950 * potential reindexing side-effects 1951 * 1952 * @param {nsIMsgFolder} aFolder 1953 * @param {boolean} aAllowSpecialFolderIndexing 1954 */ 1955 resetFolderIndexingPriority(aFolder, aAllowSpecialFolderIndexing) { 1956 this.setFolderIndexingPriority( 1957 aFolder, 1958 GlodaDatastore.getDefaultIndexingPriority( 1959 aFolder, 1960 aAllowSpecialFolderIndexing 1961 ) 1962 ); 1963 }, 1964 1965 /** 1966 * Queue all of the folders of all of the accounts of the current profile 1967 * for indexing. We traverse all folders and queue them immediately to try 1968 * and have an accurate estimate of the number of folders that need to be 1969 * indexed. (We previously queued accounts rather than immediately 1970 * walking their list of folders.) 1971 */ 1972 indexEverything() { 1973 this._log.info("Queueing all accounts for indexing."); 1974 1975 GlodaDatastore._beginTransaction(); 1976 for (let account of MailServices.accounts.accounts) { 1977 this.indexAccount(account); 1978 } 1979 GlodaDatastore._commitTransaction(); 1980 }, 1981 1982 /** 1983 * Queue all of the folders belonging to an account for indexing. 1984 */ 1985 indexAccount(aAccount) { 1986 let rootFolder = aAccount.incomingServer.rootFolder; 1987 if (rootFolder instanceof Ci.nsIMsgFolder) { 1988 this._log.info("Queueing account folders for indexing: " + aAccount.key); 1989 1990 for (let folder of rootFolder.descendants) { 1991 if (this.shouldIndexFolder(folder)) { 1992 GlodaIndexer.indexJob( 1993 new IndexingJob("folder", GlodaDatastore._mapFolder(folder).id) 1994 ); 1995 } 1996 } 1997 } else { 1998 this._log.info("Skipping Account, root folder not nsIMsgFolder"); 1999 } 2000 }, 2001 2002 /** 2003 * Queue a single folder for indexing given an nsIMsgFolder. 2004 * 2005 * @param [aOptions.callback] A callback to invoke when the folder finishes 2006 * indexing. 
First argument is true if the task ran to completion 2007 * successfully, false if we had to abort for some reason. 2008 * @param [aOptions.force=false] Should we force the indexing of all messages 2009 * in the folder (true) or just index what hasn't been indexed (false). 2010 * @return true if we are going to index the folder, false if not. 2011 */ 2012 indexFolder(aMsgFolder, aOptions) { 2013 if (!this.shouldIndexFolder(aMsgFolder)) { 2014 return false; 2015 } 2016 let glodaFolder = GlodaDatastore._mapFolder(aMsgFolder); 2017 // stay out of compacting/compacted folders 2018 if (glodaFolder.compacting || glodaFolder.compacted) { 2019 return false; 2020 } 2021 2022 this._log.info("Queue-ing folder for indexing: " + aMsgFolder.prettyName); 2023 let job = new IndexingJob("folder", glodaFolder.id); 2024 if (aOptions) { 2025 if ("callback" in aOptions) { 2026 job.callback = aOptions.callback; 2027 } 2028 if ("force" in aOptions) { 2029 job.force = true; 2030 } 2031 } 2032 GlodaIndexer.indexJob(job); 2033 return true; 2034 }, 2035 2036 /** 2037 * Queue a list of messages for indexing. 2038 * 2039 * @param aFoldersAndMessages List of [nsIMsgFolder, message key] tuples. 2040 */ 2041 indexMessages(aFoldersAndMessages) { 2042 let job = new IndexingJob("message", null); 2043 job.items = aFoldersAndMessages.map(fm => [ 2044 GlodaDatastore._mapFolder(fm[0]).id, 2045 fm[1], 2046 ]); 2047 GlodaIndexer.indexJob(job); 2048 }, 2049 2050 /** 2051 * Mark all known folders as dirty so that the next indexing sweep goes 2052 * into all folders and checks their contents to see if they need to be 2053 * indexed. 2054 * 2055 * This is being added for the migration case where we want to try and reindex 2056 * all of the messages that had been marked with GLODA_BAD_MESSAGE_ID but 2057 * which is now GLODA_OLD_BAD_MESSAGE_ID and so we should attempt to reindex 2058 * them. 
2059 */ 2060 dirtyAllKnownFolders() { 2061 // Just iterate over the datastore's folder map and tell each folder to 2062 // be dirty if its priority is not disabled. 2063 for (let folderID in GlodaDatastore._folderByID) { 2064 let glodaFolder = GlodaDatastore._folderByID[folderID]; 2065 if (glodaFolder.indexingPriority !== glodaFolder.kIndexingNeverPriority) { 2066 glodaFolder._ensureFolderDirty(); 2067 } 2068 } 2069 }, 2070 2071 /** 2072 * Given a message header, return whether this message is likely to have 2073 * been indexed or not. 2074 * 2075 * This means the message must: 2076 * - Be in a folder eligible for gloda indexing. (Not News, etc.) 2077 * - Be in a non-filthy folder. 2078 * - Be gloda-indexed and non-filthy. 2079 * 2080 * @param aMsgHdr A message header. 2081 * @returns true if the message is likely to have been indexed. 2082 */ 2083 isMessageIndexed(aMsgHdr) { 2084 // If it's in a folder that we flat out do not index, say no. 2085 if (!this.shouldIndexFolder(aMsgHdr.folder)) { 2086 return false; 2087 } 2088 let glodaFolder = GlodaDatastore._mapFolder(aMsgHdr.folder); 2089 let [glodaId, glodaDirty] = PendingCommitTracker.getGlodaState(aMsgHdr); 2090 return ( 2091 glodaId >= GLODA_FIRST_VALID_MESSAGE_ID && 2092 glodaDirty != GlodaMsgIndexer.kMessageFilthy && 2093 glodaFolder && 2094 glodaFolder.dirtyStatus != glodaFolder.kFolderFilthy 2095 ); 2096 }, 2097 2098 /* *********** Event Processing *********** */ 2099 2100 /** 2101 * Tracks messages we have received msgKeyChanged notifications for in order 2102 * to provide batching and to suppress needless reindexing when we receive 2103 * the expected follow-up msgsClassified notification. 2104 * 2105 * The entries in this dictionary should be extremely short-lived as we 2106 * receive the msgKeyChanged notification as the offline fake header is 2107 * converted into a real header (which is accompanied by a msgAdded 2108 * notification we don't pay attention to). 
Once the headers finish 2109 * updating, the message classifier will get its at-bat and should likely 2110 * find that the messages have already been classified and so fast-path 2111 * them. 2112 * 2113 * The keys in this dictionary are chosen to be consistent with those of 2114 * PendingCommitTracker: the folder.URI + "#" + the (new) message key. 2115 * The values in the dictionary are either an object with "id" (the gloda 2116 * id), "key" (the new message key), and "dirty" (is it dirty and so 2117 * should still be queued for indexing) attributes, or null indicating that 2118 * no change in message key occurred and so no database changes are required. 2119 */ 2120 _keyChangedBatchInfo: {}, 2121 2122 /** 2123 * Common logic for things that want to feed event-driven indexing. This gets 2124 * called by both |_msgFolderListener.msgsClassified| when we are first 2125 * seeing a message as well as by |_folderListener| when things happen to 2126 * existing messages. Although we could slightly specialize for the 2127 * new-to-us case, it works out to be cleaner to just treat them the same 2128 * and take a very small performance hit. 2129 * 2130 * @param aMsgHdrs array of messages to treat as potentially changed. 2131 * @param aDirtyingEvent Is this event inherently dirtying? Receiving a 2132 * msgsClassified notification is not inherently dirtying because it is 2133 * just telling us that a message exists. We use this knowledge to 2134 * ignore the msgsClassified notifications for messages we have received 2135 * msgKeyChanged notifications for and fast-pathed. Since it is possible 2136 * for user action to do something that dirties the message between the 2137 * time we get the msgKeyChanged notification and when we receive the 2138 * msgsClassified notification, we want to make sure we don't get 2139 * confused. 
(Although since we remove the message from our ignore-set 2140 * after the first notification, we would likely just mistakenly treat 2141 * the msgsClassified notification as something dirtying, so it would 2142 * still work out...) 2143 */ 2144 _reindexChangedMessages(aMsgHdrs, aDirtyingEvent) { 2145 let glodaIdsNeedingDeletion = null; 2146 let messageKeyChangedIds = null, 2147 messageKeyChangedNewKeys = null; 2148 for (let msgHdr of aMsgHdrs) { 2149 // -- Index this folder? 2150 let msgFolder = msgHdr.folder; 2151 if (!this.shouldIndexFolder(msgFolder)) { 2152 continue; 2153 } 2154 // -- Ignore messages in filthy folders! 2155 // A filthy folder can only be processed by an indexing sweep, and at 2156 // that point the message will get indexed. 2157 let glodaFolder = GlodaDatastore._mapFolder(msgHdr.folder); 2158 if (glodaFolder.dirtyStatus == glodaFolder.kFolderFilthy) { 2159 continue; 2160 } 2161 2162 // -- msgKeyChanged event follow-up 2163 if (!aDirtyingEvent) { 2164 let keyChangedKey = msgHdr.folder.URI + "#" + msgHdr.messageKey; 2165 if (keyChangedKey in this._keyChangedBatchInfo) { 2166 var keyChangedInfo = this._keyChangedBatchInfo[keyChangedKey]; 2167 delete this._keyChangedBatchInfo[keyChangedKey]; 2168 2169 // Null means to ignore this message because the key did not change 2170 // (and the message was not dirty so it is safe to ignore.) 2171 if (keyChangedInfo == null) { 2172 continue; 2173 } 2174 // (the key may be null if we only generated the entry because the 2175 // message was dirty) 2176 if (keyChangedInfo.key !== null) { 2177 if (messageKeyChangedIds == null) { 2178 messageKeyChangedIds = []; 2179 messageKeyChangedNewKeys = []; 2180 } 2181 messageKeyChangedIds.push(keyChangedInfo.id); 2182 messageKeyChangedNewKeys.push(keyChangedInfo.key); 2183 } 2184 // ignore the message because it was not dirty 2185 if (!keyChangedInfo.isDirty) { 2186 continue; 2187 } 2188 } 2189 } 2190 2191 // -- Index this message? 
2192 // We index local messages, IMAP messages that are offline, and IMAP 2193 // messages that aren't offline but whose folders aren't offline either 2194 let isFolderLocal = msgFolder instanceof Ci.nsIMsgLocalMailFolder; 2195 if (!isFolderLocal) { 2196 if ( 2197 !(msgHdr.flags & Ci.nsMsgMessageFlags.Offline) && 2198 msgFolder.getFlag(Ci.nsMsgFolderFlags.Offline) 2199 ) { 2200 continue; 2201 } 2202 } 2203 // Ignore messages whose processing flags indicate it has not yet been 2204 // classified. In the IMAP case if the Offline flag is going to get set 2205 // we are going to see it before the msgsClassified event so this is 2206 // very important. 2207 if ( 2208 msgFolder.getProcessingFlags(msgHdr.messageKey) & 2209 NOT_YET_REPORTED_PROCESSING_FLAGS 2210 ) { 2211 continue; 2212 } 2213 2214 let [glodaId, glodaDirty] = PendingCommitTracker.getGlodaState(msgHdr); 2215 2216 let isSpam = 2217 msgHdr.getStringProperty(JUNK_SCORE_PROPERTY) == JUNK_SPAM_SCORE_STR; 2218 2219 // -- Is the message currently gloda indexed? 2220 if ( 2221 glodaId >= GLODA_FIRST_VALID_MESSAGE_ID && 2222 glodaDirty != this.kMessageFilthy 2223 ) { 2224 // - Is the message spam? 2225 if (isSpam) { 2226 // Treat this as a deletion... 2227 if (!glodaIdsNeedingDeletion) { 2228 glodaIdsNeedingDeletion = []; 2229 } 2230 glodaIdsNeedingDeletion.push(glodaId); 2231 // and skip to the next message 2232 continue; 2233 } 2234 2235 // - Mark the message dirty if it is clean. 2236 // (This is the only case in which we need to mark dirty so that the 2237 // indexing sweep takes care of things if we don't process this in 2238 // an event-driven fashion. If the message has no gloda-id or does 2239 // and it's already dirty or filthy, it is already marked for 2240 // indexing.) 2241 if (glodaDirty == this.kMessageClean) { 2242 msgHdr.setUint32Property(GLODA_DIRTY_PROPERTY, this.kMessageDirty); 2243 } 2244 // if the message is pending clean, this change invalidates that. 
2245 PendingCommitTracker.noteDirtyHeader(msgHdr); 2246 } else if (isSpam) { 2247 // If it's not indexed but is spam, ignore it. 2248 continue; 2249 } 2250 // (we want to index the message if we are here) 2251 2252 // mark the folder dirty too, so we know to look inside 2253 glodaFolder._ensureFolderDirty(); 2254 2255 if (this._pendingAddJob == null) { 2256 this._pendingAddJob = new IndexingJob("message", null); 2257 GlodaIndexer.indexJob(this._pendingAddJob); 2258 } 2259 // only queue the message if we haven't overflowed our event-driven budget 2260 if (this._pendingAddJob.items.length < this._indexMaxEventQueueMessages) { 2261 this._pendingAddJob.items.push([ 2262 GlodaDatastore._mapFolder(msgFolder).id, 2263 msgHdr.messageKey, 2264 ]); 2265 } else { 2266 this.indexingSweepNeeded = true; 2267 } 2268 } 2269 2270 // Process any message key changes (from earlier msgKeyChanged events) 2271 if (messageKeyChangedIds != null) { 2272 GlodaDatastore.updateMessageKeys( 2273 messageKeyChangedIds, 2274 messageKeyChangedNewKeys 2275 ); 2276 } 2277 2278 // If we accumulated any deletions in there, batch them off now. 2279 if (glodaIdsNeedingDeletion) { 2280 GlodaDatastore.markMessagesDeletedByIDs(glodaIdsNeedingDeletion); 2281 this.pendingDeletions = true; 2282 } 2283 }, 2284 2285 /* ***** Folder Changes ***** */ 2286 /** 2287 * All additions and removals are queued for processing. Indexing messages 2288 * is potentially phenomenally expensive, and deletion can still be 2289 * relatively expensive due to our need to delete the message, its 2290 * attributes, and all attributes that reference it. Additionally, 2291 * attribute deletion costs are higher than attribute look-up because 2292 * there is the actual row plus its 3 indices, and our covering indices are 2293 * no help there. 
2294 * 2295 */ 2296 _msgFolderListener: { 2297 indexer: null, 2298 2299 /** 2300 * We no longer use the msgAdded notification, instead opting to wait until 2301 * junk/trait classification has run (or decided not to run) and all 2302 * filters have run. The msgsClassified notification provides that for us. 2303 */ 2304 msgAdded(aMsgHdr) { 2305 // we are never called! we do not enable this bit! 2306 }, 2307 2308 /** 2309 * Process (apparently newly added) messages that have been looked at by 2310 * the message classifier. This ensures that if the message was going 2311 * to get marked as spam, this will have already happened. 2312 * 2313 * Besides truly new (to us) messages, We will also receive this event for 2314 * messages that are the result of IMAP message move/copy operations, 2315 * including both moves that generated offline fake headers and those that 2316 * did not. In the offline fake header case, however, we are able to 2317 * ignore their msgsClassified events because we will have received a 2318 * msgKeyChanged notification sometime in the recent past. 2319 */ 2320 msgsClassified(aMsgHdrs, aJunkClassified, aTraitClassified) { 2321 this.indexer._log.debug("msgsClassified notification"); 2322 try { 2323 GlodaMsgIndexer._reindexChangedMessages(aMsgHdrs, false); 2324 } catch (ex) { 2325 this.indexer._log.error("Explosion in msgsClassified handling:", ex); 2326 } 2327 }, 2328 2329 /** 2330 * Any messages which have had their junk state changed are marked for 2331 * reindexing. 2332 */ 2333 msgsJunkStatusChanged(messages) { 2334 this.indexer._log.debug("JunkStatusChanged notification"); 2335 GlodaMsgIndexer._reindexChangedMessages(messages, true); 2336 }, 2337 2338 /** 2339 * Handle real, actual deletion (move to trash and IMAP deletion model 2340 * don't count); we only see the deletion here when it becomes forever, 2341 * or rather _just before_ it becomes forever. 
Because the header is 2342 * going away, we need to either process things immediately or extract the 2343 * information required to purge it later without the header. 2344 * To this end, we mark all messages that were indexed in the gloda message 2345 * database as deleted. We set our pending deletions flag to let our 2346 * indexing logic know that after its next wave of folder traversal, it 2347 * should perform a deletion pass. If it turns out the messages are coming 2348 * back, the fact that deletion is thus deferred can be handy, as we can 2349 * reuse the existing gloda message. 2350 */ 2351 msgsDeleted(aMsgHdrs) { 2352 this.indexer._log.debug("msgsDeleted notification"); 2353 let glodaMessageIds = []; 2354 2355 for (let msgHdr of aMsgHdrs) { 2356 let [glodaId, glodaDirty] = PendingCommitTracker.getGlodaState(msgHdr); 2357 if ( 2358 glodaId >= GLODA_FIRST_VALID_MESSAGE_ID && 2359 glodaDirty != GlodaMsgIndexer.kMessageFilthy 2360 ) { 2361 glodaMessageIds.push(glodaId); 2362 } 2363 } 2364 2365 if (glodaMessageIds.length) { 2366 GlodaMsgIndexer._datastore.markMessagesDeletedByIDs(glodaMessageIds); 2367 GlodaMsgIndexer.pendingDeletions = true; 2368 } 2369 }, 2370 2371 /** 2372 * Process a move or copy. 2373 * 2374 * Moves to a local folder or an IMAP folder where we are generating offline 2375 * fake headers are dealt with efficiently because we get both the source 2376 * and destination headers. The main ingredient to having offline fake 2377 * headers is that allowUndo was true when the operation was performance. 2378 * The only non-obvious thing is that we need to make sure that we deal 2379 * with the impact of filthy folders and messages on gloda-id's (they 2380 * invalidate the gloda-id). 
     *
     * Moves to an IMAP folder that do not generate offline fake headers do not
     *  provide us with the target header, but the IMAP SetPendingAttributes
     *  logic will still attempt to propagate the properties on the message
     *  header so when we eventually see it in the msgsClassified notification,
     *  it should have the properties of the source message copied over.
     * We make sure that gloda-id's do not get propagated when messages are
     *  moved from IMAP folders that are marked filthy or are marked as not
     *  supposed to be indexed by clearing the pending attributes for the header
     *  being tracked by the destination IMAP folder.
     * We could fast-path the IMAP move case in msgsClassified by noticing that
     *  a message is showing up with a gloda-id header already and just
     *  performing an async location update.
     *
     * Moves that occur involving 'compacted' folders are fine and do not
     *  require special handling here.  The one tricky super-edge-case that
     *  can happen (and gets handled by the compaction pass) is the move of a
     *  message that got gloda indexed that did not already have a gloda-id and
     *  PendingCommitTracker did not get to flush the gloda-id before the
     *  compaction happened.  In that case our move logic cannot know to do
     *  anything and the gloda database still thinks the message lives in our
     *  folder.  The compaction pass will deal with this by marking the message
     *  as deleted.  The rationale being that marking it deleted allows the
     *  message to be re-used if it gets indexed in the target location, or if
     *  the target location has already been indexed, we no longer need the
     *  duplicate and it should be deleted.  (Also, it is unable to distinguish
     *  between a case where the message got deleted versus moved.)
     *
     * Because copied messages are, by their nature, duplicate messages, we
     *  do not particularly care about them.  As such, we defer their processing
     *  to the automatic sync logic that will happen much later on.  This is
     *  potentially desirable in case the user deletes some of the original
     *  messages, allowing us to reuse the gloda message representations when
     *  we finally get around to indexing the messages.  We do need to mark the
     *  folder as dirty, though, to clue in the sync logic.
     *
     * Any exception raised while processing is caught and logged; it does not
     *  propagate to the caller.
     *
     * @param aMove Truthy for a move, falsy for a copy.
     * @param aSrcMsgHdrs The source message headers.  In the move case the
     *     folder of the first entry is taken as the source folder for all.
     * @param aDestFolder The folder the messages were moved/copied into.
     * @param aDestMsgHdrs The destination message headers; empty when the
     *     destination headers are not available (the IMAP case).  When
     *     non-empty it is indexed in parallel with aSrcMsgHdrs.
     */
    msgsMoveCopyCompleted(aMove, aSrcMsgHdrs, aDestFolder, aDestMsgHdrs) {
      this.indexer._log.debug("MoveCopy notification.  Move: " + aMove);
      try {
        // ---- Move
        if (aMove) {
          // -- Effectively a deletion?
          // If the destination folder is not indexed, it's like these messages
          //  are being deleted.
          if (!GlodaMsgIndexer.shouldIndexFolder(aDestFolder)) {
            this.msgsDeleted(aSrcMsgHdrs);
            return;
          }

          // -- Avoid propagation of filthy gloda-id's.
          // If the source folder is filthy or should not be indexed (and so
          //  any gloda-id's found in there are gibberish), our only job is to
          //  strip the gloda-id's off of all the destination headers because
          //  none of the gloda-id's are valid (and so we certainly don't want
          //  to try and use them as a basis for updating message keys.)
          // NOTE(review): an empty aSrcMsgHdrs makes this access throw; the
          //  outer catch logs it as a move/copy problem.
          let srcMsgFolder = aSrcMsgHdrs[0].folder;
          if (
            !this.indexer.shouldIndexFolder(srcMsgFolder) ||
            GlodaDatastore._mapFolder(srcMsgFolder).dirtyStatus ==
              GlodaFolder.prototype.kFolderFilthy
          ) {
            // Local case, just modify the destination headers directly.
            if (aDestMsgHdrs.length > 0) {
              for (let destMsgHdr of aDestMsgHdrs) {
                // zero it out if it exists
                // (no need to deal with pending commit issues here; a filthy
                //  folder by definition has nothing indexed in it.)
                let glodaId = destMsgHdr.getUint32Property(
                  GLODA_MESSAGE_ID_PROPERTY
                );
                if (glodaId) {
                  destMsgHdr.setUint32Property(GLODA_MESSAGE_ID_PROPERTY, 0);
                }
              }

              // Since we are moving messages from a folder where they were
              //  effectively not indexed, it is up to us to make sure the
              //  messages now get indexed.
              // (The dirtying-event argument is omitted, so it is undefined
              //  and therefore treated as falsy by the callee.)
              this.indexer._reindexChangedMessages(aDestMsgHdrs);
              return;
            }

            // IMAP move case, we need to operate on the pending headers using
            //  the source header to get the pending header and as the
            //  indication of what has been already set on the pending header.
            let destDb;
            // so, this can fail, and there's not much we can do about it.
            try {
              destDb = aDestFolder.msgDatabase;
            } catch (ex) {
              this.indexer._log.warn(
                "Destination database for " +
                  aDestFolder.prettyName +
                  " not ready on IMAP move." +
                  " Gloda corruption possible."
              );
              return;
            }
            for (let srcMsgHdr of aSrcMsgHdrs) {
              // zero it out if it exists
              // (no need to deal with pending commit issues here; a filthy
              //  folder by definition has nothing indexed in it.)
              let glodaId = srcMsgHdr.getUint32Property(
                GLODA_MESSAGE_ID_PROPERTY
              );
              if (glodaId) {
                destDb.setUint32AttributeOnPendingHdr(
                  srcMsgHdr,
                  GLODA_MESSAGE_ID_PROPERTY,
                  0
                );
              }
            }

            // Nothing remains to be done.  The msgClassified event will take
            //  care of making sure the message gets indexed.
            return;
          }

          // --- Have destination headers (local case):
          if (aDestMsgHdrs.length > 0) {
            // -- Update message keys for valid gloda-id's.
            // (Which means ignore filthy gloda-id's.)
            let glodaIds = [];
            let newMessageKeys = [];
            // Track whether we see any messages that are not gloda indexed so
            //  we know if we have to mark the destination folder dirty.
            let sawNonGlodaMessage = false;
            for (let iMsg = 0; iMsg < aSrcMsgHdrs.length; iMsg++) {
              let srcMsgHdr = aSrcMsgHdrs[iMsg];
              let destMsgHdr = aDestMsgHdrs[iMsg];

              let [glodaId, dirtyStatus] = PendingCommitTracker.getGlodaState(
                srcMsgHdr
              );
              if (
                glodaId >= GLODA_FIRST_VALID_MESSAGE_ID &&
                dirtyStatus != GlodaMsgIndexer.kMessageFilthy
              ) {
                // we may need to update the pending commit map (it checks)
                PendingCommitTracker.noteMove(srcMsgHdr, destMsgHdr);
                // but we always need to update our database
                glodaIds.push(glodaId);
                newMessageKeys.push(destMsgHdr.messageKey);
              } else {
                sawNonGlodaMessage = true;
              }
            }

            // this method takes care to update the in-memory representations
            //  too; we don't need to do anything
            if (glodaIds.length) {
              GlodaDatastore.updateMessageLocations(
                glodaIds,
                newMessageKeys,
                aDestFolder
              );
            }

            // Mark the destination folder dirty if we saw any messages that
            //  were not already gloda indexed.
            if (sawNonGlodaMessage) {
              let destGlodaFolder = GlodaDatastore._mapFolder(aDestFolder);
              destGlodaFolder._ensureFolderDirty();
              this.indexer.indexingSweepNeeded = true;
            }
          } else {
            // --- No dest headers (IMAP case):
            // Update any valid gloda indexed messages into their new folder to
            //  make the indexer's life easier when it sees the messages in their
            //  new folder.
            let glodaIds = [];

            let srcFolderIsLocal =
              srcMsgFolder instanceof Ci.nsIMsgLocalMailFolder;
            for (let msgHdr of aSrcMsgHdrs) {
              let [glodaId, dirtyStatus] = PendingCommitTracker.getGlodaState(
                msgHdr
              );
              if (
                glodaId >= GLODA_FIRST_VALID_MESSAGE_ID &&
                dirtyStatus != GlodaMsgIndexer.kMessageFilthy
              ) {
                // we may need to update the pending commit map (it checks)
                PendingCommitTracker.noteBlindMove(msgHdr);
                // but we always need to update our database
                glodaIds.push(glodaId);

                // XXX UNDO WORKAROUND
                // This constitutes a move from a local folder to an IMAP
                //  folder.  Undo does not currently do the right thing for us,
                //  but we have a chance of not orphaning the message if we
                //  mark the source header as dirty so that when the message
                //  gets re-added we see it.  (This does require that we enter
                //  the folder; we set the folder dirty after the loop to
                //  increase the probability of this but it's not foolproof
                //  depending on when the next indexing sweep happens and when
                //  the user performs an undo.)
                // NOTE(review): the header is marked dirty for every validly
                //  indexed message here, while only the folder dirtying below
                //  is conditional on the source folder being local.
                msgHdr.setUint32Property(
                  GLODA_DIRTY_PROPERTY,
                  GlodaMsgIndexer.kMessageDirty
                );
              }
            }
            // XXX ALSO UNDO WORKAROUND
            if (srcFolderIsLocal) {
              let srcGlodaFolder = GlodaDatastore._mapFolder(srcMsgFolder);
              srcGlodaFolder._ensureFolderDirty();
            }

            // quickly move them to the right folder, zeroing their message keys
            // (called even when glodaIds is empty)
            GlodaDatastore.updateMessageFoldersByKeyPurging(
              glodaIds,
              aDestFolder
            );
            // we _do not_ need to mark the folder as dirty, because the
            //  message added events will cause that to happen.
          }
        } else {
          // ---- Copy case
          // -- Do not propagate gloda-id's for copies
          // (Only applies if we have the destination header, which means local)
          for (let destMsgHdr of aDestMsgHdrs) {
            let glodaId = destMsgHdr.getUint32Property(
              GLODA_MESSAGE_ID_PROPERTY
            );
            if (glodaId) {
              destMsgHdr.setUint32Property(GLODA_MESSAGE_ID_PROPERTY, 0);
            }
          }

          // mark the folder as dirty; we'll get to it later.
          let destGlodaFolder = GlodaDatastore._mapFolder(aDestFolder);
          destGlodaFolder._ensureFolderDirty();
          this.indexer.indexingSweepNeeded = true;
        }
      } catch (ex) {
        this.indexer._log.error(
          "Problem encountered during message move/copy:",
          ex.stack
        );
      }
    },

    /**
     * Queue up message key changes that are a result of offline fake headers
     *  being made real for the actual update during the msgsClassified
     *  notification that is expected after this.  We defer the
     *  actual work (if there is any to be done; the fake header might have
     *  guessed the right UID correctly) so that we can batch our work.
     *
     * The expectation is that there will be no meaningful time window between
     *  this notification and the msgsClassified notification since the message
     *  classifier should not actually need to classify the messages (they
     *  should already have been classified) and so can fast-path them.
     */
    msgKeyChanged(aOldMsgKey, aNewMsgHdr) {
      try {
        let val = null,
          newKey = aNewMsgHdr.messageKey;
        let [glodaId, glodaDirty] = PendingCommitTracker.getGlodaState(
          aNewMsgHdr
        );
        // If we haven't indexed this message yet, take no action, and leave it
        // up to msgsClassified to take proper action.
        if (glodaId < GLODA_FIRST_VALID_MESSAGE_ID) {
          return;
        }
        // take no action on filthy messages,
        // generate an entry if dirty or the keys don't match.
2651 if ( 2652 glodaDirty !== GlodaMsgIndexer.kMessageFilthy && 2653 (glodaDirty === GlodaMsgIndexer.kMessageDirty || 2654 aOldMsgKey !== newKey) 2655 ) { 2656 val = { 2657 id: glodaId, 2658 key: aOldMsgKey !== newKey ? newKey : null, 2659 isDirty: glodaDirty === GlodaMsgIndexer.kMessageDirty, 2660 }; 2661 } 2662 2663 let key = aNewMsgHdr.folder.URI + "#" + aNewMsgHdr.messageKey; 2664 this.indexer._keyChangedBatchInfo[key] = val; 2665 } catch (ex) { 2666 // this is more for the unit test to fail rather than user error reporting 2667 this.indexer._log.error( 2668 "Problem encountered during msgKeyChanged" + 2669 " notification handling: " + 2670 ex + 2671 "\n\n" + 2672 ex.stack + 2673 " \n\n" 2674 ); 2675 } 2676 }, 2677 2678 /** 2679 * Detect newly added folders before they get messages so we map them before 2680 * they get any messages added to them. If we only hear about them after 2681 * they get their 1st message, then we will mark them filthy, but if we mark 2682 * them before that, they get marked clean. 2683 */ 2684 folderAdded(aMsgFolder) { 2685 // This is invoked for its side-effect of invoking _mapFolder and doing so 2686 // only after filtering out folders we don't care about. 2687 GlodaMsgIndexer.shouldIndexFolder(aMsgFolder); 2688 }, 2689 2690 /** 2691 * Handles folder no-longer-exists-ence. We mark all messages as deleted 2692 * and remove the folder from our URI table. Currently, if a folder that 2693 * contains other folders is deleted, we may either receive one 2694 * notification for the folder that is deleted, or a notification for the 2695 * folder and one for each of its descendents. This depends upon the 2696 * underlying account implementation, so we explicitly handle each case. 2697 * Namely, we treat it as if we're only planning on getting one, but we 2698 * handle if the children are already gone for some reason. 
2699 */ 2700 folderDeleted(aFolder) { 2701 this.indexer._log.debug("folderDeleted notification"); 2702 try { 2703 let delFunc = function(aFolder, indexer) { 2704 if (indexer._datastore._folderKnown(aFolder)) { 2705 indexer._log.info( 2706 "Processing deletion of folder " + aFolder.prettyName + "." 2707 ); 2708 let glodaFolder = GlodaDatastore._mapFolder(aFolder); 2709 indexer._datastore.markMessagesDeletedByFolderID(glodaFolder.id); 2710 indexer._datastore.deleteFolderByID(glodaFolder.id); 2711 GlodaDatastore._killGlodaFolderIntoTombstone(glodaFolder); 2712 } else { 2713 indexer._log.info( 2714 "Ignoring deletion of folder " + 2715 aFolder.prettyName + 2716 " because it is unknown to gloda." 2717 ); 2718 } 2719 }; 2720 2721 let descendentFolders = aFolder.descendants; 2722 // (the order of operations does not matter; child, non-child, whatever.) 2723 // delete the parent 2724 delFunc(aFolder, this.indexer); 2725 // delete all its descendents 2726 for (let folder of descendentFolders) { 2727 delFunc(folder, this.indexer); 2728 } 2729 2730 this.indexer.pendingDeletions = true; 2731 } catch (ex) { 2732 this.indexer._log.error( 2733 "Problem encountered during folder deletion" + 2734 ": " + 2735 ex + 2736 "\n\n" + 2737 ex.stack + 2738 "\n\n" 2739 ); 2740 } 2741 }, 2742 2743 /** 2744 * Handle a folder being copied or moved. 2745 * Moves are handled by a helper function shared with _folderRenameHelper 2746 * (which takes care of any nesting involved). 2747 * Copies are actually ignored, because our periodic indexing traversal 2748 * should discover these automatically. We could hint ourselves into 2749 * action, but arguably a set of completely duplicate messages is not 2750 * a high priority for indexing. 
2751 */ 2752 folderMoveCopyCompleted(aMove, aSrcFolder, aDestFolder) { 2753 this.indexer._log.debug( 2754 "folderMoveCopy notification (Move: " + aMove + ")" 2755 ); 2756 if (aMove) { 2757 let srcURI = aSrcFolder.URI; 2758 let targetURI = 2759 aDestFolder.URI + srcURI.substring(srcURI.lastIndexOf("/")); 2760 this._folderRenameHelper(aSrcFolder, targetURI); 2761 } else { 2762 this.indexer.indexingSweepNeeded = true; 2763 } 2764 }, 2765 2766 /** 2767 * We just need to update the URI <-> ID maps and the row in the database, 2768 * all of which is actually done by the datastore for us. 2769 * This method needs to deal with the complexity where local folders will 2770 * generate a rename notification for each sub-folder, but IMAP folders 2771 * will generate only a single notification. Our logic primarily handles 2772 * this by not exploding if the original folder no longer exists. 2773 */ 2774 _folderRenameHelper(aOrigFolder, aNewURI) { 2775 let newFolder = MailUtils.getOrCreateFolder(aNewURI); 2776 let specialFolderFlags = 2777 Ci.nsMsgFolderFlags.Trash | Ci.nsMsgFolderFlags.Junk; 2778 if (newFolder.isSpecialFolder(specialFolderFlags, true)) { 2779 let descendentFolders = newFolder.descendants; 2780 2781 // First thing to do: make sure we don't index the resulting folder and 2782 // its descendents. 2783 GlodaMsgIndexer.resetFolderIndexingPriority(newFolder); 2784 for (let folder of descendentFolders) { 2785 GlodaMsgIndexer.resetFolderIndexingPriority(folder); 2786 } 2787 2788 // Remove from the index messages from the original folder 2789 this.folderDeleted(aOrigFolder); 2790 } else { 2791 let descendentFolders = aOrigFolder.descendants; 2792 2793 let origURI = aOrigFolder.URI; 2794 // this rename is straightforward. 2795 GlodaDatastore.renameFolder(aOrigFolder, aNewURI); 2796 2797 for (let folder of descendentFolders) { 2798 let oldSubURI = folder.URI; 2799 // mangle a new URI from the old URI. 
we could also try and do a 2800 // parallel traversal of the new folder hierarchy, but that seems like 2801 // more work. 2802 let newSubURI = aNewURI + oldSubURI.substring(origURI.length); 2803 this.indexer._datastore.renameFolder(oldSubURI, newSubURI); 2804 } 2805 2806 this.indexer._log.debug( 2807 "folder renamed: " + origURI + " to " + aNewURI 2808 ); 2809 } 2810 }, 2811 2812 /** 2813 * Handle folder renames, dispatching to our rename helper (which also 2814 * takes care of any nested folder issues.) 2815 */ 2816 folderRenamed(aOrigFolder, aNewFolder) { 2817 this._folderRenameHelper(aOrigFolder, aNewFolder.URI); 2818 }, 2819 2820 /** 2821 * Helper used by folderCompactStart/folderReindexTriggered. 2822 */ 2823 _reindexFolderHelper(folder, isCompacting) { 2824 // ignore folders we ignore... 2825 if (!GlodaMsgIndexer.shouldIndexFolder(folder)) { 2826 return; 2827 } 2828 2829 let glodaFolder = GlodaDatastore._mapFolder(folder); 2830 if (isCompacting) { 2831 glodaFolder.compacting = true; 2832 } 2833 2834 // Purge any explicit indexing of said folder. 2835 GlodaIndexer.purgeJobsUsingFilter(function(aJob) { 2836 return aJob.jobType == "folder" && aJob.id == folder.id; 2837 }); 2838 2839 // Abort the active job if it's in the folder (this covers both 2840 // event-driven indexing that happens to be in the folder as well 2841 // explicit folder indexing of the folder). 2842 if (GlodaMsgIndexer._indexingFolder == folder) { 2843 GlodaIndexer.killActiveJob(); 2844 } 2845 2846 // Tell the PendingCommitTracker to throw away anything it is tracking 2847 // about the folder. We will pick up the pieces in the compaction 2848 // pass. 2849 PendingCommitTracker.noteFolderDatabaseGettingBlownAway(folder); 2850 2851 // (We do not need to mark the folder dirty because if we were indexing 2852 // it, it already must have been marked dirty.) 2853 }, 2854 2855 /** 2856 * folderCompactStart: Mark the folder as compacting in our in-memory 2857 * representation. 
This should keep any new indexing out of the folder 2858 * until it is done compacting. Also, kill any active or existing jobs 2859 * to index the folder. 2860 */ 2861 folderCompactStart(folder) { 2862 this._reindexFolderHelper(folder, true); 2863 }, 2864 2865 /** 2866 * folderReindexTriggered: We do the same thing as folderCompactStart 2867 * but don't mark the folder as compacting. 2868 */ 2869 folderReindexTriggered(folder) { 2870 this._reindexFolderHelper(folder, false); 2871 }, 2872 2873 /** 2874 * folderCompactFinish: Mark the folder as done compacting in our 2875 * in-memory representation. Assuming the folder was known to us and 2876 * not marked filthy, queue a compaction job. 2877 */ 2878 folderCompactFinish(folder) { 2879 // ignore folders we ignore... 2880 if (!GlodaMsgIndexer.shouldIndexFolder(folder)) { 2881 return; 2882 } 2883 2884 let glodaFolder = GlodaDatastore._mapFolder(folder); 2885 glodaFolder.compacting = false; 2886 glodaFolder._setCompactedState(true); 2887 2888 // Queue compaction unless the folder was filthy (in which case there 2889 // are no valid gloda-id's to update.) 2890 if (glodaFolder.dirtyStatus != glodaFolder.kFolderFilthy) { 2891 GlodaIndexer.indexJob(new IndexingJob("folderCompact", glodaFolder.id)); 2892 } 2893 2894 // Queue indexing of the folder if it is dirty. We are doing this 2895 // mainly in case we were indexing it before the compaction started. 2896 // It should be reasonably harmless if we weren't. 2897 // (It would probably be better to just make sure that there is an 2898 // indexing sweep queued or active, and if it's already active that 2899 // this folder is in the queue to be processed.) 2900 if (glodaFolder.dirtyStatus == glodaFolder.kFolderDirty) { 2901 GlodaIndexer.indexJob(new IndexingJob("folder", glodaFolder.id)); 2902 } 2903 }, 2904 }, 2905 2906 /** 2907 * A nsIFolderListener (listening on nsIMsgMailSession so we get all of 2908 * these events) PRIMARILY to get folder loaded notifications. 
Because of 2909 * deficiencies in the nsIMsgFolderListener's events at this time, we also 2910 * get our folder-added and newsgroup notifications from here for now. (This 2911 * will be rectified.) 2912 */ 2913 _folderListener: { 2914 indexer: null, 2915 2916 _init(aIndexer) { 2917 this.indexer = aIndexer; 2918 }, 2919 2920 OnItemAdded(aParentItem, aItem) {}, 2921 OnItemRemoved(aParentItem, aItem) {}, 2922 OnItemPropertyChanged(aItem, aProperty, aOldValue, aNewValue) {}, 2923 /** 2924 * Detect changes to folder flags and reset our indexing priority. This 2925 * is important because (all?) folders start out without any flags and 2926 * then get their flags added to them. 2927 */ 2928 OnItemIntPropertyChanged(aFolderItem, aProperty, aOldValue, aNewValue) { 2929 if (aProperty !== "FolderFlag") { 2930 return; 2931 } 2932 if (!GlodaMsgIndexer.shouldIndexFolder(aFolderItem)) { 2933 return; 2934 } 2935 // Only reset priority if folder Special Use changes. 2936 if ( 2937 (aOldValue & Ci.nsMsgFolderFlags.SpecialUse) == 2938 (aNewValue & Ci.nsMsgFolderFlags.SpecialUse) 2939 ) { 2940 return; 2941 } 2942 GlodaMsgIndexer.resetFolderIndexingPriority(aFolderItem); 2943 }, 2944 OnItemBoolPropertyChanged(aItem, aProperty, aOldValue, aNewValue) {}, 2945 OnItemUnicharPropertyChanged(aItem, aProperty, aOldValue, aNewValue) {}, 2946 /** 2947 * Notice when user activity adds/removes tags or changes a message's 2948 * status. 2949 */ 2950 OnItemPropertyFlagChanged(aMsgHdr, aProperty, aOldValue, aNewValue) { 2951 if ( 2952 aProperty == "Keywords" || 2953 // We could care less about the new flag changing. 2954 (aProperty == "Status" && 2955 (aOldValue ^ aNewValue) != Ci.nsMsgMessageFlags.New && 2956 // We do care about IMAP deletion, but msgsDeleted tells us that, so 2957 // ignore IMAPDeleted too... 
2958 (aOldValue ^ aNewValue) != Ci.nsMsgMessageFlags.IMAPDeleted) || 2959 aProperty == "Flagged" 2960 ) { 2961 GlodaMsgIndexer._reindexChangedMessages([aMsgHdr], true); 2962 } 2963 }, 2964 2965 /** 2966 * Get folder loaded notifications for folders that had to do some 2967 * (asynchronous) processing before they could be opened. 2968 */ 2969 OnItemEvent(aFolder, aEvent) { 2970 if (aEvent == "FolderLoaded") { 2971 this.indexer._onFolderLoaded(aFolder); 2972 } 2973 }, 2974 }, 2975 2976 /* ***** Rebuilding / Reindexing ***** */ 2977 /** 2978 * Allow us to invalidate an outstanding folder traversal because the 2979 * underlying database is going away. We use other means for detecting 2980 * modifications of the message (labeling, marked (un)read, starred, etc.) 2981 * 2982 * This is an nsIDBChangeListener listening to an nsIDBChangeAnnouncer. To 2983 * add ourselves, we get us a nice nsMsgDatabase, query it to the announcer, 2984 * then call AddListener. 2985 */ 2986 _databaseAnnouncerListener: { 2987 indexer: null, 2988 /** 2989 * XXX We really should define the operations under which we expect this to 2990 * occur. While we know this must be happening as the result of a 2991 * ForceClosed call, we don't have a comprehensive list of when this is 2992 * expected to occur. Some reasons: 2993 * - Compaction (although we should already have killed the job thanks to 2994 * our compaction notification) 2995 * - UID validity rolls. 2996 * - Folder Rename 2997 * - Folder Delete 2998 * The fact that we already have the database open when getting this means 2999 * that it had to be valid before we opened it, which hopefully rules out 3000 * modification of the mbox file by an external process (since that is 3001 * forbidden when we are running) and many other exotic things. 3002 * 3003 * So this really ends up just being a correctness / safety protection 3004 * mechanism. At least now that we have better compaction support. 
3005 */ 3006 onAnnouncerGoingAway(aDBChangeAnnouncer) { 3007 // The fact that we are getting called means we have an active folder and 3008 // that we therefore are the active job. As such, we must kill the 3009 // active job. 3010 // XXX In the future, when we support interleaved event-driven indexing 3011 // that bumps long-running indexing tasks, the semantics of this will 3012 // have to change a bit since we will want to maintain being active in a 3013 // folder even when bumped. However, we will probably have a more 3014 // complex notion of indexing contexts on a per-job basis. 3015 GlodaIndexer.killActiveJob(); 3016 }, 3017 3018 onHdrFlagsChanged(aHdrChanged, aOldFlags, aNewFlags, aInstigator) {}, 3019 onHdrDeleted(aHdrChanged, aParentKey, aFlags, aInstigator) {}, 3020 onHdrAdded(aHdrChanged, aParentKey, aFlags, aInstigator) {}, 3021 onParentChanged(aKeyChanged, aOldParent, aNewParent, aInstigator) {}, 3022 onReadChanged(aInstigator) {}, 3023 onJunkScoreChanged(aInstigator) {}, 3024 onHdrPropertyChanged(aHdrToChange, aPreChange, aStatus, aInstigator) {}, 3025 onEvent(aDB, aEvent) {}, 3026 }, 3027 3028 /** 3029 * Given a list of Message-ID's, return a matching list of lists of messages 3030 * matching those Message-ID's. So if you pass an array with three 3031 * Message-ID's ["a", "b", "c"], you would get back an array containing 3032 * 3 lists, where the first list contains all the messages with a message-id 3033 * of "a", and so forth. The reason a list is returned rather than null/a 3034 * message is that we accept the reality that we have multiple copies of 3035 * messages with the same ID. 3036 * This call is asynchronous because it depends on previously created messages 3037 * to be reflected in our results, which requires us to execute on the async 3038 * thread where all our writes happen. 
This also turns out to be a 3039 * reasonable thing because we could imagine pathological cases where there 3040 * could be a lot of message-id's and/or a lot of messages with those 3041 * message-id's. 3042 * 3043 * The returned collection will include both 'ghost' messages (messages 3044 * that exist for conversation-threading purposes only) as well as deleted 3045 * messages in addition to the normal 'live' messages that non-privileged 3046 * queries might return. 3047 */ 3048 getMessagesByMessageID(aMessageIDs, aCallback, aCallbackThis) { 3049 let msgIDToIndex = {}; 3050 let results = []; 3051 for (let iID = 0; iID < aMessageIDs.length; ++iID) { 3052 let msgID = aMessageIDs[iID]; 3053 results.push([]); 3054 msgIDToIndex[msgID] = iID; 3055 } 3056 3057 // (Note: although we are performing a lookup with no validity constraints 3058 // and using the same object-relational-mapper-ish layer used by things 3059 // that do have constraints, we are not at risk of exposing deleted 3060 // messages to other code and getting it confused. The only way code 3061 // can find a message is if it shows up in their queries or gets announced 3062 // via GlodaCollectionManager.itemsAdded, neither of which will happen.) 3063 let query = Gloda.newQuery(Gloda.NOUN_MESSAGE, { 3064 noDbQueryValidityConstraints: true, 3065 }); 3066 query.headerMessageID.apply(query, aMessageIDs); 3067 query.frozen = true; 3068 3069 let listener = new MessagesByMessageIdCallback( 3070 msgIDToIndex, 3071 results, 3072 aCallback, 3073 aCallbackThis 3074 ); 3075 return query.getCollection(listener, null, { becomeNull: true }); 3076 }, 3077 3078 /** 3079 * A reference to MsgHdrToMimeMessage that unit testing can clobber when it 3080 * wants to cause us to hang or inject a fault. If you are not 3081 * glodaTestHelper.js then _do not touch this_. 3082 */ 3083 _MsgHdrToMimeMessageFunc: MsgHdrToMimeMessage, 3084 /** 3085 * Primary message indexing logic. 
This method is mainly concerned with 3086 * getting all the information about the message required for threading / 3087 * conversation building and subsequent processing. It is responsible for 3088 * determining whether to reuse existing gloda messages or whether a new one 3089 * should be created. Most attribute stuff happens in fund_attr.js or 3090 * expl_attr.js. 3091 * 3092 * Prior to calling this method, the caller must have invoked 3093 * |_indexerEnterFolder|, leaving us with the following true invariants 3094 * below. 3095 * 3096 * @pre aMsgHdr.folder == this._indexingFolder 3097 * @pre aMsgHdr.folder.msgDatabase == this._indexingDatabase 3098 */ 3099 *_indexMessage(aMsgHdr, aCallbackHandle) { 3100 this._log.debug( 3101 "*** Indexing message: " + aMsgHdr.messageKey + " : " + aMsgHdr.subject 3102 ); 3103 3104 // If the message is offline, then get the message body as well 3105 let aMimeMsg; 3106 if ( 3107 aMsgHdr.flags & Ci.nsMsgMessageFlags.Offline || 3108 aMsgHdr.folder instanceof Ci.nsIMsgLocalMailFolder 3109 ) { 3110 this._MsgHdrToMimeMessageFunc( 3111 aMsgHdr, 3112 aCallbackHandle.callbackThis, 3113 aCallbackHandle.callback, 3114 false, 3115 { saneBodySize: true } 3116 ); 3117 aMimeMsg = (yield this.kWorkAsync)[1]; 3118 } else { 3119 this._log.debug(" * Message is not offline -- only headers indexed"); 3120 } 3121 3122 this._log.debug(" * Got message, subject " + aMsgHdr.subject); 3123 3124 if (this._unitTestSuperVerbose) { 3125 if (aMimeMsg) { 3126 this._log.debug(" * Got Mime " + aMimeMsg.prettyString()); 3127 } else { 3128 this._log.debug(" * NO MIME MESSAGE!!!\n"); 3129 } 3130 } 3131 3132 // -- Find/create the conversation the message belongs to. 3133 // Our invariant is that all messages that exist in the database belong to 3134 // a conversation. 3135 3136 // - See if any of the ancestors exist and have a conversationID... 
3137 // (references are ordered from old [0] to new [n-1]) 3138 let references = Array.from(range(0, aMsgHdr.numReferences)).map(i => 3139 aMsgHdr.getStringReference(i) 3140 ); 3141 // also see if we already know about the message... 3142 references.push(aMsgHdr.messageId); 3143 3144 this.getMessagesByMessageID( 3145 references, 3146 aCallbackHandle.callback, 3147 aCallbackHandle.callbackThis 3148 ); 3149 // (ancestorLists has a direct correspondence to the message ids) 3150 let ancestorLists = yield this.kWorkAsync; 3151 3152 this._log.debug("ancestors raw: " + ancestorLists); 3153 this._log.debug( 3154 "ref len: " + references.length + " anc len: " + ancestorLists.length 3155 ); 3156 this._log.debug("references: " + references); 3157 this._log.debug("ancestors: " + ancestorLists); 3158 3159 // pull our current message lookup results off 3160 references.pop(); 3161 let candidateCurMsgs = ancestorLists.pop(); 3162 3163 let conversationID = null; 3164 let conversation = null; 3165 // -- figure out the conversation ID 3166 // if we have a clone/already exist, just use his conversation ID 3167 if (candidateCurMsgs.length > 0) { 3168 conversationID = candidateCurMsgs[0].conversationID; 3169 conversation = candidateCurMsgs[0].conversation; 3170 } else { 3171 // otherwise check out our ancestors 3172 // (walk from closest to furthest ancestor) 3173 for ( 3174 let iAncestor = ancestorLists.length - 1; 3175 iAncestor >= 0; 3176 --iAncestor 3177 ) { 3178 let ancestorList = ancestorLists[iAncestor]; 3179 3180 if (ancestorList.length > 0) { 3181 // we only care about the first instance of the message because we are 3182 // able to guarantee the invariant that all messages with the same 3183 // message id belong to the same conversation. 
3184 let ancestor = ancestorList[0]; 3185 if (conversationID === null) { 3186 conversationID = ancestor.conversationID; 3187 conversation = ancestor.conversation; 3188 } else if (conversationID != ancestor.conversationID) { 3189 // XXX this inconsistency is known and understood and tracked by 3190 // bug 478162 https://bugzilla.mozilla.org/show_bug.cgi?id=478162 3191 // this._log.error("Inconsistency in conversations invariant on " + 3192 // ancestor.headerMessageID + ". It has conv id " + 3193 // ancestor.conversationID + " but expected " + 3194 // conversationID + ". ID: " + ancestor.id); 3195 } 3196 } 3197 } 3198 } 3199 3200 // nobody had one? create a new conversation 3201 if (conversationID === null) { 3202 // (the create method could issue the id, making the call return 3203 // without waiting for the database...) 3204 conversation = this._datastore.createConversation( 3205 aMsgHdr.mime2DecodedSubject, 3206 null, 3207 null 3208 ); 3209 conversationID = conversation.id; 3210 } 3211 3212 // Walk from furthest to closest ancestor, creating the ancestors that don't 3213 // exist. (This is possible if previous messages that were consumed in this 3214 // thread only had an in-reply-to or for some reason did not otherwise 3215 // provide the full references chain.) 3216 for (let iAncestor = 0; iAncestor < ancestorLists.length; ++iAncestor) { 3217 let ancestorList = ancestorLists[iAncestor]; 3218 3219 if (ancestorList.length == 0) { 3220 this._log.debug( 3221 "creating message with: null, " + 3222 conversationID + 3223 ", " + 3224 references[iAncestor] + 3225 ", null." 
3226 ); 3227 let ancestor = this._datastore.createMessage( 3228 null, 3229 null, // ghost 3230 conversationID, 3231 null, 3232 references[iAncestor], 3233 null, // no subject 3234 null, // no body 3235 null 3236 ); // no attachments 3237 this._datastore.insertMessage(ancestor); 3238 ancestorLists[iAncestor].push(ancestor); 3239 } 3240 } 3241 // now all our ancestors exist, though they may be ghost-like... 3242 3243 // find if there's a ghost version of our message or we already have indexed 3244 // this message. 3245 let curMsg = null; 3246 this._log.debug(candidateCurMsgs.length + " candidate messages"); 3247 for (let iCurCand = 0; iCurCand < candidateCurMsgs.length; iCurCand++) { 3248 let candMsg = candidateCurMsgs[iCurCand]; 3249 3250 this._log.debug( 3251 "candidate folderID: " + 3252 candMsg.folderID + 3253 " messageKey: " + 3254 candMsg.messageKey 3255 ); 3256 3257 if (candMsg.folderURI == this._indexingFolder.URI) { 3258 // if we are in the same folder and we have the same message key, we 3259 // are definitely the same, stop looking. 3260 if (candMsg.messageKey == aMsgHdr.messageKey) { 3261 curMsg = candMsg; 3262 break; 3263 } 3264 // if (we are in the same folder and) the candidate message has a null 3265 // message key, we treat it as our best option unless we find an exact 3266 // key match. (this would happen because the 'move' notification case 3267 // has to deal with not knowing the target message key. this case 3268 // will hopefully be somewhat improved in the future to not go through 3269 // this path which mandates re-indexing of the message in its entirety) 3270 if (candMsg.messageKey === null) { 3271 curMsg = candMsg; 3272 } else if ( 3273 curMsg === null && 3274 !this._indexingDatabase.ContainsKey(candMsg.messageKey) 3275 ) { 3276 // (We are in the same folder and) the candidate message's underlying 3277 // message no longer exists/matches. 
Assume we are the same but 3278 // were betrayed by a re-indexing or something, but we have to make 3279 // sure a perfect match doesn't turn up. 3280 curMsg = candMsg; 3281 } 3282 } else if (curMsg === null && candMsg.folderID === null) { 3283 // a ghost/deleted message is fine 3284 curMsg = candMsg; 3285 } 3286 } 3287 3288 let attachmentNames = null; 3289 if (aMimeMsg) { 3290 attachmentNames = aMimeMsg.allAttachments 3291 .filter(att => att.isRealAttachment) 3292 .map(att => att.name); 3293 } 3294 3295 let isConceptuallyNew, isRecordNew, insertFulltext; 3296 if (curMsg === null) { 3297 curMsg = this._datastore.createMessage( 3298 aMsgHdr.folder, 3299 aMsgHdr.messageKey, 3300 conversationID, 3301 aMsgHdr.date, 3302 aMsgHdr.messageId 3303 ); 3304 curMsg._conversation = conversation; 3305 isConceptuallyNew = isRecordNew = insertFulltext = true; 3306 } else { 3307 isRecordNew = false; 3308 // the message is conceptually new if it was a ghost or dead. 3309 isConceptuallyNew = curMsg._isGhost || curMsg._isDeleted; 3310 // insert fulltext if it was a ghost 3311 insertFulltext = curMsg._isGhost; 3312 curMsg._folderID = this._datastore._mapFolder(aMsgHdr.folder).id; 3313 curMsg._messageKey = aMsgHdr.messageKey; 3314 curMsg.date = new Date(aMsgHdr.date / 1000); 3315 // the message may have been deleted; tell it to make sure it's not. 3316 curMsg._ensureNotDeleted(); 3317 // note: we are assuming that our matching logic is flawless in that 3318 // if this message was not a ghost, we are assuming the 'body' 3319 // associated with the id is still exactly the same. It is conceivable 3320 // that there are cases where this is not true. 
3321 } 3322 3323 if (aMimeMsg) { 3324 let bodyPlain = aMimeMsg.coerceBodyToPlaintext(aMsgHdr.folder); 3325 if (bodyPlain) { 3326 curMsg._bodyLines = bodyPlain.split(/\r?\n/); 3327 // curMsg._content gets set by GlodaFundAttr.jsm 3328 } 3329 } 3330 3331 // Mark the message as new (for the purposes of fulltext insertion) 3332 if (insertFulltext) { 3333 curMsg._isNew = true; 3334 } 3335 3336 curMsg._subject = aMsgHdr.mime2DecodedSubject; 3337 curMsg._attachmentNames = attachmentNames; 3338 3339 // curMsg._indexAuthor gets set by GlodaFundAttr.jsm 3340 // curMsg._indexRecipients gets set by GlodaFundAttr.jsm 3341 3342 // zero the notability so everything in grokNounItem can just increment 3343 curMsg.notability = 0; 3344 3345 yield aCallbackHandle.pushAndGo( 3346 Gloda.grokNounItem( 3347 curMsg, 3348 { header: aMsgHdr, mime: aMimeMsg, bodyLines: curMsg._bodyLines }, 3349 isConceptuallyNew, 3350 isRecordNew, 3351 aCallbackHandle 3352 ) 3353 ); 3354 3355 delete curMsg._bodyLines; 3356 delete curMsg._content; 3357 delete curMsg._isNew; 3358 delete curMsg._indexAuthor; 3359 delete curMsg._indexRecipients; 3360 3361 // we want to update the header for messages only after the transaction 3362 // irrevocably hits the disk. otherwise we could get confused if the 3363 // transaction rolls back or what not. 3364 PendingCommitTracker.track(aMsgHdr, curMsg.id); 3365 3366 yield this.kWorkDone; 3367 }, 3368 3369 /** 3370 * Wipe a message out of existence from our index. This is slightly more 3371 * tricky than one would first expect because there are potentially 3372 * attributes not immediately associated with this message that reference 3373 * the message. Not only that, but deletion of messages may leave a 3374 * conversation possessing only ghost messages, which we don't want, so we 3375 * need to nuke the moot conversation and its moot ghost messages. 
3376 * For now, we are actually punting on that trickiness, and the exact 3377 * nuances aren't defined yet because we have not decided whether to store 3378 * such attributes redundantly. For example, if we have subject-pred-object, 3379 * we could actually store this as attributes (subject, id, object) and 3380 * (object, id, subject). In such a case, we could query on (subject, *) 3381 * and use the results to delete the (object, id, subject) case. If we 3382 * don't redundantly store attributes, we can deal with the problem by 3383 * collecting up all the attributes that accept a message as their object 3384 * type and issuing a delete against that. For example, delete (*, [1,2,3], 3385 * message id). 3386 * (We are punting because we haven't implemented support for generating 3387 * attributes like that yet.) 3388 * 3389 * @TODO: implement deletion of attributes that reference (deleted) messages 3390 */ 3391 *_deleteMessage(aMessage, aCallbackHandle) { 3392 this._log.debug("*** Deleting message: " + aMessage); 3393 3394 // -- delete our attributes 3395 // delete the message's attributes (if we implement the cascade delete, that 3396 // could do the honors for us... right now we define the trigger in our 3397 // schema but the back-end ignores it) 3398 GlodaDatastore.clearMessageAttributes(aMessage); 3399 3400 // -- delete our message or ghost us, and maybe nuke the whole conversation 3401 // Look at the other messages in the conversation. 3402 // (Note: although we are performing a lookup with no validity constraints 3403 // and using the same object-relational-mapper-ish layer used by things 3404 // that do have constraints, we are not at risk of exposing deleted 3405 // messages to other code and getting it confused. The only way code 3406 // can find a message is if it shows up in their queries or gets announced 3407 // via GlodaCollectionManager.itemsAdded, neither of which will happen.) 
3408 let convPrivQuery = Gloda.newQuery(Gloda.NOUN_MESSAGE, { 3409 noDbQueryValidityConstraints: true, 3410 }); 3411 convPrivQuery.conversation(aMessage.conversation); 3412 let conversationCollection = convPrivQuery.getCollection(aCallbackHandle); 3413 yield this.kWorkAsync; 3414 3415 let conversationMsgs = conversationCollection.items; 3416 3417 // Count the number of ghosts messages we see to determine if we are 3418 // the last message alive. 3419 let ghostCount = 0; 3420 let twinMessageExists = false; 3421 for (let convMsg of conversationMsgs) { 3422 // ignore our own message 3423 if (convMsg.id == aMessage.id) { 3424 continue; 3425 } 3426 3427 if (convMsg._isGhost) { 3428 ghostCount++; 3429 } else if ( 3430 // This message is our (living) twin if it is not a ghost, not deleted, 3431 // and has the same message-id header. 3432 !convMsg._isDeleted && 3433 convMsg.headerMessageID == aMessage.headerMessageID 3434 ) { 3435 twinMessageExists = true; 3436 } 3437 } 3438 3439 // -- If everyone else is a ghost, blow away the conversation. 3440 // If there are messages still alive or deleted but we have not yet gotten 3441 // to them yet _deleteMessage, then do not do this. (We will eventually 3442 // hit this case if they are all deleted.) 3443 if (conversationMsgs.length - 1 == ghostCount) { 3444 // - Obliterate each message 3445 for (let msg of conversationMsgs) { 3446 GlodaDatastore.deleteMessageByID(msg.id); 3447 } 3448 // - Obliterate the conversation 3449 GlodaDatastore.deleteConversationByID(aMessage.conversationID); 3450 // *no one* should hold a reference or use aMessage after this point, 3451 // trash it so such ne'er do'wells are made plain. 3452 aMessage._objectPurgedMakeYourselfUnpleasant(); 3453 } else if (twinMessageExists) { 3454 // -- Ghost or purge us as appropriate 3455 // Purge us if we have a (living) twin; no ghost required. 
3456 GlodaDatastore.deleteMessageByID(aMessage.id); 3457 // *no one* should hold a reference or use aMessage after this point, 3458 // trash it so such ne'er do'wells are made plain. 3459 aMessage._objectPurgedMakeYourselfUnpleasant(); 3460 } else { 3461 // No twin, a ghost is required, we become the ghost. 3462 aMessage._ghost(); 3463 GlodaDatastore.updateMessage(aMessage); 3464 // ghosts don't have fulltext. purge it. 3465 GlodaDatastore.deleteMessageTextByID(aMessage.id); 3466 } 3467 3468 yield this.kWorkDone; 3469 }, 3470}; 3471GlodaIndexer.registerIndexer(GlodaMsgIndexer); 3472