/* This Source Code Form is subject to the terms of the Mozilla Public
 * License, v. 2.0. If a copy of the MPL was not distributed with this
 * file, You can obtain one at http://mozilla.org/MPL/2.0/. */

"use strict";

/*
 * This file currently contains a fairly general implementation of asynchronous
 *  indexing with a very explicit message indexing implementation.  As gloda
 *  will eventually want to index more than just messages, the message-specific
 *  things should ideally lose their special hold on this file.  This will
 *  benefit readability/size as well.
 */

const EXPORTED_SYMBOLS = ["GlodaMsgIndexer"];

const { MailServices } = ChromeUtils.import(
  "resource:///modules/MailServices.jsm"
);
const { MailUtils } = ChromeUtils.import("resource:///modules/MailUtils.jsm");

const { GlodaDatastore } = ChromeUtils.import(
  "resource:///modules/gloda/GlodaDatastore.jsm"
);
const { GlodaContact, GlodaFolder } = ChromeUtils.import(
  "resource:///modules/gloda/GlodaDataModel.jsm"
);
const { Gloda } = ChromeUtils.import("resource:///modules/gloda/Gloda.jsm");
const { GlodaCollectionManager } = ChromeUtils.import(
  "resource:///modules/gloda/Collection.jsm"
);
const { GlodaIndexer, IndexingJob } = ChromeUtils.import(
  "resource:///modules/gloda/GlodaIndexer.jsm"
);
const { MsgHdrToMimeMessage } = ChromeUtils.import(
  "resource:///modules/gloda/MimeMessage.jsm"
);

// Cr does not have mailnews error codes!
var NS_MSG_ERROR_FOLDER_SUMMARY_OUT_OF_DATE = 0x80550005;

var GLODA_MESSAGE_ID_PROPERTY = "gloda-id";
/**
 * Message header property to track dirty status; one of
 *  |GlodaMsgIndexer.kMessageClean|, |GlodaMsgIndexer.kMessageDirty|,
 *  |GlodaMsgIndexer.kMessageFilthy|.
 */
var GLODA_DIRTY_PROPERTY = "gloda-dirty";

/**
 * The sentinel GLODA_MESSAGE_ID_PROPERTY value indicating that a message fails
 *  to index and we should not bother trying again, at least not until a new
 *  release is made.
 *
 * This should ideally just flip between 1 and 2, with GLODA_OLD_BAD_MESSAGE_ID
 *  flipping in the other direction.  If we start having more trailing badness,
 *  _indexerGetEnumerator and GLODA_OLD_BAD_MESSAGE_ID will need to be altered.
 *
 * When flipping this, be sure to update glodaTestHelper.js's copy.
 */
var GLODA_BAD_MESSAGE_ID = 2;
/**
 * The gloda id we used to use to mark messages as bad, but now should be
 *  treated as eligible for indexing.  This is only ever used for consideration
 *  when creating msg header enumerators with `_indexerGetEnumerator` which
 *  means we only will re-index such messages in an indexing sweep.  Accordingly
 *  event-driven indexing will still treat such messages as unindexed (and
 *  unindexable) until an indexing sweep picks them up.
 */
var GLODA_OLD_BAD_MESSAGE_ID = 1;
var GLODA_FIRST_VALID_MESSAGE_ID = 32;

var JUNK_SCORE_PROPERTY = "junkscore";
var JUNK_SPAM_SCORE_STR = Ci.nsIJunkMailPlugin.IS_SPAM_SCORE.toString();

/**
 * The processing flags that tell us that a message header has not yet been
 *  reported to us via msgsClassified.  If it has one of these flags, it is
 *  still being processed.
 */
var NOT_YET_REPORTED_PROCESSING_FLAGS =
  Ci.nsMsgProcessingFlags.NotReportedClassified |
  Ci.nsMsgProcessingFlags.ClassifyJunk;
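// (Illustrative sketch, not from this file: event-driven logic can test a
//  header's folder processing flags against this mask, roughly
//    folder.getProcessingFlags(msgHdr.messageKey) & NOT_YET_REPORTED_PROCESSING_FLAGS
//  to defer indexing until the msgsClassified notification has arrived.)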

// for list comprehension fun
function* range(begin, end) {
  for (let i = begin; i < end; ++i) {
    yield i;
  }
}
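// For example (illustrative only): [...range(0, 3)] evaluates to [0, 1, 2];
//  `end` is exclusive.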

/**
 * We do not set properties on the messages until we perform a DB commit; this
 *  helper class tracks messages that we have indexed but are not yet marked
 *  as such on their header.
 */
var PendingCommitTracker = {
  /**
   * Maps message URIs to their gloda ids.
   *
   * I am not entirely sure why I chose the URI for the key rather than
   *  gloda folder ID + message key.  Most likely it was to simplify debugging
   *  since the gloda folder ID is opaque while the URI is very informative.  It
   *  is also possible I was afraid of IMAP folder renaming triggering a UID
   *  renumbering?
   */
  _indexedMessagesPendingCommitByKey: {},
  /**
   * Map from the pending commit gloda id to a tuple of [the corresponding
   *  message header, dirtyState].
   */
  _indexedMessagesPendingCommitByGlodaId: {},
  /**
   * Do we have a post-commit handler registered with this transaction yet?
   */
  _pendingCommit: false,

  /**
   * The function gets called when the commit actually happens to flush our
   *  message id's.
   *
   * It is very possible that by the time this call happens we have left the
   *  folder and nulled out msgDatabase on the folder.  Since nulling it out
   *  is what causes the commit, if we set the headers here without somehow
   *  forcing a commit, we will lose.  Badly.
   * Accordingly, we make a list of all the folders that the headers belong to
   *  as we iterate, make sure to re-attach their msgDatabase before forgetting
   *  the headers, then make sure to zero the msgDatabase again, triggering a
   *  commit.  If there were a way to directly get the nsIMsgDatabase from the
   *  header we could do that and call commit directly.  We don't track
   *  databases along with the headers since the headers can change because of
   *  moves and that would increase the number of moving parts.
   */
  _commitCallback() {
    let foldersByURI = {};
    let lastFolder = null;

    for (let glodaId in PendingCommitTracker._indexedMessagesPendingCommitByGlodaId) {
      let [
        msgHdr,
        dirtyState,
      ] = PendingCommitTracker._indexedMessagesPendingCommitByGlodaId[glodaId];
      // Mark this message as indexed.
      // It's conceivable the database could have gotten blown away, in which
      //  case the message headers are going to throw exceptions when we try
      //  and touch them.  So we wrap this in a try block that complains about
      //  this unforeseen circumstance.  (noteFolderDatabaseGettingBlownAway
      //  should have been called and avoided this situation in all known
      //  situations.)
      try {
        let curGlodaId = msgHdr.getUint32Property(GLODA_MESSAGE_ID_PROPERTY);
        if (curGlodaId != glodaId) {
          msgHdr.setUint32Property(GLODA_MESSAGE_ID_PROPERTY, glodaId);
        }
        let headerDirty = msgHdr.getUint32Property(GLODA_DIRTY_PROPERTY);
        if (headerDirty != dirtyState) {
          msgHdr.setUint32Property(GLODA_DIRTY_PROPERTY, dirtyState);
        }

        // Make sure this folder is in our foldersByURI map.
        if (lastFolder == msgHdr.folder) {
          continue;
        }
        lastFolder = msgHdr.folder;
        let folderURI = lastFolder.URI;
        if (!(folderURI in foldersByURI)) {
          foldersByURI[folderURI] = lastFolder;
        }
      } catch (ex) {
        GlodaMsgIndexer._log.error(
          "Exception while attempting to mark message with gloda state after " +
            "db commit",
          ex
        );
      }
    }

    // it is vitally important to do this before we forget about the headers!
    for (let uri in foldersByURI) {
      let folder = foldersByURI[uri];
      // This will not cause a parse.  The database is in-memory since we have
      //  a header that belongs to it.  This just causes the folder to
      //  re-acquire a reference from the database manager.
      folder.msgDatabase;
      // And this will cause a commit.  (And must be done since we don't want
      //  to cause a leak.)
      folder.msgDatabase = null;
    }

    PendingCommitTracker._indexedMessagesPendingCommitByGlodaId = {};
    PendingCommitTracker._indexedMessagesPendingCommitByKey = {};

    PendingCommitTracker._pendingCommit = false;
  },

  /**
   * Track a message header that should be marked with the given gloda id when
   *  the database commits.
   */
  track(aMsgHdr, aGlodaId) {
    let pendingKey = aMsgHdr.folder.URI + "#" + aMsgHdr.messageKey;
    this._indexedMessagesPendingCommitByKey[pendingKey] = aGlodaId;
    this._indexedMessagesPendingCommitByGlodaId[aGlodaId] = [
      aMsgHdr,
      GlodaMsgIndexer.kMessageClean,
    ];

    if (!this._pendingCommit) {
      GlodaDatastore.runPostCommit(this._commitCallback);
      this._pendingCommit = true;
    }
  },
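  // Usage sketch (illustrative, not from the original code): right after a
  //  message's gloda row is written, the indexer can do
  //    PendingCommitTracker.track(msgHdr, glodaMessage.id);
  //  and the header's gloda-id/gloda-dirty properties are then flushed by
  //  _commitCallback once GlodaDatastore actually commits.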

  /**
   * Get the current state of a message header given that we cannot rely on just
   *  looking at the header's properties because we defer setting those
   *  until the SQLite commit happens.
   *
   * @return Tuple of [gloda id, dirty status].
   */
  getGlodaState(aMsgHdr) {
    // If it's in the pending commit table, then the message is basically
    //  clean.  Return that info.
    let pendingKey = aMsgHdr.folder.URI + "#" + aMsgHdr.messageKey;
    if (pendingKey in this._indexedMessagesPendingCommitByKey) {
      let glodaId =
        PendingCommitTracker._indexedMessagesPendingCommitByKey[pendingKey];
      return [glodaId, this._indexedMessagesPendingCommitByGlodaId[glodaId][1]];
    }

    // Otherwise the header's concept of state is correct.
    let glodaId = aMsgHdr.getUint32Property(GLODA_MESSAGE_ID_PROPERTY);
    let glodaDirty = aMsgHdr.getUint32Property(GLODA_DIRTY_PROPERTY);
    return [glodaId, glodaDirty];
  },
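  // For example (illustrative): callers can destructure the result,
  //    let [glodaId, glodaDirty] = PendingCommitTracker.getGlodaState(msgHdr);
  //  and treat glodaId >= GLODA_FIRST_VALID_MESSAGE_ID with a clean dirty
  //  status as "already indexed".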

  /**
   * Update our structure to reflect moved headers.  Moves are currently
   *  treated as weakly interesting and do not require a reindexing
   *  although collections will get notified.  So our job is to fix up
   *  the pending commit information if the message has a pending commit.
   */
  noteMove(aOldHdr, aNewHdr) {
    let oldKey = aOldHdr.folder.URI + "#" + aOldHdr.messageKey;
    if (!(oldKey in this._indexedMessagesPendingCommitByKey)) {
      return;
    }

    let glodaId = this._indexedMessagesPendingCommitByKey[oldKey];
    delete this._indexedMessagesPendingCommitByKey[oldKey];

    let newKey = aNewHdr.folder.URI + "#" + aNewHdr.messageKey;
    this._indexedMessagesPendingCommitByKey[newKey] = glodaId;

    // only clobber the header, not the dirty state
    this._indexedMessagesPendingCommitByGlodaId[glodaId][0] = aNewHdr;
  },

  /**
   * A blind move is one where we have the source header but not the destination
   *  header.  This happens for IMAP messages that do not involve offline fake
   *  headers.
   * XXX Since IMAP moves will propagate the gloda-id/gloda-dirty bits for us,
   *  we could detect the other side of the move when it shows up as a
   *  msgsClassified event and restore the mapping information.  Since the
   *  offline fake header case should now cover the bulk of IMAP move
   *  operations, we probably do not need to pursue this.
   *
   * We just re-dispatch to noteDirtyHeader because we can't do anything more
   *  clever.
   */
  noteBlindMove(aOldHdr) {
    this.noteDirtyHeader(aOldHdr);
  },

  /**
   * If a message is dirty we should stop tracking it for post-commit
   *  purposes.  This is not because we don't want to write to its header
   *  when we commit as much as that we want to avoid |getGlodaState|
   *  reporting that the message is clean.  We could complicate our state
   *  by storing that information, but this is easier and ends up the same
   *  in the end.
   */
  noteDirtyHeader(aMsgHdr) {
    let pendingKey = aMsgHdr.folder.URI + "#" + aMsgHdr.messageKey;
    if (!(pendingKey in this._indexedMessagesPendingCommitByKey)) {
      return;
    }

    // (It is important that we get the gloda id from our own structure!)
    let glodaId = this._indexedMessagesPendingCommitByKey[pendingKey];
    this._indexedMessagesPendingCommitByGlodaId[glodaId][1] =
      GlodaMsgIndexer.kMessageDirty;
  },

  /**
   * Sometimes a folder database gets blown away.  This happens for one of two
   *  expected reasons right now:
   * - Folder compaction.
   * - Explicit reindexing of a folder via the folder properties "rebuild index"
   *    button.
   *
   * When this happens, we are basically out of luck and need to discard
   *  everything about the folder.  The good news is that the folder compaction
   *  pass is clever enough to re-establish the linkages that are being lost
   *  when we drop these things on the floor.  Reindexing of a folder is not
   *  clever enough to deal with this but is an exceptional case of last resort
   *  (the user should not normally be performing a reindex as part of daily
   *  operation), so we accept that messages may be redundantly indexed.
   */
  noteFolderDatabaseGettingBlownAway(aMsgFolder) {
    let uri = aMsgFolder.URI + "#";
    for (let key of Object.keys(this._indexedMessagesPendingCommitByKey)) {
      // this is not as efficient as it could be, but compaction is relatively
      //  rare and the number of pending headers is generally going to be
      //  small.
      if (key.indexOf(uri) == 0) {
        delete this._indexedMessagesPendingCommitByKey[key];
      }
    }
  },
};

/**
 * This callback handles processing the asynchronous query results of
 *  |GlodaMsgIndexer.getMessagesByMessageID|.
 */
function MessagesByMessageIdCallback(
  aMsgIDToIndex,
  aResults,
  aCallback,
  aCallbackThis
) {
  this.msgIDToIndex = aMsgIDToIndex;
  this.results = aResults;
  this.callback = aCallback;
  this.callbackThis = aCallbackThis;
}

MessagesByMessageIdCallback.prototype = {
  _log: console.createInstance({
    prefix: "gloda.index_msg.mbm",
    maxLogLevel: "Warn",
    maxLogLevelPref: "gloda.loglevel",
  }),

  onItemsAdded(aItems, aCollection) {
    // just outright bail if we are shutdown
    if (GlodaDatastore.datastoreIsShutdown) {
      return;
    }

    this._log.debug("getting results...");
    for (let message of aItems) {
      this.results[this.msgIDToIndex[message.headerMessageID]].push(message);
    }
  },
  onItemsModified() {},
  onItemsRemoved() {},
  onQueryCompleted(aCollection) {
    // just outright bail if we are shutdown
    if (GlodaDatastore.datastoreIsShutdown) {
      return;
    }

    this._log.debug("query completed, notifying... " + this.results);

    this.callback.call(this.callbackThis, this.results);
  },
};

/**
 * The message indexer!
 *
 * === Message Indexing Strategy
 * To these ends, we implement things like so:
 *
 * Message State Tracking
 * - We store a property on all indexed headers indicating their gloda message
 *   id.  This allows us to tell whether a message is indexed from the header,
 *   without having to consult the SQL database.
 * - When we receive an event that indicates that a message's meta-data has
 *   changed and gloda needs to re-index the message, we set a property on the
 *   header that indicates the message is dirty.  This property can indicate
 *   that the message needs to be re-indexed but the gloda-id is valid (dirty)
 *   or that the message's gloda-id is invalid (filthy) because the gloda
 *   database has been blown away.
 * - We track whether a folder is up-to-date on our GlodaFolder representation
 *   using a concept of dirtiness, just like messages.  Like messages, a folder
 *   can be dirty or filthy.  A dirty folder has at least one dirty message in
 *   it which means we should scan the folder.  A filthy folder means that
 *   every message in the folder should be considered filthy.  Folders start
 *   out filthy when Gloda is first told about them indicating we cannot
 *   trust any of the gloda-id's in the folders.  Filthy folders are downgraded
 *   to dirty folders after we mark all of the headers with gloda-id's filthy.
 *
 * Indexing Message Control
 * - We index the headers of all IMAP messages. We index the bodies of all IMAP
 *   messages that are offline.  We index all local messages.  We plan to avoid
 *   indexing news messages.
 * - We would like a way to express desires about indexing that either don't
 *   confound offline storage with indexing, or actually allow some choice.
 *
 * Indexing Messages
 * - We have two major modes of indexing: sweep and event-driven.  When we
 *   start up we kick off an indexing sweep.  We use event-driven indexing
 *   as we receive events for eligible messages, but if we get too many
 *   events we start dropping them on the floor and just flag that an indexing
 *   sweep is required.
 * - The sweep initiates folder indexing jobs based on the priorities assigned
 *   to folders.  Folder indexing uses a filtered message enumerator to find
 *   messages that need to be indexed, minimizing wasteful exposure of message
 *   headers to XPConnect that we would not end up indexing.
 * - For local folders, we use GetDatabaseWithReparse to ensure that the .msf
 *   file exists.  For IMAP folders, we simply use GetDatabase because we know
 *   the auto-sync logic will make sure that the folder is up-to-date and we
 *   want to avoid creating problems through use of updateFolder.
 *
 * Junk Mail
 * - We do not index junk.  We do not index messages until the junk/non-junk
 *   determination has been made.  If a message gets marked as junk, we act like
 *   it was deleted.
 * - We know when a message is actively queued for junk processing thanks to
 *   folder processing flags.  nsMsgDBFolder::CallFilterPlugins does this
 *   prior to initiating spam processing.  Unfortunately, this method does not
 *   get called until after we receive the notification about the existence of
 *   the header.  How long after can vary on different factors.  The longest
 *   delay is in the IMAP case where there is a filter that requires the
 *   message body to be present; the method does not get called until all the
 *   bodies are downloaded.
 *
 */
var GlodaMsgIndexer = {
  /**
   * A partial attempt to generalize to support multiple databases.  Each
   *  database would have its own datastore, and each datastore would have its
   *  own indexer.  But we rather inter-mingle our use of this field with the
   *  singleton global GlodaDatastore.
   */
  _datastore: GlodaDatastore,
  _log: console.createInstance({
    prefix: "gloda.index_msg",
    maxLogLevel: "Warn",
    maxLogLevelPref: "gloda.loglevel",
  }),

  _junkService: MailServices.junk,

  name: "index_msg",
  /**
   * Are we enabled, read: are we processing change events?
   */
  _enabled: false,
  get enabled() {
    return this._enabled;
  },

  enable() {
    // initialize our listeners' this pointers
    this._databaseAnnouncerListener.indexer = this;
    this._msgFolderListener.indexer = this;

    // register for:
    // - folder loaded events, so we know when getDatabaseWithReparse has
    //   finished updating the index/what not (if it wasn't immediately
    //   available)
    // - property changes (so we know when a message's read/starred state has
    //   changed.)
    this._folderListener._init(this);
    MailServices.mailSession.AddFolderListener(
      this._folderListener,
      Ci.nsIFolderListener.intPropertyChanged |
        Ci.nsIFolderListener.propertyFlagChanged |
        Ci.nsIFolderListener.event
    );

    MailServices.mfn.addListener(
      this._msgFolderListener,
      // note: intentionally no msgAdded or msgUnincorporatedMoved.
      Ci.nsIMsgFolderNotificationService.msgsClassified |
        Ci.nsIMsgFolderNotificationService.msgsJunkStatusChanged |
        Ci.nsIMsgFolderNotificationService.msgsDeleted |
        Ci.nsIMsgFolderNotificationService.msgsMoveCopyCompleted |
        Ci.nsIMsgFolderNotificationService.msgKeyChanged |
        Ci.nsIMsgFolderNotificationService.folderAdded |
        Ci.nsIMsgFolderNotificationService.folderDeleted |
        Ci.nsIMsgFolderNotificationService.folderMoveCopyCompleted |
        Ci.nsIMsgFolderNotificationService.folderRenamed |
        Ci.nsIMsgFolderNotificationService.folderCompactStart |
        Ci.nsIMsgFolderNotificationService.folderCompactFinish |
        Ci.nsIMsgFolderNotificationService.folderReindexTriggered
    );

    this._enabled = true;

    this._considerSchemaMigration();

    this._log.info("Event-Driven Indexing is now " + this._enabled);
  },
  disable() {
    // remove FolderLoaded notification listener
    MailServices.mailSession.RemoveFolderListener(this._folderListener);

    MailServices.mfn.removeListener(this._msgFolderListener);

    this._indexerLeaveFolder(); // nop if we aren't "in" a folder

    this._enabled = false;

    this._log.info("Event-Driven Indexing is now " + this._enabled);
  },

  /**
   * Indicates that we have pending deletions to process, meaning that there
   *  are gloda message rows flagged for deletion.  If this value is a boolean,
   *  it means the value is known reliably.  If this value is null, it means
   *  that we don't know, likely because we have started up and have not checked
   *  the database.
   */
  pendingDeletions: null,

  /**
   * The message (or folder state) is believed up-to-date.
   */
  kMessageClean: 0,
  /**
   * The message (or folder) is known to not be up-to-date. In the case of
   *  folders, this means that some of the messages in the folder may be dirty.
   *  However, because of the way our indexing works, it is possible there may
   *  actually be no dirty messages in a folder.  (We attempt to process
   *  messages in an event-driven fashion for a finite number of messages, but
   *  because we can quit without completing processing of the queue, we need to
   *  mark the folder dirty, just-in-case.)  (We could do some extra leg-work
   *  and do a better job of marking the folder clean again.)
   */
  kMessageDirty: 1,
  /**
   * We have not indexed the folder at all, but messages in the folder think
   *  they are indexed.  We downgrade the folder to just kMessageDirty after
   *  marking all the messages in the folder as dirty.  We do this so that if we
   *  have to stop indexing the folder we can still build on our progress next
   *  time we enter the folder.
   * We mark all folders filthy when (re-)creating the database because there
   *  may be previous state left over from an earlier database.
   */
  kMessageFilthy: 2,
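  // Informal state summary (restating the comments above, not new behavior):
  //  clean (0) -> dirty (1) when meta-data changes and a re-index is needed but
  //  the gloda-id is still valid; filthy (2) when the gloda-id itself can no
  //  longer be trusted (e.g. the gloda database was re-created), which forces a
  //  full re-index of the affected messages/folder.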

  /**
   * A message addition job yet to be (completely) processed.  Since message
   *  addition events come to us one-by-one, in order to aggregate them into a
   *  job, we need something like this.  It's up to the indexing loop to
   *  decide when to null this out; it can either do it when it first starts
   *  processing it, or when it has processed the last thing.  It's really a
   *  question of whether we want retrograde motion in the folder progress bar
   *  or the message progress bar.
   */
  _pendingAddJob: null,

  /**
   * The number of messages that we should queue for processing before letting
   *  them fall on the floor and relying on our folder-walking logic to ensure
   *  that the messages are indexed.
   * The reason we allow for queueing messages in an event-driven fashion is
   *  that once we have reached a steady-state, it is preferable to be able to
   *  deal with new messages and modified meta-data in a prompt fashion rather
   *  than having to (potentially) walk every folder in the system just to find
   *  the message that the user changed the tag on.
   */
  _indexMaxEventQueueMessages: 20,

  /**
   * Unit testing hook to get us to emit additional logging that verges on
   *  inane for general usage but is helpful in unit test output to get a lay
   *  of the land and for paranoia reasons.
   */
  _unitTestSuperVerbose: false,

  /** The GlodaFolder corresponding to the folder we are indexing. */
  _indexingGlodaFolder: null,
  /** The nsIMsgFolder we are currently indexing. */
  _indexingFolder: null,
  /** The nsIMsgDatabase we are currently indexing. */
  _indexingDatabase: null,
  /**
   * The iterator we are using to iterate over the headers in
   *  this._indexingDatabase.
   */
  _indexingIterator: null,

  /** folder whose entry we are pending on */
  _pendingFolderEntry: null,

  // copy-down the work constants from Gloda
  kWorkSync: Gloda.kWorkSync,
  kWorkAsync: Gloda.kWorkAsync,
  kWorkDone: Gloda.kWorkDone,
  kWorkPause: Gloda.kWorkPause,
  kWorkDoneWithResult: Gloda.kWorkDoneWithResult,

  /**
   * Async common logic for entering the folder with the given ID.  Besides
   *  cutting down on duplicate code, this ensures that we are listening on
   *  the folder in case it tries to go away when we are using it.
   *
   * @return true when the folder was successfully entered, false when we need
   *     to pend on notification of updating of the folder (due to re-parsing
   *     or what have you).  In the event of an actual problem, an exception
   *     will escape.
   */
  _indexerEnterFolder(aFolderID) {
    // leave the folder if we haven't explicitly left it.
    if (this._indexingFolder !== null) {
      this._indexerLeaveFolder();
    }

    this._indexingGlodaFolder = GlodaDatastore._mapFolderID(aFolderID);
    this._indexingFolder = this._indexingGlodaFolder.getXPCOMFolder(
      this._indexingGlodaFolder.kActivityIndexing
    );

    if (this._indexingFolder) {
      this._log.debug("Entering folder: " + this._indexingFolder.URI);
    }

    try {
      // The msf may need to be created or otherwise updated for local folders.
      // This may require yielding until such time as the msf has been created.
      try {
        if (this._indexingFolder instanceof Ci.nsIMsgLocalMailFolder) {
          this._indexingDatabase = this._indexingFolder.getDatabaseWithReparse(
            null,
            null
          );
        }
        // we need do nothing special for IMAP, news, or other
      } catch (e) {
        // getDatabaseWithReparse can return either NS_ERROR_NOT_INITIALIZED or
        //  NS_MSG_ERROR_FOLDER_SUMMARY_OUT_OF_DATE if the net result is that it
        //  is going to send us a notification when the reparse has completed.
        // (note that although internally NS_MSG_ERROR_FOLDER_SUMMARY_MISSING
        //  might get flung around, it won't make it out to us, and will instead
        //  be permuted into an NS_ERROR_NOT_INITIALIZED.)
        if (
          e.result == Cr.NS_ERROR_NOT_INITIALIZED ||
          e.result == NS_MSG_ERROR_FOLDER_SUMMARY_OUT_OF_DATE
        ) {
          // this means that we need to pend on the update; the listener for
          //  FolderLoaded events will call _indexerCompletePendingFolderEntry.
          this._log.debug("Pending on folder load...");
          this._pendingFolderEntry = this._indexingFolder;
          return this.kWorkAsync;
        }
        throw e;
      }
      // we get an nsIMsgDatabase out of this (unsurprisingly) which
      //  explicitly inherits from nsIDBChangeAnnouncer, which has the
      //  AddListener call we want.
      if (this._indexingDatabase == null) {
        this._indexingDatabase = this._indexingFolder.msgDatabase;
      }
      this._indexingDatabase.AddListener(this._databaseAnnouncerListener);
    } catch (ex) {
      this._log.error(
        "Problem entering folder: " +
          (this._indexingFolder ? this._indexingFolder.prettyName : "unknown") +
          ", skipping. Error was: " +
          ex.fileName +
          ":" +
          ex.lineNumber +
          ": " +
          ex
      );
      this._indexingGlodaFolder.indexing = false;
      this._indexingFolder = null;
      this._indexingGlodaFolder = null;
      this._indexingDatabase = null;
      this._indexingEnumerator = null;

      // re-throw, we just wanted to make sure this junk is cleaned up and
      //  get localized error logging...
      throw ex;
    }

    return this.kWorkSync;
  },
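  // Note (restating the behavior above for clarity): worker generators simply
  //  `yield this._indexerEnterFolder(aJob.id);`.  A kWorkAsync return means we
  //  are pending on the FolderLoaded notification (which ends up in
  //  _indexerCompletePendingFolderEntry), while kWorkSync means the folder and
  //  its database are immediately usable.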

  /**
   * If the folder was still parsing/updating when we tried to enter, then this
   *  handler will get called by the listener who got the FolderLoaded message.
   * All we need to do is get the database reference, register a listener on
   *  the db, and retrieve an iterator if desired.
   */
  _indexerCompletePendingFolderEntry() {
    this._indexingDatabase = this._indexingFolder.msgDatabase;
    this._indexingDatabase.AddListener(this._databaseAnnouncerListener);
    this._log.debug("...Folder Loaded!");

    // the load is no longer pending; we certainly don't want more notifications
    this._pendingFolderEntry = null;
    // indexerEnterFolder returned kWorkAsync, which means we need to notify
    //  the callback driver to get things going again.
    GlodaIndexer.callbackDriver();
  },

  /**
   * Enumerate all messages in the folder.
   */
  kEnumAllMsgs: 0,
  /**
   * Enumerate messages that look like they need to be indexed.
   */
  kEnumMsgsToIndex: 1,
  /**
   * Enumerate messages that are already indexed.
   */
  kEnumIndexedMsgs: 2,

  /**
   * Synchronous helper to get an enumerator for the current folder (as found
   *  in |_indexingFolder|).
   *
   * @param aEnumKind One of |kEnumAllMsgs|, |kEnumMsgsToIndex|, or
   *     |kEnumIndexedMsgs|.
   * @param [aAllowPreBadIds=false] Only valid for |kEnumIndexedMsgs|, tells us
   *     that we should treat messages with any gloda-id as dirty, not just
   *     messages that have non-bad message id's.
   */
  _indexerGetEnumerator(aEnumKind, aAllowPreBadIds) {
    if (aEnumKind == this.kEnumMsgsToIndex) {
      // We need to create search terms for messages to index. Messages should
      //  be indexed if they're indexable (local or offline and not expunged)
      //  and either: haven't been indexed, are dirty, or are marked with
      //  a former GLODA_BAD_MESSAGE_ID that is no longer our bad marker.  (Our
      //  bad marker can change on minor schema revs so that we can try and
      //  reindex those messages exactly once and without needing to go through
      //  a pass to mark them as needing one more try.)
      // The basic search expression is:
      //  ((GLODA_MESSAGE_ID_PROPERTY Is 0) ||
      //   (GLODA_MESSAGE_ID_PROPERTY Is GLODA_OLD_BAD_MESSAGE_ID) ||
      //   (GLODA_DIRTY_PROPERTY Isnt 0)) &&
      //  (JUNK_SCORE_PROPERTY Isnt 100)
      // If the folder !isLocal we add the terms:
      //  - if the folder is offline -- && (Status Is nsMsgMessageFlags.Offline)
      //  - && (Status Isnt nsMsgMessageFlags.Expunged)

      let searchSession = Cc[
        "@mozilla.org/messenger/searchSession;1"
      ].createInstance(Ci.nsIMsgSearchSession);
      let searchTerms = [];
      let isLocal = this._indexingFolder instanceof Ci.nsIMsgLocalMailFolder;

      searchSession.addScopeTerm(
        Ci.nsMsgSearchScope.offlineMail,
        this._indexingFolder
      );
      let nsMsgSearchAttrib = Ci.nsMsgSearchAttrib;
      let nsMsgSearchOp = Ci.nsMsgSearchOp;

      // first term: (GLODA_MESSAGE_ID_PROPERTY Is 0
      let searchTerm = searchSession.createTerm();
      searchTerm.booleanAnd = false; // actually don't care here
      searchTerm.beginsGrouping = true;
      searchTerm.attrib = nsMsgSearchAttrib.Uint32HdrProperty;
      searchTerm.op = nsMsgSearchOp.Is;
      let value = searchTerm.value;
      value.attrib = searchTerm.attrib;
      value.status = 0;
      searchTerm.value = value;
      searchTerm.hdrProperty = GLODA_MESSAGE_ID_PROPERTY;
      searchTerms.push(searchTerm);

      // second term: || GLODA_MESSAGE_ID_PROPERTY Is GLODA_OLD_BAD_MESSAGE_ID
      searchTerm = searchSession.createTerm();
      searchTerm.booleanAnd = false; // OR
      searchTerm.attrib = nsMsgSearchAttrib.Uint32HdrProperty;
      searchTerm.op = nsMsgSearchOp.Is;
      value = searchTerm.value;
      value.attrib = searchTerm.attrib;
      value.status = GLODA_OLD_BAD_MESSAGE_ID;
      searchTerm.value = value;
      searchTerm.hdrProperty = GLODA_MESSAGE_ID_PROPERTY;
      searchTerms.push(searchTerm);

      //  third term: || GLODA_DIRTY_PROPERTY Isnt 0 )
      searchTerm = searchSession.createTerm();
      searchTerm.booleanAnd = false;
      searchTerm.endsGrouping = true;
      searchTerm.attrib = nsMsgSearchAttrib.Uint32HdrProperty;
      searchTerm.op = nsMsgSearchOp.Isnt;
      value = searchTerm.value;
      value.attrib = searchTerm.attrib;
      value.status = 0;
      searchTerm.value = value;
      searchTerm.hdrProperty = GLODA_DIRTY_PROPERTY;
      searchTerms.push(searchTerm);

      // JUNK_SCORE_PROPERTY Isnt 100
      // For symmetry with our event-driven stuff, we just directly deal with
      //  the header property.
      searchTerm = searchSession.createTerm();
      searchTerm.booleanAnd = true;
      searchTerm.attrib = nsMsgSearchAttrib.HdrProperty;
      searchTerm.op = nsMsgSearchOp.Isnt;
      value = searchTerm.value;
      value.attrib = searchTerm.attrib;
      value.str = JUNK_SPAM_SCORE_STR;
      searchTerm.value = value;
      searchTerm.hdrProperty = JUNK_SCORE_PROPERTY;
      searchTerms.push(searchTerm);

      if (!isLocal) {
        // If the folder is offline, then the message should be too
        if (this._indexingFolder.getFlag(Ci.nsMsgFolderFlags.Offline)) {
          // third term: && Status Is nsMsgMessageFlags.Offline
          searchTerm = searchSession.createTerm();
          searchTerm.booleanAnd = true;
          searchTerm.attrib = nsMsgSearchAttrib.MsgStatus;
          searchTerm.op = nsMsgSearchOp.Is;
          value = searchTerm.value;
          value.attrib = searchTerm.attrib;
          value.status = Ci.nsMsgMessageFlags.Offline;
          searchTerm.value = value;
          searchTerms.push(searchTerm);
        }

        // fourth term: && Status Isnt nsMsgMessageFlags.Expunged
        searchTerm = searchSession.createTerm();
        searchTerm.booleanAnd = true;
        searchTerm.attrib = nsMsgSearchAttrib.MsgStatus;
        searchTerm.op = nsMsgSearchOp.Isnt;
        value = searchTerm.value;
        value.attrib = searchTerm.attrib;
        value.status = Ci.nsMsgMessageFlags.Expunged;
        searchTerm.value = value;
        searchTerms.push(searchTerm);
      }

      this._indexingEnumerator = this._indexingDatabase.getFilterEnumerator(
        searchTerms,
        true
      );
    } else if (aEnumKind == this.kEnumIndexedMsgs) {
      // Enumerate only messages that are already indexed.  This comes out to:
      //  ((GLODA_MESSAGE_ID_PROPERTY > GLODA_FIRST_VALID_MESSAGE_ID-1) &&
      //   (GLODA_DIRTY_PROPERTY Isnt kMessageFilthy))
      // In English, a message is indexed if (by clause):
      // 1) The message has a gloda-id and that gloda-id is in the valid range
      //    (and not in the bad message marker range).
      // 2) The message has not been marked filthy (which invalidates the
      //    gloda-id.)  We also assume that the folder would not have been
      //    entered at all if it was marked filthy.
      let searchSession = Cc[
        "@mozilla.org/messenger/searchSession;1"
      ].createInstance(Ci.nsIMsgSearchSession);
      let searchTerms = [];

      searchSession.addScopeTerm(
        Ci.nsMsgSearchScope.offlineMail,
        this._indexingFolder
      );
      let nsMsgSearchAttrib = Ci.nsMsgSearchAttrib;
      let nsMsgSearchOp = Ci.nsMsgSearchOp;

      // first term: (GLODA_MESSAGE_ID_PROPERTY > GLODA_FIRST_VALID_MESSAGE_ID-1
      let searchTerm = searchSession.createTerm();
      searchTerm.booleanAnd = false; // actually don't care here
      searchTerm.beginsGrouping = true;
      searchTerm.attrib = nsMsgSearchAttrib.Uint32HdrProperty;
      // use != 0 if we allow pre-bad ids.
      searchTerm.op = aAllowPreBadIds
        ? nsMsgSearchOp.Isnt
        : nsMsgSearchOp.IsGreaterThan;
      let value = searchTerm.value;
      value.attrib = searchTerm.attrib;
      value.status = aAllowPreBadIds ? 0 : GLODA_FIRST_VALID_MESSAGE_ID - 1;
      searchTerm.value = value;
      searchTerm.hdrProperty = GLODA_MESSAGE_ID_PROPERTY;
      searchTerms.push(searchTerm);

      //  second term: && GLODA_DIRTY_PROPERTY Isnt kMessageFilthy)
      searchTerm = searchSession.createTerm();
      searchTerm.booleanAnd = true;
      searchTerm.endsGrouping = true;
      searchTerm.attrib = nsMsgSearchAttrib.Uint32HdrProperty;
      searchTerm.op = nsMsgSearchOp.Isnt;
      value = searchTerm.value;
      value.attrib = searchTerm.attrib;
      value.status = this.kMessageFilthy;
      searchTerm.value = value;
      searchTerm.hdrProperty = GLODA_DIRTY_PROPERTY;
      searchTerms.push(searchTerm);

      // The use-case of already indexed messages does not want them reversed;
      //  we care about seeing the message keys in order.
      this._indexingEnumerator = this._indexingDatabase.getFilterEnumerator(
        searchTerms,
        false
      );
    } else if (aEnumKind == this.kEnumAllMsgs) {
      this._indexingEnumerator = this._indexingDatabase.ReverseEnumerateMessages();
    } else {
      throw new Error("Unknown enumerator type requested:" + aEnumKind);
    }
  },
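  // (For reference, restating the comments above: the kEnumMsgsToIndex filter
  //  is roughly "(gloda-id == 0 || gloda-id == GLODA_OLD_BAD_MESSAGE_ID ||
  //  gloda-dirty != 0) && junkscore != spam score", plus offline/expunged
  //  checks for non-local folders.  Callers invoke it as, e.g.,
  //    this._indexerGetEnumerator(this.kEnumMsgsToIndex);
  //  and then iterate this._indexingEnumerator.)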

  _indexerLeaveFolder() {
    if (this._indexingFolder !== null) {
      if (this._indexingDatabase) {
        this._indexingDatabase.Commit(Ci.nsMsgDBCommitType.kLargeCommit);
        // remove our listener!
        this._indexingDatabase.RemoveListener(this._databaseAnnouncerListener);
      }
      // let the gloda folder know we are done indexing
      this._indexingGlodaFolder.indexing = false;
      // null everyone out
      this._indexingFolder = null;
      this._indexingGlodaFolder = null;
      this._indexingDatabase = null;
      this._indexingEnumerator = null;
    }
  },

  /**
   * Event fed to us by our nsIFolderListener when a folder is loaded.  We use
   *  this event to know when a folder we were trying to open to index is
   *  actually ready to be indexed.  (The summary may have not existed, may have
   *  been out of date, or otherwise.)
   *
   * @param aFolder An nsIMsgFolder, already QI'd.
   */
  _onFolderLoaded(aFolder) {
    if (
      this._pendingFolderEntry !== null &&
      aFolder.URI == this._pendingFolderEntry.URI
    ) {
      this._indexerCompletePendingFolderEntry();
    }
  },

  // it's a getter so we can reference 'this'.  we could memoize.
  get workers() {
    return [
      [
        "folderSweep",
        {
          worker: this._worker_indexingSweep,
          jobCanceled: this._cleanup_indexingSweep,
          cleanup: this._cleanup_indexingSweep,
        },
      ],
      [
        "folder",
        {
          worker: this._worker_folderIndex,
          recover: this._recover_indexMessage,
          cleanup: this._cleanup_indexing,
        },
      ],
      [
        "folderCompact",
        {
          worker: this._worker_folderCompactionPass,
          // compaction enters the folder so needs to know how to leave
          cleanup: this._cleanup_indexing,
        },
      ],
      [
        "message",
        {
          worker: this._worker_messageIndex,
          onSchedule: this._schedule_messageIndex,
          jobCanceled: this._canceled_messageIndex,
          recover: this._recover_indexMessage,
          cleanup: this._cleanup_indexing,
        },
      ],
      [
        "delete",
        {
          worker: this._worker_processDeletes,
        },
      ],

      [
        "fixMissingContacts",
        {
          worker: this._worker_fixMissingContacts,
        },
      ],
    ];
  },

  _schemaMigrationInitiated: false,
  _considerSchemaMigration() {
    if (
      !this._schemaMigrationInitiated &&
      GlodaDatastore._actualSchemaVersion === 26
    ) {
      let job = new IndexingJob("fixMissingContacts", null);
      GlodaIndexer.indexJob(job);
      this._schemaMigrationInitiated = true;
    }
  },

  initialSweep() {
    this.indexingSweepNeeded = true;
  },

  _indexingSweepActive: false,
  /**
   * Indicate that an indexing sweep is desired.  We kick-off an indexing
   *  sweep at start-up and whenever we receive an event-based notification
   *  that we either can't process as an event or that we normally handle
   *  during the sweep pass anyways.
   */
  set indexingSweepNeeded(aNeeded) {
    if (!this._indexingSweepActive && aNeeded) {
      let job = new IndexingJob("folderSweep", null);
      job.mappedFolders = false;
      GlodaIndexer.indexJob(job);
      this._indexingSweepActive = true;
    }
  },
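  // For example (illustrative): initialSweep() above simply does
  //    GlodaMsgIndexer.indexingSweepNeeded = true;
  //  which schedules a single "folderSweep" IndexingJob unless a sweep is
  //  already active.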

  /**
   * Performs the folder sweep, locating folders that should be indexed,
   *  creating a folder indexing job for them, and rescheduling itself for
   *  execution after that job is completed.  Once it indexes all the folders,
   *  if we believe we have deletions to process (or just don't know), it kicks
   *  off a deletion processing job.
   *
   * Folder traversal logic is based off the spotlight/vista indexer code; we
   *  retrieve the list of servers and folders each time we want to find a new
   *  folder to index.  This avoids needing to maintain a perfect model of the
   *  folder hierarchy at all times.  (We may eventually want to do that, but
   *  this is sufficient and safe for now.)  Although our use of dirty flags on
   *  the folders allows us to avoid tracking the 'last folder' we processed,
   *  we do so to avoid getting 'trapped' in a folder with a high rate of
   *  changes.
   */
  *_worker_indexingSweep(aJob) {
    if (!aJob.mappedFolders) {
      // Walk the folders and make sure all the folders we would want to index
      //  are mapped.  Build up a list of GlodaFolders as we go, so that we can
      //  sort them by their indexing priority.
      let foldersToProcess = (aJob.foldersToProcess = []);

      for (let folder of MailServices.accounts.allFolders) {
        if (this.shouldIndexFolder(folder)) {
          foldersToProcess.push(Gloda.getFolderForFolder(folder));
        }
      }

      // sort the folders by priority (descending)
      foldersToProcess.sort(function(a, b) {
        return b.indexingPriority - a.indexingPriority;
      });

      aJob.mappedFolders = true;
    }

    // -- process the folders (in sorted order)
    while (aJob.foldersToProcess.length) {
      let glodaFolder = aJob.foldersToProcess.shift();
      // ignore folders that:
      // - have been deleted out of existence!
      // - are not dirty/have not been compacted
      // - are actively being compacted
      if (
        glodaFolder._deleted ||
        (!glodaFolder.dirtyStatus && !glodaFolder.compacted) ||
        glodaFolder.compacting
      ) {
        continue;
      }

      // If the folder is marked as compacted, give it a compaction job.
      if (glodaFolder.compacted) {
        GlodaIndexer.indexJob(new IndexingJob("folderCompact", glodaFolder.id));
      }

      // add a job for the folder indexing if it was dirty
      if (glodaFolder.dirtyStatus) {
        GlodaIndexer.indexJob(new IndexingJob("folder", glodaFolder.id));
      }

      // re-schedule this job (although this worker will die)
      GlodaIndexer.indexJob(aJob);
      yield this.kWorkDone;
    }

    // consider deletion
    if (this.pendingDeletions || this.pendingDeletions === null) {
      GlodaIndexer.indexJob(new IndexingJob("delete", null));
    }

    // we don't have any more work to do...
    this._indexingSweepActive = false;
    yield this.kWorkDone;
  },

  /**
   * The only state we need to cleanup is that there is no longer an active
   *  indexing sweep.
   */
  _cleanup_indexingSweep(aJob) {
    this._indexingSweepActive = false;
  },

  /**
   * The number of headers to look at before yielding with kWorkSync.  This
   *  is for time-slicing purposes so we still yield to the UI periodically.
   */
  HEADER_CHECK_SYNC_BLOCK_SIZE: 25,

  FOLDER_COMPACTION_PASS_BATCH_SIZE: 512,
  /**
   * Special indexing pass for (local) folders that have been compacted.  The
   *  compaction can cause message keys to change because message keys in local
   *  folders are simply offsets into the mbox file.  Accordingly, we need to
   *  update the gloda records/objects to point them at the new message key.
   *
   * Our general algorithm is to perform two traversals in parallel.  The first
   *  is a straightforward enumeration of the message headers in the folder that
   *  apparently have been already indexed.  These provide us with the message
   *  key and the "gloda-id" property.
   * The second is a list of tuples containing a gloda message id, its current
   *  message key per the gloda database, and the message-id header.  We re-fill
   *  the list with batches on-demand.  This allows us to both avoid dispatching
   *  needless UPDATEs as well as deal with messages that were tracked by the
   *  PendingCommitTracker but were discarded by the compaction notification.
   *
   * We end up processing two streams of gloda-id's and some extra info.  In
   *  the normal case we expect these two streams to line up exactly and all
   *  we need to do is update the message key if it has changed.
   *
   * There are a few exceptional cases where things do not line up:
   * 1) The gloda database knows about a message that the enumerator does not
   *    know about...
   *   a) This message exists in the folder (identified using its message-id
   *      header).  This means the message got indexed but PendingCommitTracker
   *      had to forget about the info when the compaction happened.  We
   *      re-establish the link and track the message in PendingCommitTracker
   *      again.
   *   b) The message does not exist in the folder.  This means the message got
   *      indexed, PendingCommitTracker had to forget about the info, and
   *      then the message either got moved or deleted before now.  We mark
   *      the message as deleted; this allows the gloda message to be reused
   *      if the move target has not yet been indexed or purged if it already
   *      has been and the gloda message is a duplicate.  And obviously, if the
   *      event that happened was actually a delete, then the delete is the
   *      right thing to do.
   * 2) The enumerator knows about a message that the gloda database does not
   *    know about.  This is unexpected and should not happen.  We log a
   *    warning.  We are able to differentiate this case from case #1a by
   *    retrieving the message header associated with the next gloda message
   *    (using the message-id header per 1a again).  If the gloda message's
   *    message key is after the enumerator's message key then we know this is
   *    case #2.  (It implies an insertion in the enumerator stream which is how
   *    we define the unexpected case.)
   *
   * Besides updating the database rows, we also need to make sure that
   *  in-memory representations are updated.  Immediately after dispatching
   *  UPDATE changes to the database we use the same set of data to walk the
   *  live collections and update any affected messages.  We are then able to
   *  discard the information.  Although this means that we will have to
   *  potentially walk the live collections multiple times, unless something
   *  has gone horribly wrong, the number of collections should be reasonable
   *  and the lookups are cheap.  We bias batch sizes accordingly.
   *
   * Because we operate based on chunks we need to make sure that when we
   *  actually deal with multiple chunks that we don't step on our own feet with
   *  our database updates.  Since compaction of message key K results in a new
   *  message key K' such that K' <= K, we can reliably issue database
   *  updates for all values <= K.  Which means our feet are safe no matter
   *  when we issue the update command.  For maximum cache benefit, we issue
   *  our updates prior to our new query since they should still be maximally
   *  hot at that point.
   */
  *_worker_folderCompactionPass(aJob, aCallbackHandle) {
    yield this._indexerEnterFolder(aJob.id);

    // It's conceivable that with a folder sweep we might end up trying to
    //  compact a folder twice.  Bail early in this case.
    if (!this._indexingGlodaFolder.compacted) {
      yield this.kWorkDone;
    }

    // this is a forward enumeration (sometimes we reverse enumerate; not here)
    this._indexerGetEnumerator(this.kEnumIndexedMsgs);

    const HEADER_CHECK_SYNC_BLOCK_SIZE = this.HEADER_CHECK_SYNC_BLOCK_SIZE;
    const FOLDER_COMPACTION_PASS_BATCH_SIZE = this
      .FOLDER_COMPACTION_PASS_BATCH_SIZE;

    // Tuples of [gloda id, message key, message-id header] from
    //  folderCompactionPassBlockFetch
    let glodaIdsMsgKeysHeaderIds = [];
    // Unpack each tuple from glodaIdsMsgKeysHeaderIds into these guys.
    // (Initialize oldMessageKey because we use it to kickstart our query.)
    let oldGlodaId,
      oldMessageKey = -1,
      oldHeaderMessageId;
    // parallel lists of gloda ids and message keys to pass to
    //  GlodaDatastore.updateMessageLocations
    let updateGlodaIds = [];
    let updateMessageKeys = [];
    // list of gloda id's to mark deleted
    let deleteGlodaIds = [];

    // for GC reasons we need to track the number of headers seen
    let numHeadersSeen = 0;

    // We are consuming two lists; our loop structure has to reflect that.
    let headerIter = this._indexingEnumerator[Symbol.iterator]();
    let mayHaveMoreGlodaMessages = true;
    let keepIterHeader = false;
    let keepGlodaTuple = false;
    let msgHdr = null;
    while (headerIter || mayHaveMoreGlodaMessages) {
      let glodaId;
      if (headerIter) {
        if (!keepIterHeader) {
          let result = headerIter.next();
          if (result.done) {
            headerIter = null;
            msgHdr = null;
            // do the loop check again
            continue;
          }
          msgHdr = result.value;
        } else {
          keepIterHeader = false;
        }
      }

      if (msgHdr) {
        numHeadersSeen++;
        if (numHeadersSeen % HEADER_CHECK_SYNC_BLOCK_SIZE == 0) {
          yield this.kWorkSync;
        }

        // There is no need to check with PendingCommitTracker.  If a message
        //  somehow got indexed between the time the compaction killed
        //  everything and the time we run, that is a bug.
        glodaId = msgHdr.getUint32Property(GLODA_MESSAGE_ID_PROPERTY);
        // (there is also no need to check for gloda dirty since the enumerator
        //  filtered that for us.)
      }

      // get more [gloda id, message key, message-id header] tuples if we are out
      if (!glodaIdsMsgKeysHeaderIds.length && mayHaveMoreGlodaMessages) {
        // Since we operate on blocks, getting a new block implies we should
        //  flush the last block if applicable.
        if (updateGlodaIds.length) {
          GlodaDatastore.updateMessageLocations(
            updateGlodaIds,
            updateMessageKeys,
            aJob.id,
            true
          );
          updateGlodaIds = [];
          updateMessageKeys = [];
        }

        if (deleteGlodaIds.length) {
          GlodaDatastore.markMessagesDeletedByIDs(deleteGlodaIds);
          deleteGlodaIds = [];
        }

        GlodaDatastore.folderCompactionPassBlockFetch(
          aJob.id,
          oldMessageKey + 1,
          FOLDER_COMPACTION_PASS_BATCH_SIZE,
          aCallbackHandle.wrappedCallback
        );
        glodaIdsMsgKeysHeaderIds = yield this.kWorkAsync;
        // Reverse so we can use pop instead of shift and I don't need to be
        //  paranoid about performance.
        glodaIdsMsgKeysHeaderIds.reverse();

        if (!glodaIdsMsgKeysHeaderIds.length) {
          mayHaveMoreGlodaMessages = false;

          // We shouldn't be in the loop anymore if headerIter is dead now.
          if (!headerIter) {
            break;
          }
        }
      }

      if (!keepGlodaTuple) {
        if (mayHaveMoreGlodaMessages) {
          [
            oldGlodaId,
            oldMessageKey,
            oldHeaderMessageId,
          ] = glodaIdsMsgKeysHeaderIds.pop();
        } else {
          oldGlodaId = oldMessageKey = oldHeaderMessageId = null;
        }
      } else {
        keepGlodaTuple = false;
      }

      // -- normal expected case
      if (glodaId == oldGlodaId) {
        // only need to do something if the key is not right
        if (msgHdr.messageKey != oldMessageKey) {
          updateGlodaIds.push(glodaId);
          updateMessageKeys.push(msgHdr.messageKey);
        }
      } else {
        // -- exceptional cases
        // This should always return a value unless something is very wrong.
        //  We do not want to catch the exception if one happens.
        let idBasedHeader = oldHeaderMessageId
          ? this._indexingDatabase.getMsgHdrForMessageID(oldHeaderMessageId)
          : false;
        // - Case 1b.
        // We want to mark the message as deleted.
        if (idBasedHeader == null) {
          deleteGlodaIds.push(oldGlodaId);
        } else if (
          idBasedHeader &&
          ((msgHdr && idBasedHeader.messageKey < msgHdr.messageKey) || !msgHdr)
        ) {
          // - Case 1a
          // The expected case is that the message referenced by the gloda
          //  database precedes the header the enumerator told us about.  This
          //  is expected because if PendingCommitTracker did not mark the
          //  message as indexed/clean then the enumerator would not tell us
          //  about it.
          // Also, if we ran out of headers from the enumerator, this is a dead
          //  giveaway that this is the expected case.
          // tell the pending commit tracker about the gloda database one
          PendingCommitTracker.track(idBasedHeader, oldGlodaId);
          // and we might need to update the message key too
          if (idBasedHeader.messageKey != oldMessageKey) {
            updateGlodaIds.push(oldGlodaId);
            updateMessageKeys.push(idBasedHeader.messageKey);
          }
          // Take another pass through the loop so that we check the
          //  enumerator header against the next message in the gloda
          //  database.
          keepIterHeader = true;
        } else if (msgHdr) {
          // - Case 2
          // Whereas if the message referenced by gloda has a message key
          //  greater than the one returned by the enumerator, then we have a
          //  header claiming to be indexed by gloda that gloda does not
          //  actually know about.  This is exceptional and gets a warning.
          this._log.warn(
            "Observed header that claims to be gloda indexed " +
              "but that gloda has never heard of during " +
              "compaction." +
              " In folder: " +
              msgHdr.folder.URI +
              " sketchy key: " +
              msgHdr.messageKey +
              " subject: " +
              msgHdr.mime2DecodedSubject
          );
          // Keep this tuple around for the next enumerator provided header
          keepGlodaTuple = true;
        }
      }
    }
    // If we don't flush the update, no one will!
    if (updateGlodaIds.length) {
      GlodaDatastore.updateMessageLocations(
        updateGlodaIds,
        updateMessageKeys,
        aJob.id,
        true
      );
    }
    if (deleteGlodaIds.length) {
      GlodaDatastore.markMessagesDeletedByIDs(deleteGlodaIds);
    }

    this._indexingGlodaFolder._setCompactedState(false);

    this._indexerLeaveFolder();
    yield this.kWorkDone;
  },

  /**
   * Index the contents of a folder.
   */
  *_worker_folderIndex(aJob, aCallbackHandle) {
    yield this._indexerEnterFolder(aJob.id);

    if (!this.shouldIndexFolder(this._indexingFolder)) {
      aJob.safelyInvokeCallback(true);
      yield this.kWorkDone;
    }

    // Make sure listeners get notified about this job.
    GlodaIndexer._notifyListeners();

    // there is of course a cost to all this header investigation even if we
    //  don't do something.  so we will yield with kWorkSync for every block.
    const HEADER_CHECK_SYNC_BLOCK_SIZE = this.HEADER_CHECK_SYNC_BLOCK_SIZE;

    // we can safely presume if we are here that this folder has been selected
    //  for offline processing...

    // -- Filthy Folder
    // A filthy folder may have misleading properties on the message that claim
    //  the message is indexed.  They are misleading because the database, for
    //  whatever reason, does not have the messages (accurately) indexed.
    // We need to walk all the messages and mark them filthy if they have a
    //  dirty property.  Once we have done this, we can downgrade the folder's
    //  dirty status to plain dirty.  We do this rather than trying to process
1416    //  everyone in one go in a filthy context because if we have to terminate
1417    //  indexing before we quit, we don't want to have to re-index messages next
1418    //  time.  (This could even lead to never completing indexing in a
1419    //  pathological situation.)
1420    let glodaFolder = GlodaDatastore._mapFolder(this._indexingFolder);
1421    if (glodaFolder.dirtyStatus == glodaFolder.kFolderFilthy) {
1422      this._indexerGetEnumerator(this.kEnumIndexedMsgs, true);
1423      let count = 0;
1424      for (let msgHdr of this._indexingEnumerator) {
1425        // we still need to avoid locking up the UI, pause periodically...
1426        if (++count % HEADER_CHECK_SYNC_BLOCK_SIZE == 0) {
1427          yield this.kWorkSync;
1428        }
1429
1430        let glodaMessageId = msgHdr.getUint32Property(
1431          GLODA_MESSAGE_ID_PROPERTY
1432        );
1433        // if it has a gloda message id, we need to mark it filthy
1434        if (glodaMessageId != 0) {
1435          msgHdr.setUint32Property(GLODA_DIRTY_PROPERTY, this.kMessageFilthy);
1436        }
1437        // if it doesn't have a gloda message id, we will definitely index it,
1438        //  so no action is required.
1439      }
1440      // Commit the filthy status changes to the message database.
1441      this._indexingDatabase.Commit(Ci.nsMsgDBCommitType.kLargeCommit);
1442
1443      // this will automatically persist to the database
1444      glodaFolder._downgradeDirtyStatus(glodaFolder.kFolderDirty);
1445    }
1446
1447    // Figure out whether we're supposed to index _everything_ or just what
1448    //  has not yet been indexed.
1449    let force = "force" in aJob && aJob.force;
1450    let enumeratorType = force ? this.kEnumAllMsgs : this.kEnumMsgsToIndex;
1451
1452    // Pass 1: count the number of messages to index.
1453    //  We do this in order to be able to report to the user what we're doing.
1454    // TODO: give up after reaching a certain number of messages in folders
1455    //  with ridiculous numbers of messages and make the interface just say
1456    //  something like "over N messages to go."
1457
1458    this._indexerGetEnumerator(enumeratorType);
1459
1460    let numMessagesToIndex = 0;
1461    // eslint-disable-next-line no-unused-vars
1462    for (let ignore of this._indexingEnumerator) {
1463      // We're only counting, so do bigger chunks on this pass.
1464      ++numMessagesToIndex;
1465      if (numMessagesToIndex % (HEADER_CHECK_SYNC_BLOCK_SIZE * 8) == 0) {
1466        yield this.kWorkSync;
1467      }
1468    }
1469
1470    aJob.goal = numMessagesToIndex;
1471
1472    if (numMessagesToIndex > 0) {
1473      // We used up the iterator, get a new one.
1474      this._indexerGetEnumerator(enumeratorType);
1475
1476      // Pass 2: index the messages.
1477      let count = 0;
1478      for (let msgHdr of this._indexingEnumerator) {
1479        // per above, we want to periodically release control while doing all
1480        // this header traversal/investigation.
1481        if (++count % HEADER_CHECK_SYNC_BLOCK_SIZE == 0) {
1482          yield this.kWorkSync;
1483        }
1484
1485        // To keep our counts more accurate, increment the offset before
1486        //  potentially skipping any messages.
1487        ++aJob.offset;
1488
1489        // Skip messages that have not yet been reported to us as existing via
1490        //  msgsClassified.
1491        if (
1492          this._indexingFolder.getProcessingFlags(msgHdr.messageKey) &
1493          NOT_YET_REPORTED_PROCESSING_FLAGS
1494        ) {
1495          continue;
1496        }
1497
1498        // Because the gloda id could be in-flight, we need to double-check the
1499        //  enumerator here since it can't know about our in-memory stuff.
1500        let [glodaId, glodaDirty] = PendingCommitTracker.getGlodaState(msgHdr);
1501        // if the message seems valid and we are not forcing indexing, skip it.
1502        //  (that means good gloda id and not dirty)
1503        if (
1504          !force &&
1505          glodaId >= GLODA_FIRST_VALID_MESSAGE_ID &&
1506          glodaDirty == this.kMessageClean
1507        ) {
1508          continue;
1509        }
1510
1511        this._log.debug(">>>  calling _indexMessage");
1512        yield aCallbackHandle.pushAndGo(
1513          this._indexMessage(msgHdr, aCallbackHandle),
1514          { what: "indexMessage", msgHdr }
1515        );
1516        GlodaIndexer._indexedMessageCount++;
1517        this._log.debug("<<<  back from _indexMessage");
1518      }
1519    }
1520
1521    // This will trigger an (async) db update which cannot hit the disk prior to
1522    //  the actual database records that constitute the clean state.
1523    // XXX There is the slight possibility that, in the event of a crash, this
1524    //  will hit the disk but the gloda-id properties on the headers will not
1525    //  get set.  This should ideally be resolved by detecting a non-clean
1526    //  shutdown and marking all folders as dirty.
1527    glodaFolder._downgradeDirtyStatus(glodaFolder.kFolderClean);
1528
1529    // by definition, it's not likely we'll visit this folder again anytime soon
1530    this._indexerLeaveFolder();
1531
1532    aJob.safelyInvokeCallback(true);
1533
1534    yield this.kWorkDone;
1535  },
1536
1537  /**
1538   * Invoked when a "message" job is scheduled so that we can clear
1539   *  _pendingAddJob if that is the job.  We do this so that work items are not
1540   *  added to _pendingAddJob while it is being processed.
1541   */
1542  _schedule_messageIndex(aJob, aCallbackHandle) {
1543    // we do not want new work items to be added as we are processing, so
1544    //  clear _pendingAddJob.  A new job will be created as needed.
1545    if (aJob === this._pendingAddJob) {
1546      this._pendingAddJob = null;
1547    }
1548    // update our goal from the items length
1549    aJob.goal = aJob.items.length;
1550  },
1551  /**
1552   * If the job gets canceled, we need to make sure that we clear out pending
1553   *  add job or our state will get wonky.
1554   */
1555  _canceled_messageIndex(aJob) {
1556    if (aJob === this._pendingAddJob) {
1557      this._pendingAddJob = null;
1558    }
1559  },
1560
1561  /**
1562   * Index a specific list of messages that we know to index from
1563   *  event-notification hints.
1564   */
1565  *_worker_messageIndex(aJob, aCallbackHandle) {
1566    // if we are already in the correct folder, our "get in the folder" clause
1567    //  will not execute, so we need to make sure this value is accurate in
1568    //  that case.  (and we want to avoid multiple checks...)
1569    for (; aJob.offset < aJob.items.length; aJob.offset++) {
1570      let item = aJob.items[aJob.offset];
1571      // item is either [folder ID, message key] or
1572      //                [folder ID, message ID]
1573
1574      let glodaFolderId = item[0];
1575      // If the folder has been deleted since we queued, skip this message
1576      if (!GlodaDatastore._folderIdKnown(glodaFolderId)) {
1577        continue;
1578      }
1579      let glodaFolder = GlodaDatastore._mapFolderID(glodaFolderId);
1580
1581      // Stay out of folders that:
1582      // - are compacting / compacted and not yet processed
1583      // - got deleted (this would be redundant if we had a stance on id nukage)
1584      // (these things could have changed since we queued the event)
1585      if (
1586        glodaFolder.compacting ||
1587        glodaFolder.compacted ||
1588        glodaFolder._deleted
1589      ) {
1590        continue;
1591      }
1592
1593      // get in the folder
1594      if (this._indexingGlodaFolder != glodaFolder) {
1595        yield this._indexerEnterFolder(glodaFolderId);
1596
1597        // Now that we have the real nsIMsgFolder, sanity-check that we should
1598        //  be indexing it.  (There are some checks that require the
1599        //  nsIMsgFolder.)
1600        if (!this.shouldIndexFolder(this._indexingFolder)) {
1601          continue;
1602        }
1603      }
1604
1605      let msgHdr;
1606      // GetMessageHeader can be affected by the use cache, so we need to check
1607      //  ContainsKey first to see if the header is really actually there.
1608      if (typeof item[1] == "number") {
1609        msgHdr =
1610          this._indexingDatabase.ContainsKey(item[1]) &&
1611          this._indexingFolder.GetMessageHeader(item[1]);
1612      } else {
1613        // Same deal as in move processing.
1614        // TODO fixme to not assume singular message-id's.
1615        msgHdr = this._indexingDatabase.getMsgHdrForMessageID(item[1]);
1616      }
1617
1618      if (msgHdr) {
1619        yield aCallbackHandle.pushAndGo(
1620          this._indexMessage(msgHdr, aCallbackHandle),
1621          { what: "indexMessage", msgHdr }
1622        );
1623      } else {
1624        yield this.kWorkSync;
1625      }
1626    }
1627
1628    // There is no real reason to stay 'in' the folder.  If we are going to get
1629    //  more events from the folder, its database would have to be open for us
1630    //  to get the events, so it's not like we're creating an efficiency
1631    //  problem where we unload a folder just to load it again in 2 seconds.
1632    // (Well, at least assuming the views are good about holding onto the
1633    //  database references even though they go out of their way to avoid
1634    //  holding onto message header references.)
1635    this._indexerLeaveFolder();
1636
1637    yield this.kWorkDone;
1638  },
1639
1640  /**
1641   * Recover from a "folder" or "message" job failing inside a call to
1642   *  |_indexMessage|, marking the message bad.  If we were not in an
1643   *  |_indexMessage| call, then fail to recover.
1644   *
1645   * @param aJob The job that was being worked.  We ignore this for now.
1646   * @param aContextStack The callbackHandle mechanism's context stack.  When we
1647   *     invoke pushAndGo for _indexMessage we put something in so we can
1648   *     detect when it is on the async stack.
1649   * @param aException The exception that is necessitating we attempt to
1650   *     recover.
1651   *
1652   * @return 1 if we were able to recover (because we want the call stack
1653   *     popped down to our worker), false if we can't.
1654   */
1655  _recover_indexMessage(aJob, aContextStack, aException) {
1656    // See if indexMessage is on the stack...
1657    if (
1658      aContextStack.length >= 2 &&
1659      aContextStack[1] &&
1660      "what" in aContextStack[1] &&
1661      aContextStack[1].what == "indexMessage"
1662    ) {
1663      // it is, so this is probably recoverable.
1664
1665      this._log.debug(
1666        "Exception while indexing message, marking it bad (gloda id of 1)."
1667      );
1668
1669      // -- Mark the message as bad
1670      let msgHdr = aContextStack[1].msgHdr;
1671      // (In the worst case, the header is no longer valid, which will result in
1672      //  exceptions.  We need to be prepared for that.)
1673      try {
1674        msgHdr.setUint32Property(
1675          GLODA_MESSAGE_ID_PROPERTY,
1676          GLODA_BAD_MESSAGE_ID
1677        );
1678        // clear the dirty bit if it has one
1679        if (msgHdr.getUint32Property(GLODA_DIRTY_PROPERTY)) {
1680          msgHdr.setUint32Property(GLODA_DIRTY_PROPERTY, 0);
1681        }
1682      } catch (ex) {
1683        // If we are indexing a folder and the message header is no longer
1684        //  valid, then it's quite likely the whole folder is no longer valid.
1685        //  But since in the event-driven message indexing case we could have
1686        //  other valid things to look at, let's try and recover.  The folder
1687        //  indexing case will come back to us shortly and we will indicate
1688        //  recovery is not possible at that point.
1689        // So do nothing here since by popping the indexing of the specific
1690        //  message out of existence we are recovering.
1691      }
1692      return 1;
1693    }
1694    return false;
1695  },
1696
1697  /**
1698   * Cleanup after an aborted "folder" or "message" job.
1699   */
1700  _cleanup_indexing(aJob) {
1701    this._indexerLeaveFolder();
1702    aJob.safelyInvokeCallback(false);
1703  },
1704
1705  /**
1706   * Maximum number of deleted messages to process at a time.  Arbitrary; there
1707   *  are no real known performance constraints at this point.
1708   */
1709  DELETED_MESSAGE_BLOCK_SIZE: 32,
1710
1711  /**
1712   * Process pending deletes...
1713   */
1714  *_worker_processDeletes(aJob, aCallbackHandle) {
1715    // Count the number of messages we will eventually process.  People freak
1716    //  out when the number is constantly increasing because they think gloda
1717    //  has gone rogue.  (Note: new deletions can still accumulate during
1718    //  our execution, so we may 'expand' our count a little still.)
1719    this._datastore.countDeletedMessages(aCallbackHandle.wrappedCallback);
1720    aJob.goal = yield this.kWorkAsync;
1721    this._log.debug(
1722      "There are currently " +
1723        aJob.goal +
1724        " messages awaiting" +
1725        " deletion processing."
1726    );
1727
1728    // get a block of messages to delete.
1729    let query = Gloda.newQuery(Gloda.NOUN_MESSAGE, {
1730      noDbQueryValidityConstraints: true,
1731    });
1732    query._deleted(1);
1733    query.limit(this.DELETED_MESSAGE_BLOCK_SIZE);
1734    let deletedCollection = query.getCollection(aCallbackHandle);
1735    yield this.kWorkAsync;
1736
1737    while (deletedCollection.items.length) {
1738      for (let message of deletedCollection.items) {
1739        // If it turns out our count is wrong (because some new deletions
1740        //  happened since we entered this worker), let's issue a new count
1741        //  and use that to accurately update our goal.
1742        if (aJob.offset >= aJob.goal) {
1743          this._datastore.countDeletedMessages(aCallbackHandle.wrappedCallback);
1744          aJob.goal += yield this.kWorkAsync;
1745        }
1746
1747        yield aCallbackHandle.pushAndGo(
1748          this._deleteMessage(message, aCallbackHandle)
1749        );
1750        aJob.offset++;
1751        yield this.kWorkSync;
1752      }
1753
1754      deletedCollection = query.getCollection(aCallbackHandle);
1755      yield this.kWorkAsync;
1756    }
1757    this.pendingDeletions = false;
1758
1759    yield this.kWorkDone;
1760  },
1761
1762  *_worker_fixMissingContacts(aJob, aCallbackHandle) {
1763    let identityContactInfos = [];
1764
1765    // -- asynchronously get a list of all identities without contacts
1766    // The upper bound on the number of messed up contacts is the number of
1767    //  contacts in the user's address book.  This should be small enough
1768    //  (and the data size small enough) that this won't explode Thunderbird.
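    // (The LEFT JOIN combined with the "contacts.id IS NULL" filter below is
    //  an anti-join: it selects exactly those e-mail identities whose
    //  contactID no longer matches any row in the contacts table.)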
1769    let queryStmt = GlodaDatastore._createAsyncStatement(
1770      "SELECT identities.id, identities.contactID, identities.value " +
1771        "FROM identities " +
1772        "LEFT JOIN contacts ON identities.contactID = contacts.id " +
1773        "WHERE identities.kind = 'email' AND contacts.id IS NULL",
1774      true
1775    );
1776    queryStmt.executeAsync({
1777      handleResult(aResultSet) {
1778        let row;
1779        while ((row = aResultSet.getNextRow())) {
1780          identityContactInfos.push({
1781            identityId: row.getInt64(0),
1782            contactId: row.getInt64(1),
1783            email: row.getString(2),
1784          });
1785        }
1786      },
1787      handleError(aError) {},
1788      handleCompletion(aReason) {
1789        GlodaDatastore._asyncCompleted();
1790        aCallbackHandle.wrappedCallback();
1791      },
1792    });
1793    queryStmt.finalize();
1794    GlodaDatastore._pendingAsyncStatements++;
1795    yield this.kWorkAsync;
1796
1797    // -- perform fixes only if there were missing contacts
1798    if (identityContactInfos.length) {
1799      const yieldEvery = 64;
1800      // - create the missing contacts
1801      for (let i = 0; i < identityContactInfos.length; i++) {
1802        if (i % yieldEvery === 0) {
1803          yield this.kWorkSync;
1804        }
1805
1806        let info = identityContactInfos[i],
1807          card = MailServices.ab.cardForEmailAddress(info.email),
1808          contact = new GlodaContact(
1809            GlodaDatastore,
1810            info.contactId,
1811            null,
1812            null,
1813            card ? card.displayName || info.email : info.email,
1814            0,
1815            0
1816          );
1817        GlodaDatastore.insertContact(contact);
1818
1819        // update the in-memory rep of the identity to know about the contact
1820        //  if there is one.
1821        let identity = GlodaCollectionManager.cacheLookupOne(
1822          Gloda.NOUN_IDENTITY,
1823          info.identityId,
1824          false
1825        );
1826        if (identity) {
1827          // Unfortunately, although this fixes the (reachable) Identity and
1828          //  exposes the Contact, it does not make the Contact reachable from
1829          //  the collection manager.  This will make explicit queries that look
1830          //  up the contact potentially see the case where
1831          //  contact.identities[0].contact !== contact.  Alternately, that
1832          //  may not happen and instead the "contact" object we created above
1833          //  may become unlinked.  (I'd have to trace some logic I don't feel
1834          //  like tracing.)  Either way, the potential fallout is minimal
1835          //  since the object identity invariant will just lapse and popularity
1836          //  on the contact may become stale, and neither of those meaningfully
1837          //  affect the operation of anything in Thunderbird.
1838          // If we really cared, we could find all the dominant collections
1839          //  that reference the identity and update their corresponding
1840          //  contact collection to make it reachable.  That use-case does not
1841          //  exist outside of here, which is why we're punting.
1842          identity._contact = contact;
1843          contact._identities = [identity];
1844        }
1845
1846        // NOTE: If the addressbook indexer did anything useful other than
1847        //  adapting to name changes, we could schedule indexing of the cards at
1848        //  this time.  However, as of this writing, it doesn't, and this task
1849        //  is a one-off relevant only to the time of this writing.
1850      }
1851
1852      // - mark all folders as dirty, initiate indexing sweep
1853      this.dirtyAllKnownFolders();
1854      this.indexingSweepNeeded = true;
1855    }
1856
1857    // -- mark the schema upgrade, be done
1858    GlodaDatastore._updateSchemaVersion(GlodaDatastore._schemaVersion);
1859    yield this.kWorkDone;
1860  },
1861
1862  /**
1863   * Determine whether a folder is suitable for indexing.
1864   *
1865   * @param aMsgFolder An nsIMsgFolder you want to see if we should index.
1866   *
1867   * @returns true if we want to index messages in this type of folder, false if
1868   *     we do not.
1869   */
1870  shouldIndexFolder(aMsgFolder) {
1871    let folderFlags = aMsgFolder.flags;
1872    // Completely ignore non-mail and virtual folders.  They should never even
1873    //  get to be GlodaFolder instances.
1874    if (
1875      !(folderFlags & Ci.nsMsgFolderFlags.Mail) ||
1876      folderFlags & Ci.nsMsgFolderFlags.Virtual
1877    ) {
1878      return false;
1879    }
1880
1881    // Some folders do not really exist; we can detect this by getStringProperty
1882    //  exploding when we call it.  This is primarily a concern because
1883    //  _mapFolder calls said exploding method, but we also don't want to
1884    //  even think about indexing folders that don't exist.  (Such folders are
1885    //  likely the result of a messed up profile.)
1886    try {
1887      // flags is used because it should always be in the cache avoiding a miss
1888      //  which would compel an msf open.
1889      aMsgFolder.getStringProperty("flags");
1890    } catch (ex) {
1891      return false;
1892    }
1893
1894    // Now see what our gloda folder information has to say about the folder.
1895    let glodaFolder = GlodaDatastore._mapFolder(aMsgFolder);
1896    return glodaFolder.indexingPriority != glodaFolder.kIndexingNeverPriority;
1897  },
1898
1899  /**
1900   * Sets the indexing priority for this folder and persists it both to Gloda,
1901   * and, for backup purposes, to the nsIMsgFolder via string property as well.
1902   *
1903   * Setting this priority may cause the indexer to either reindex this folder,
1904   * or remove this folder from the existing index.
1905   *
1906   * @param {nsIMsgFolder} aFolder
1907   * @param {Number} aPriority (one of the priority constants from GlodaFolder)
1908   */
1909  setFolderIndexingPriority(aFolder, aPriority) {
1910    let glodaFolder = GlodaDatastore._mapFolder(aFolder);
1911
1912    // if there's been no change, we're done
1913    if (aPriority == glodaFolder.indexingPriority) {
1914      return;
1915    }
1916
1917    // save off the old priority, and set the new one
1918    let previousPrio = glodaFolder.indexingPriority;
1919    glodaFolder._indexingPriority = aPriority;
1920
1921    // persist the new priority
1922    GlodaDatastore.updateFolderIndexingPriority(glodaFolder);
1923    aFolder.setStringProperty("indexingPriority", Number(aPriority).toString());
1924
1925    // if we've been told never to index this folder...
1926    if (aPriority == glodaFolder.kIndexingNeverPriority) {
1927      // stop doing so
1928      if (this._indexingFolder == aFolder) {
1929        GlodaIndexer.killActiveJob();
1930      }
1931
1932      // mark all existing messages as deleted
1933      GlodaDatastore.markMessagesDeletedByFolderID(glodaFolder.id);
1934
1935      // re-index
1936      GlodaMsgIndexer.indexingSweepNeeded = true;
1937    } else if (previousPrio == glodaFolder.kIndexingNeverPriority) {
1938      // there's no existing index, but the user now wants one
1939      glodaFolder._dirtyStatus = glodaFolder.kFolderFilthy;
1940      GlodaDatastore.updateFolderDirtyStatus(glodaFolder);
1941      GlodaMsgIndexer.indexingSweepNeeded = true;
1942    }
1943  },
1944
1945  /**
1946   * Resets the indexing priority on the given folder to whatever the default
1947   * is for folders of that type.
1948   *
1949   * @note Calls setFolderIndexingPriority under the hood, so has identical
1950   *       potential reindexing side-effects
1951   *
1952   * @param {nsIMsgFolder} aFolder
1953   * @param {boolean} aAllowSpecialFolderIndexing
1954   */
1955  resetFolderIndexingPriority(aFolder, aAllowSpecialFolderIndexing) {
1956    this.setFolderIndexingPriority(
1957      aFolder,
1958      GlodaDatastore.getDefaultIndexingPriority(
1959        aFolder,
1960        aAllowSpecialFolderIndexing
1961      )
1962    );
1963  },
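
  // Illustrative usage sketch (not invoked from this file): disabling indexing
  //  for a folder kills any active job on it and marks its already-indexed
  //  messages as deleted; resetting restores the type-based default priority.
  //  (someFolder stands in for any nsIMsgFolder.)
  //
  //   let glodaFolder = GlodaDatastore._mapFolder(someFolder);
  //   GlodaMsgIndexer.setFolderIndexingPriority(
  //     someFolder,
  //     glodaFolder.kIndexingNeverPriority
  //   );
  //   // ...and later, back to the default for this folder type:
  //   GlodaMsgIndexer.resetFolderIndexingPriority(someFolder, false);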
1964
1965  /**
1966   * Queue all of the folders of all of the accounts of the current profile
1967   *  for indexing.  We traverse all folders and queue them immediately to try
1968   *  and have an accurate estimate of the number of folders that need to be
1969   *  indexed.  (We previously queued accounts rather than immediately
1970   *  walking their list of folders.)
1971   */
1972  indexEverything() {
1973    this._log.info("Queueing all accounts for indexing.");
1974
1975    GlodaDatastore._beginTransaction();
1976    for (let account of MailServices.accounts.accounts) {
1977      this.indexAccount(account);
1978    }
1979    GlodaDatastore._commitTransaction();
1980  },
1981
1982  /**
1983   * Queue all of the folders belonging to an account for indexing.
1984   */
1985  indexAccount(aAccount) {
1986    let rootFolder = aAccount.incomingServer.rootFolder;
1987    if (rootFolder instanceof Ci.nsIMsgFolder) {
1988      this._log.info("Queueing account folders for indexing: " + aAccount.key);
1989
1990      for (let folder of rootFolder.descendants) {
1991        if (this.shouldIndexFolder(folder)) {
1992          GlodaIndexer.indexJob(
1993            new IndexingJob("folder", GlodaDatastore._mapFolder(folder).id)
1994          );
1995        }
1996      }
1997    } else {
1998      this._log.info("Skipping Account, root folder not nsIMsgFolder");
1999    }
2000  },
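
  // Illustrative usage sketch: both entry points just queue "folder" jobs, so
  //  a caller wanting a sweep can use either of the following.  (anAccount
  //  stands in for an nsIMsgAccount from MailServices.accounts.accounts.)
  //
  //   GlodaMsgIndexer.indexEverything();
  //   // or, for a single account:
  //   GlodaMsgIndexer.indexAccount(anAccount);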
2001
2002  /**
2003   * Queue a single folder for indexing given an nsIMsgFolder.
2004   *
2005   * @param [aOptions.callback] A callback to invoke when the folder finishes
2006   *     indexing.  First argument is true if the task ran to completion
2007   *     successfully, false if we had to abort for some reason.
2008   * @param [aOptions.force=false] Should we force the indexing of all messages
2009   *     in the folder (true) or just index what hasn't been indexed (false).
2010   * @return true if we are going to index the folder, false if not.
2011   */
2012  indexFolder(aMsgFolder, aOptions) {
2013    if (!this.shouldIndexFolder(aMsgFolder)) {
2014      return false;
2015    }
2016    let glodaFolder = GlodaDatastore._mapFolder(aMsgFolder);
2017    // stay out of compacting/compacted folders
2018    if (glodaFolder.compacting || glodaFolder.compacted) {
2019      return false;
2020    }
2021
2022    this._log.info("Queue-ing folder for indexing: " + aMsgFolder.prettyName);
2023    let job = new IndexingJob("folder", glodaFolder.id);
2024    if (aOptions) {
2025      if ("callback" in aOptions) {
2026        job.callback = aOptions.callback;
2027      }
2028      if ("force" in aOptions) {
2029        job.force = Boolean(aOptions.force);
2030      }
2031    }
2032    GlodaIndexer.indexJob(job);
2033    return true;
2034  },
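
  // Illustrative usage sketch: force a full re-index of one folder and learn
  //  whether the job ran to completion.  (someFolder stands in for any
  //  indexable nsIMsgFolder; both option properties are optional.)
  //
  //   GlodaMsgIndexer.indexFolder(someFolder, {
  //     force: true,
  //     callback(aRanToCompletion) {
  //       // true if the job completed, false if it had to abort.
  //     },
  //   });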
2035
2036  /**
2037   * Queue a list of messages for indexing.
2038   *
2039   * @param aFoldersAndMessages List of [nsIMsgFolder, message key] tuples.
2040   */
2041  indexMessages(aFoldersAndMessages) {
2042    let job = new IndexingJob("message", null);
2043    job.items = aFoldersAndMessages.map(fm => [
2044      GlodaDatastore._mapFolder(fm[0]).id,
2045      fm[1],
2046    ]);
2047    GlodaIndexer.indexJob(job);
2048  },
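
  // Illustrative usage sketch: queue two specific headers for event-driven
  //  indexing.  Each entry pairs a header's nsIMsgFolder with its message key;
  //  msgHdrA and msgHdrB stand in for nsIMsgDBHdr instances.
  //
  //   GlodaMsgIndexer.indexMessages([
  //     [msgHdrA.folder, msgHdrA.messageKey],
  //     [msgHdrB.folder, msgHdrB.messageKey],
  //   ]);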
2049
2050  /**
2051   * Mark all known folders as dirty so that the next indexing sweep goes
2052   *  into all folders and checks their contents to see if they need to be
2053   *  indexed.
2054   *
2055   * This was added for the migration case where we want to reindex all of the
2056   *  messages that had been marked with GLODA_BAD_MESSAGE_ID; that value has
2057   *  since become GLODA_OLD_BAD_MESSAGE_ID, so those messages are once again
2058   *  eligible for indexing.
2059   */
2060  dirtyAllKnownFolders() {
2061    // Just iterate over the datastore's folder map and tell each folder to
2062    //  be dirty if its priority is not disabled.
2063    for (let folderID in GlodaDatastore._folderByID) {
2064      let glodaFolder = GlodaDatastore._folderByID[folderID];
2065      if (glodaFolder.indexingPriority !== glodaFolder.kIndexingNeverPriority) {
2066        glodaFolder._ensureFolderDirty();
2067      }
2068    }
2069  },
2070
2071  /**
2072   * Given a message header, return whether this message is likely to have
2073   * been indexed or not.
2074   *
2075   * This means the message must:
2076   * - Be in a folder eligible for gloda indexing. (Not News, etc.)
2077   * - Be in a non-filthy folder.
2078   * - Be gloda-indexed and non-filthy.
2079   *
2080   * @param aMsgHdr A message header.
2081   * @returns true if the message is likely to have been indexed.
2082   */
2083  isMessageIndexed(aMsgHdr) {
2084    // If it's in a folder that we flat out do not index, say no.
2085    if (!this.shouldIndexFolder(aMsgHdr.folder)) {
2086      return false;
2087    }
2088    let glodaFolder = GlodaDatastore._mapFolder(aMsgHdr.folder);
2089    let [glodaId, glodaDirty] = PendingCommitTracker.getGlodaState(aMsgHdr);
2090    return (
2091      glodaId >= GLODA_FIRST_VALID_MESSAGE_ID &&
2092      glodaDirty != GlodaMsgIndexer.kMessageFilthy &&
2093      glodaFolder &&
2094      glodaFolder.dirtyStatus != glodaFolder.kFolderFilthy
2095    );
2096  },
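
  // Illustrative usage sketch: a caller might consult this before relying on
  //  gloda data for a particular header.  (someMsgHdr stands in for any
  //  nsIMsgDBHdr.)
  //
  //   if (GlodaMsgIndexer.isMessageIndexed(someMsgHdr)) {
  //     // a gloda record exists (possibly still pending commit)
  //   }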
2097
2098  /* *********** Event Processing *********** */
2099
2100  /**
2101   * Tracks messages we have received msgKeyChanged notifications for in order
2102   *  to provide batching and to suppress needless reindexing when we receive
2103   *  the expected follow-up msgsClassified notification.
2104   *
2105   * The entries in this dictionary should be extremely short-lived as we
2106   *  receive the msgKeyChanged notification as the offline fake header is
2107   *  converted into a real header (which is accompanied by a msgAdded
2108   *  notification we don't pay attention to).  Once the headers finish
2109   *  updating, the message classifier will get its at-bat and should likely
2110   *  find that the messages have already been classified and so fast-path
2111   *  them.
2112   *
2113   * The keys in this dictionary are chosen to be consistent with those of
2114   *  PendingCommitTracker: the folder.URI + "#" + the (new) message key.
2115   * The values in the dictionary are either an object with "id" (the gloda
2116   *  id), "key" (the new message key), and "dirty" (is it dirty and so
2117   *  should still be queued for indexing) attributes, or null indicating that
2118   *  no change in message key occurred and so no database changes are required.
2119   */
2120  _keyChangedBatchInfo: {},
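
  // Illustrative example of the shape described above (all values made up):
  //  after a msgKeyChanged notification for a header whose key changed from
  //  123 to 456 in the folder "imap://user@host/INBOX", the entry would be:
  //
  //   this._keyChangedBatchInfo["imap://user@host/INBOX#456"] = {
  //     id: 4242,       // the message's gloda id
  //     key: 456,       // the new message key, or null if the key is unchanged
  //     isDirty: false, // whether the message still needs to be re-indexed
  //   };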
2121
2122  /**
2123   * Common logic for things that want to feed event-driven indexing.  This gets
2124   *  called by both |_msgFolderListener.msgsClassified| when we are first
2125   *  seeing a message as well as by |_folderListener| when things happen to
2126   *  existing messages.  Although we could slightly specialize for the
2127   *  new-to-us case, it works out to be cleaner to just treat them the same
2128   *  and take a very small performance hit.
2129   *
2130   * @param aMsgHdrs array of messages to treat as potentially changed.
2131   * @param aDirtyingEvent Is this event inherently dirtying?  Receiving a
2132   *     msgsClassified notification is not inherently dirtying because it is
2133   *     just telling us that a message exists.  We use this knowledge to
2134   *     ignore the msgsClassified notifications for messages we have received
2135   *     msgKeyChanged notifications for and fast-pathed.  Since it is possible
2136   *     for user action to do something that dirties the message between the
2137   *     time we get the msgKeyChanged notification and when we receive the
2138   *     msgsClassified notification, we want to make sure we don't get
2139   *     confused.  (Although since we remove the message from our ignore-set
2140   *     after the first notification, we would likely just mistakenly treat
2141   *     the msgsClassified notification as something dirtying, so it would
2142   *     still work out...)
2143   */
2144  _reindexChangedMessages(aMsgHdrs, aDirtyingEvent) {
2145    let glodaIdsNeedingDeletion = null;
2146    let messageKeyChangedIds = null,
2147      messageKeyChangedNewKeys = null;
2148    for (let msgHdr of aMsgHdrs) {
2149      // -- Index this folder?
2150      let msgFolder = msgHdr.folder;
2151      if (!this.shouldIndexFolder(msgFolder)) {
2152        continue;
2153      }
2154      // -- Ignore messages in filthy folders!
2155      // A filthy folder can only be processed by an indexing sweep, and at
2156      //  that point the message will get indexed.
2157      let glodaFolder = GlodaDatastore._mapFolder(msgHdr.folder);
2158      if (glodaFolder.dirtyStatus == glodaFolder.kFolderFilthy) {
2159        continue;
2160      }
2161
2162      // -- msgKeyChanged event follow-up
2163      if (!aDirtyingEvent) {
2164        let keyChangedKey = msgHdr.folder.URI + "#" + msgHdr.messageKey;
2165        if (keyChangedKey in this._keyChangedBatchInfo) {
2166          var keyChangedInfo = this._keyChangedBatchInfo[keyChangedKey];
2167          delete this._keyChangedBatchInfo[keyChangedKey];
2168
2169          // Null means to ignore this message because the key did not change
2170          //  (and the message was not dirty so it is safe to ignore.)
2171          if (keyChangedInfo == null) {
2172            continue;
2173          }
2174          // (the key may be null if we only generated the entry because the
2175          //  message was dirty)
2176          if (keyChangedInfo.key !== null) {
2177            if (messageKeyChangedIds == null) {
2178              messageKeyChangedIds = [];
2179              messageKeyChangedNewKeys = [];
2180            }
2181            messageKeyChangedIds.push(keyChangedInfo.id);
2182            messageKeyChangedNewKeys.push(keyChangedInfo.key);
2183          }
2184          // ignore the message because it was not dirty
2185          if (!keyChangedInfo.isDirty) {
2186            continue;
2187          }
2188        }
2189      }
2190
2191      // -- Index this message?
2192      // We index local messages, IMAP messages that are offline, and IMAP
2193      // messages that aren't offline but whose folders aren't offline either
2194      let isFolderLocal = msgFolder instanceof Ci.nsIMsgLocalMailFolder;
2195      if (!isFolderLocal) {
2196        if (
2197          !(msgHdr.flags & Ci.nsMsgMessageFlags.Offline) &&
2198          msgFolder.getFlag(Ci.nsMsgFolderFlags.Offline)
2199        ) {
2200          continue;
2201        }
2202      }
2203      // Ignore messages whose processing flags indicate it has not yet been
2204      //  classified.  In the IMAP case if the Offline flag is going to get set
2205      //  we are going to see it before the msgsClassified event so this is
2206      //  very important.
2207      if (
2208        msgFolder.getProcessingFlags(msgHdr.messageKey) &
2209        NOT_YET_REPORTED_PROCESSING_FLAGS
2210      ) {
2211        continue;
2212      }
2213
2214      let [glodaId, glodaDirty] = PendingCommitTracker.getGlodaState(msgHdr);
2215
2216      let isSpam =
2217        msgHdr.getStringProperty(JUNK_SCORE_PROPERTY) == JUNK_SPAM_SCORE_STR;
2218
2219      // -- Is the message currently gloda indexed?
2220      if (
2221        glodaId >= GLODA_FIRST_VALID_MESSAGE_ID &&
2222        glodaDirty != this.kMessageFilthy
2223      ) {
2224        // - Is the message spam?
2225        if (isSpam) {
2226          // Treat this as a deletion...
2227          if (!glodaIdsNeedingDeletion) {
2228            glodaIdsNeedingDeletion = [];
2229          }
2230          glodaIdsNeedingDeletion.push(glodaId);
2231          // and skip to the next message
2232          continue;
2233        }
2234
2235        // - Mark the message dirty if it is clean.
2236        // (This is the only case in which we need to mark dirty so that the
2237        //  indexing sweep takes care of things if we don't process this in
2238        //  an event-driven fashion.  If the message has no gloda-id or does
2239        //  and it's already dirty or filthy, it is already marked for
2240        //  indexing.)
2241        if (glodaDirty == this.kMessageClean) {
2242          msgHdr.setUint32Property(GLODA_DIRTY_PROPERTY, this.kMessageDirty);
2243        }
2244        // if the message is pending clean, this change invalidates that.
2245        PendingCommitTracker.noteDirtyHeader(msgHdr);
2246      } else if (isSpam) {
2247        // If it's not indexed but is spam, ignore it.
2248        continue;
2249      }
2250      // (we want to index the message if we are here)
2251
2252      // mark the folder dirty too, so we know to look inside
2253      glodaFolder._ensureFolderDirty();
2254
2255      if (this._pendingAddJob == null) {
2256        this._pendingAddJob = new IndexingJob("message", null);
2257        GlodaIndexer.indexJob(this._pendingAddJob);
2258      }
2259      // only queue the message if we haven't overflowed our event-driven budget
2260      if (this._pendingAddJob.items.length < this._indexMaxEventQueueMessages) {
2261        this._pendingAddJob.items.push([
2262          GlodaDatastore._mapFolder(msgFolder).id,
2263          msgHdr.messageKey,
2264        ]);
2265      } else {
2266        this.indexingSweepNeeded = true;
2267      }
2268    }
2269
2270    // Process any message key changes (from earlier msgKeyChanged events)
2271    if (messageKeyChangedIds != null) {
2272      GlodaDatastore.updateMessageKeys(
2273        messageKeyChangedIds,
2274        messageKeyChangedNewKeys
2275      );
2276    }
2277
2278    // If we accumulated any deletions in there, batch them off now.
2279    if (glodaIdsNeedingDeletion) {
2280      GlodaDatastore.markMessagesDeletedByIDs(glodaIdsNeedingDeletion);
2281      this.pendingDeletions = true;
2282    }
2283  },
2284
2285  /* ***** Folder Changes ***** */
2286  /**
2287   * All additions and removals are queued for processing.  Indexing messages
2288   *  is potentially phenomenally expensive, and deletion can still be
2289   *  relatively expensive due to our need to delete the message, its
2290   *  attributes, and all attributes that reference it.  Additionally,
2291   *  attribute deletion costs are higher than attribute look-up because
2292   *  there is the actual row plus its 3 indices, and our covering indices are
2293   *  no help there.
2294   *
2295   */
2296  _msgFolderListener: {
2297    indexer: null,
2298
2299    /**
2300     * We no longer use the msgAdded notification, instead opting to wait until
2301     *  junk/trait classification has run (or decided not to run) and all
2302     *  filters have run.  The msgsClassified notification provides that for us.
2303     */
2304    msgAdded(aMsgHdr) {
2305      // we are never called! we do not enable this bit!
2306    },
2307
2308    /**
2309     * Process (apparently newly added) messages that have been looked at by
2310     *  the message classifier.  This ensures that if the message was going
2311     *  to get marked as spam, this will have already happened.
2312     *
2313     * Besides truly new (to us) messages, we will also receive this event for
2314     *  messages that are the result of IMAP message move/copy operations,
2315     *  including both moves that generated offline fake headers and those that
2316     *  did not.  In the offline fake header case, however, we are able to
2317     *  ignore their msgsClassified events because we will have received a
2318     *  msgKeyChanged notification sometime in the recent past.
2319     */
2320    msgsClassified(aMsgHdrs, aJunkClassified, aTraitClassified) {
2321      this.indexer._log.debug("msgsClassified notification");
2322      try {
2323        GlodaMsgIndexer._reindexChangedMessages(aMsgHdrs, false);
2324      } catch (ex) {
2325        this.indexer._log.error("Explosion in msgsClassified handling:", ex);
2326      }
2327    },
2328
2329    /**
2330     * Any messages which have had their junk state changed are marked for
2331     * reindexing.
2332     */
2333    msgsJunkStatusChanged(messages) {
2334      this.indexer._log.debug("JunkStatusChanged notification");
2335      GlodaMsgIndexer._reindexChangedMessages(messages, true);
2336    },
2337
2338    /**
2339     * Handle real, actual deletion (move to trash and IMAP deletion model
2340     *  don't count); we only see the deletion here when it becomes forever,
2341     *  or rather _just before_ it becomes forever.  Because the header is
2342     *  going away, we need to either process things immediately or extract the
2343     *  information required to purge it later without the header.
2344     * To this end, we mark all messages that were indexed in the gloda message
2345     *  database as deleted.  We set our pending deletions flag to let our
2346     *  indexing logic know that after its next wave of folder traversal, it
2347     *  should perform a deletion pass.  If it turns out the messages are coming
2348     *  back, the fact that deletion is thus deferred can be handy, as we can
2349     *  reuse the existing gloda message.
2350     */
2351    msgsDeleted(aMsgHdrs) {
2352      this.indexer._log.debug("msgsDeleted notification");
2353      let glodaMessageIds = [];
2354
2355      for (let msgHdr of aMsgHdrs) {
2356        let [glodaId, glodaDirty] = PendingCommitTracker.getGlodaState(msgHdr);
2357        if (
2358          glodaId >= GLODA_FIRST_VALID_MESSAGE_ID &&
2359          glodaDirty != GlodaMsgIndexer.kMessageFilthy
2360        ) {
2361          glodaMessageIds.push(glodaId);
2362        }
2363      }
2364
2365      if (glodaMessageIds.length) {
2366        GlodaMsgIndexer._datastore.markMessagesDeletedByIDs(glodaMessageIds);
2367        GlodaMsgIndexer.pendingDeletions = true;
2368      }
2369    },
2370
2371    /**
2372     * Process a move or copy.
2373     *
2374     * Moves to a local folder or an IMAP folder where we are generating offline
2375     *  fake headers are dealt with efficiently because we get both the source
2376     *  and destination headers.  The main ingredient to having offline fake
2377     *  headers is that allowUndo was true when the operation was performed.
2378     *  The only non-obvious thing is that we need to make sure that we deal
2379     *  with the impact of filthy folders and messages on gloda-id's (they
2380     *  invalidate the gloda-id).
2381     *
2382     * Moves to an IMAP folder that do not generate offline fake headers do not
2383     *  provide us with the target header, but the IMAP SetPendingAttributes
2384     *  logic will still attempt to propagate the properties on the message
2385     *  header so when we eventually see it in the msgsClassified notification,
2386     *  it should have the properties of the source message copied over.
2387     * We make sure that gloda-id's do not get propagated when messages are
2388     *  moved from IMAP folders that are marked filthy or are marked as not
2389     *  supposed to be indexed by clearing the pending attributes for the header
2390     *  being tracked by the destination IMAP folder.
2391     * We could fast-path the IMAP move case in msgsClassified by noticing that
2392     *  a message is showing up with a gloda-id header already and just
2393     *  performing an async location update.
2394     *
2395     * Moves that occur involving 'compacted' folders are fine and do not
2396     *  require special handling here.  The one tricky super-edge-case that
2397     *  can happen (and gets handled by the compaction pass) is the move of a
2398     *  message that got gloda indexed that did not already have a gloda-id and
2399     *  PendingCommitTracker did not get to flush the gloda-id before the
2400     *  compaction happened.  In that case our move logic cannot know to do
2401     *  anything and the gloda database still thinks the message lives in our
2402     *  folder.  The compaction pass will deal with this by marking the message
2403     *  as deleted.  The rationale being that marking it deleted allows the
2404     *  message to be re-used if it gets indexed in the target location, or if
2405     *  the target location has already been indexed, we no longer need the
2406     *  duplicate and it should be deleted.  (Also, it is unable to distinguish
2407     *  between the case where the message was deleted and the case where it moved.)
2408     *
2409     * Because copied messages are, by their nature, duplicate messages, we
2410     *  do not particularly care about them.  As such, we defer their processing
2411     *  to the automatic sync logic that will happen much later on.  This is
2412     *  potentially desirable in case the user deletes some of the original
2413     *  messages, allowing us to reuse the gloda message representations when
2414     *  we finally get around to indexing the messages.  We do need to mark the
2415     *  folder as dirty, though, to clue in the sync logic.
2416     */
2417    msgsMoveCopyCompleted(aMove, aSrcMsgHdrs, aDestFolder, aDestMsgHdrs) {
2418      this.indexer._log.debug("MoveCopy notification.  Move: " + aMove);
2419      try {
2420        // ---- Move
2421        if (aMove) {
2422          // -- Effectively a deletion?
2423          // If the destination folder is not indexed, it's like these messages
2424          //  are being deleted.
2425          if (!GlodaMsgIndexer.shouldIndexFolder(aDestFolder)) {
2426            this.msgsDeleted(aSrcMsgHdrs);
2427            return;
2428          }
2429
2430          // -- Avoid propagation of filthy gloda-id's.
2431          // If the source folder is filthy or should not be indexed (and so
2432          //  any gloda-id's found in there are gibberish), our only job is to
2433          //  strip the gloda-id's off of all the destination headers because
2434          //  none of the gloda-id's are valid (and so we certainly don't want
2435          //  to try and use them as a basis for updating message keys.)
2436          let srcMsgFolder = aSrcMsgHdrs[0].folder;
2437          if (
2438            !this.indexer.shouldIndexFolder(srcMsgFolder) ||
2439            GlodaDatastore._mapFolder(srcMsgFolder).dirtyStatus ==
2440              GlodaFolder.prototype.kFolderFilthy
2441          ) {
2442            // Local case, just modify the destination headers directly.
2443            if (aDestMsgHdrs.length > 0) {
2444              for (let destMsgHdr of aDestMsgHdrs) {
2445                // zero it out if it exists
2446                // (no need to deal with pending commit issues here; a filthy
2447                //  folder by definition has nothing indexed in it.)
2448                let glodaId = destMsgHdr.getUint32Property(
2449                  GLODA_MESSAGE_ID_PROPERTY
2450                );
2451                if (glodaId) {
2452                  destMsgHdr.setUint32Property(GLODA_MESSAGE_ID_PROPERTY, 0);
2453                }
2454              }
2455
2456              // Since we are moving messages from a folder where they were
2457              //  effectively not indexed, it is up to us to make sure the
2458              //  messages now get indexed.
2459              this.indexer._reindexChangedMessages(aDestMsgHdrs);
2460              return;
2461            }
2462
2463            // IMAP move case, we need to operate on the pending headers using
2464            //  the source header to get the pending header and as the
2465            //  indication of what has been already set on the pending header.
2466            let destDb;
2467            // so, this can fail, and there's not much we can do about it.
2468            try {
2469              destDb = aDestFolder.msgDatabase;
2470            } catch (ex) {
2471              this.indexer._log.warn(
2472                "Destination database for " +
2473                  aDestFolder.prettyName +
2474                  " not ready on IMAP move." +
2475                  " Gloda corruption possible."
2476              );
2477              return;
2478            }
2479            for (let srcMsgHdr of aSrcMsgHdrs) {
2480              // zero it out if it exists
2481              // (no need to deal with pending commit issues here; a filthy
2482              //  folder by definition has nothing indexed in it.)
2483              let glodaId = srcMsgHdr.getUint32Property(
2484                GLODA_MESSAGE_ID_PROPERTY
2485              );
2486              if (glodaId) {
2487                destDb.setUint32AttributeOnPendingHdr(
2488                  srcMsgHdr,
2489                  GLODA_MESSAGE_ID_PROPERTY,
2490                  0
2491                );
2492              }
2493            }
2494
2495            // Nothing remains to be done.  The msgsClassified event will take
2496            //  care of making sure the message gets indexed.
2497            return;
2498          }
2499
2500          // --- Have destination headers (local case):
2501          if (aDestMsgHdrs.length > 0) {
2502            // -- Update message keys for valid gloda-id's.
2503            // (Which means ignore filthy gloda-id's.)
2504            let glodaIds = [];
2505            let newMessageKeys = [];
2506            // Track whether we see any messages that are not gloda indexed so
2507            //  we know if we have to mark the destination folder dirty.
2508            let sawNonGlodaMessage = false;
2509            for (let iMsg = 0; iMsg < aSrcMsgHdrs.length; iMsg++) {
2510              let srcMsgHdr = aSrcMsgHdrs[iMsg];
2511              let destMsgHdr = aDestMsgHdrs[iMsg];
2512
2513              let [glodaId, dirtyStatus] = PendingCommitTracker.getGlodaState(
2514                srcMsgHdr
2515              );
2516              if (
2517                glodaId >= GLODA_FIRST_VALID_MESSAGE_ID &&
2518                dirtyStatus != GlodaMsgIndexer.kMessageFilthy
2519              ) {
2520                // we may need to update the pending commit map (it checks)
2521                PendingCommitTracker.noteMove(srcMsgHdr, destMsgHdr);
2522                // but we always need to update our database
2523                glodaIds.push(glodaId);
2524                newMessageKeys.push(destMsgHdr.messageKey);
2525              } else {
2526                sawNonGlodaMessage = true;
2527              }
2528            }
2529
2530            // this method takes care to update the in-memory representations
2531            //  too; we don't need to do anything
2532            if (glodaIds.length) {
2533              GlodaDatastore.updateMessageLocations(
2534                glodaIds,
2535                newMessageKeys,
2536                aDestFolder
2537              );
2538            }
2539
2540            // Mark the destination folder dirty if we saw any messages that
2541            //  were not already gloda indexed.
2542            if (sawNonGlodaMessage) {
2543              let destGlodaFolder = GlodaDatastore._mapFolder(aDestFolder);
2544              destGlodaFolder._ensureFolderDirty();
2545              this.indexer.indexingSweepNeeded = true;
2546            }
2547          } else {
2548            // --- No dest headers (IMAP case):
2549            // Update any valid gloda indexed messages into their new folder to
2550            //  make the indexer's life easier when it sees the messages in their
2551            //  new folder.
2552            let glodaIds = [];
2553
2554            let srcFolderIsLocal =
2555              srcMsgFolder instanceof Ci.nsIMsgLocalMailFolder;
2556            for (let msgHdr of aSrcMsgHdrs) {
2557              let [glodaId, dirtyStatus] = PendingCommitTracker.getGlodaState(
2558                msgHdr
2559              );
2560              if (
2561                glodaId >= GLODA_FIRST_VALID_MESSAGE_ID &&
2562                dirtyStatus != GlodaMsgIndexer.kMessageFilthy
2563              ) {
2564                // we may need to update the pending commit map (it checks)
2565                PendingCommitTracker.noteBlindMove(msgHdr);
2566                // but we always need to update our database
2567                glodaIds.push(glodaId);
2568
2569                // XXX UNDO WORKAROUND
2570                // This constitutes a move from a local folder to an IMAP
2571                //  folder.  Undo does not currently do the right thing for us,
2572                //  but we have a chance of not orphaning the message if we
2573                //  mark the source header as dirty so that when the message
2574                //  gets re-added we see it.  (This does require that we enter
2575                //  the folder; we set the folder dirty after the loop to
2576                //  increase the probability of this but it's not foolproof
2577                //  depending on when the next indexing sweep happens and when
2578                //  the user performs an undo.)
2579                msgHdr.setUint32Property(
2580                  GLODA_DIRTY_PROPERTY,
2581                  GlodaMsgIndexer.kMessageDirty
2582                );
2583              }
2584            }
2585            // XXX ALSO UNDO WORKAROUND
2586            if (srcFolderIsLocal) {
2587              let srcGlodaFolder = GlodaDatastore._mapFolder(srcMsgFolder);
2588              srcGlodaFolder._ensureFolderDirty();
2589            }
2590
2591            // quickly move them to the right folder, zeroing their message keys
2592            GlodaDatastore.updateMessageFoldersByKeyPurging(
2593              glodaIds,
2594              aDestFolder
2595            );
2596            // we _do not_ need to mark the folder as dirty, because the
2597            //  message added events will cause that to happen.
2598          }
2599        } else {
2600          // ---- Copy case
2601          // -- Do not propagate gloda-id's for copies
2602          // (Only applies if we have the destination header, which means local)
2603          for (let destMsgHdr of aDestMsgHdrs) {
2604            let glodaId = destMsgHdr.getUint32Property(
2605              GLODA_MESSAGE_ID_PROPERTY
2606            );
2607            if (glodaId) {
2608              destMsgHdr.setUint32Property(GLODA_MESSAGE_ID_PROPERTY, 0);
2609            }
2610          }
2611
2612          // mark the folder as dirty; we'll get to it later.
2613          let destGlodaFolder = GlodaDatastore._mapFolder(aDestFolder);
2614          destGlodaFolder._ensureFolderDirty();
2615          this.indexer.indexingSweepNeeded = true;
2616        }
2617      } catch (ex) {
2618        this.indexer._log.error(
2619          "Problem encountered during message move/copy:",
2620          ex.stack
2621        );
2622      }
2623    },
2624
2625    /**
     * Queue up message key changes that result from offline fake headers being
     *  made real, deferring the actual update to the msgsClassified
     *  notification that is expected to follow.  We defer the work (if there
     *  is any to be done; the fake header might have guessed the right UID
     *  already) so that we can batch it.
2631     *
2632     * The expectation is that there will be no meaningful time window between
2633     *  this notification and the msgsClassified notification since the message
2634     *  classifier should not actually need to classify the messages (they
2635     *  should already have been classified) and so can fast-path them.
2636     */
2637    msgKeyChanged(aOldMsgKey, aNewMsgHdr) {
2638      try {
2639        let val = null,
2640          newKey = aNewMsgHdr.messageKey;
2641        let [glodaId, glodaDirty] = PendingCommitTracker.getGlodaState(
2642          aNewMsgHdr
2643        );
2644        // If we haven't indexed this message yet, take no action, and leave it
2645        // up to msgsClassified to take proper action.
2646        if (glodaId < GLODA_FIRST_VALID_MESSAGE_ID) {
2647          return;
2648        }
2649        // take no action on filthy messages,
2650        // generate an entry if dirty or the keys don't match.
2651        if (
2652          glodaDirty !== GlodaMsgIndexer.kMessageFilthy &&
2653          (glodaDirty === GlodaMsgIndexer.kMessageDirty ||
2654            aOldMsgKey !== newKey)
2655        ) {
2656          val = {
2657            id: glodaId,
2658            key: aOldMsgKey !== newKey ? newKey : null,
2659            isDirty: glodaDirty === GlodaMsgIndexer.kMessageDirty,
2660          };
2661        }
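        // (Entries in _keyChangedBatchInfo are keyed by
        //  "<folder URI>#<message key>"; a null value records that we saw the
        //  header but no database update is needed for it.)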
2662
2663        let key = aNewMsgHdr.folder.URI + "#" + aNewMsgHdr.messageKey;
2664        this.indexer._keyChangedBatchInfo[key] = val;
2665      } catch (ex) {
        // This is here mainly so unit tests fail visibly, not for user error reporting.
2667        this.indexer._log.error(
2668          "Problem encountered during msgKeyChanged" +
2669            " notification handling: " +
2670            ex +
2671            "\n\n" +
2672            ex.stack +
2673            " \n\n"
2674        );
2675      }
2676    },
2677
2678    /**
     * Detect newly added folders and map them before they get any messages
     * added to them.  If we only hear about a folder after it gets its first
     * message, we will mark it filthy, but if we map it before that, it gets
     * marked clean.
2683     */
2684    folderAdded(aMsgFolder) {
2685      // This is invoked for its side-effect of invoking _mapFolder and doing so
2686      // only after filtering out folders we don't care about.
2687      GlodaMsgIndexer.shouldIndexFolder(aMsgFolder);
2688    },
2689
2690    /**
2691     * Handles folder no-longer-exists-ence.  We mark all messages as deleted
2692     *  and remove the folder from our URI table.  Currently, if a folder that
2693     *  contains other folders is deleted, we may either receive one
2694     *  notification for the folder that is deleted, or a notification for the
     *  folder and one for each of its descendants.  This depends upon the
     *  underlying account implementation, so we explicitly handle each case.
     *  Namely, we assume we will only get one notification, but we also handle
     *  the case where the children are already gone for some reason.
2699     */
2700    folderDeleted(aFolder) {
2701      this.indexer._log.debug("folderDeleted notification");
2702      try {
2703        let delFunc = function(aFolder, indexer) {
2704          if (indexer._datastore._folderKnown(aFolder)) {
2705            indexer._log.info(
2706              "Processing deletion of folder " + aFolder.prettyName + "."
2707            );
2708            let glodaFolder = GlodaDatastore._mapFolder(aFolder);
2709            indexer._datastore.markMessagesDeletedByFolderID(glodaFolder.id);
2710            indexer._datastore.deleteFolderByID(glodaFolder.id);
2711            GlodaDatastore._killGlodaFolderIntoTombstone(glodaFolder);
2712          } else {
2713            indexer._log.info(
2714              "Ignoring deletion of folder " +
2715                aFolder.prettyName +
2716                " because it is unknown to gloda."
2717            );
2718          }
2719        };
2720
2721        let descendentFolders = aFolder.descendants;
2722        // (the order of operations does not matter; child, non-child, whatever.)
2723        // delete the parent
2724        delFunc(aFolder, this.indexer);
        // delete all its descendants
2726        for (let folder of descendentFolders) {
2727          delFunc(folder, this.indexer);
2728        }
2729
2730        this.indexer.pendingDeletions = true;
2731      } catch (ex) {
2732        this.indexer._log.error(
2733          "Problem encountered during folder deletion" +
2734            ": " +
2735            ex +
2736            "\n\n" +
2737            ex.stack +
2738            "\n\n"
2739        );
2740      }
2741    },
2742
2743    /**
2744     * Handle a folder being copied or moved.
2745     * Moves are handled by a helper function shared with _folderRenameHelper
2746     *  (which takes care of any nesting involved).
2747     * Copies are actually ignored, because our periodic indexing traversal
2748     *  should discover these automatically.  We could hint ourselves into
2749     *  action, but arguably a set of completely duplicate messages is not
2750     *  a high priority for indexing.
2751     */
2752    folderMoveCopyCompleted(aMove, aSrcFolder, aDestFolder) {
2753      this.indexer._log.debug(
2754        "folderMoveCopy notification (Move: " + aMove + ")"
2755      );
2756      if (aMove) {
2757        let srcURI = aSrcFolder.URI;
2758        let targetURI =
2759          aDestFolder.URI + srcURI.substring(srcURI.lastIndexOf("/"));
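        // (Illustrative, assuming typical folder URIs: moving
        //  "imap://user@host/Parent/Child" into "imap://user@host/Other"
        //  produces the target URI "imap://user@host/Other/Child".)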
2760        this._folderRenameHelper(aSrcFolder, targetURI);
2761      } else {
2762        this.indexer.indexingSweepNeeded = true;
2763      }
2764    },
2765
2766    /**
2767     * We just need to update the URI <-> ID maps and the row in the database,
2768     *  all of which is actually done by the datastore for us.
2769     * This method needs to deal with the complexity where local folders will
2770     *  generate a rename notification for each sub-folder, but IMAP folders
2771     *  will generate only a single notification.  Our logic primarily handles
2772     *  this by not exploding if the original folder no longer exists.
2773     */
2774    _folderRenameHelper(aOrigFolder, aNewURI) {
2775      let newFolder = MailUtils.getOrCreateFolder(aNewURI);
2776      let specialFolderFlags =
2777        Ci.nsMsgFolderFlags.Trash | Ci.nsMsgFolderFlags.Junk;
2778      if (newFolder.isSpecialFolder(specialFolderFlags, true)) {
2779        let descendentFolders = newFolder.descendants;
2780
2781        // First thing to do: make sure we don't index the resulting folder and
        //  its descendants.
2783        GlodaMsgIndexer.resetFolderIndexingPriority(newFolder);
2784        for (let folder of descendentFolders) {
2785          GlodaMsgIndexer.resetFolderIndexingPriority(folder);
2786        }
2787
2788        // Remove from the index messages from the original folder
2789        this.folderDeleted(aOrigFolder);
2790      } else {
2791        let descendentFolders = aOrigFolder.descendants;
2792
2793        let origURI = aOrigFolder.URI;
2794        // this rename is straightforward.
2795        GlodaDatastore.renameFolder(aOrigFolder, aNewURI);
2796
2797        for (let folder of descendentFolders) {
2798          let oldSubURI = folder.URI;
          // Mangle a new URI from the old URI.  We could also try to do a
          //  parallel traversal of the new folder hierarchy, but that seems
          //  like more work.
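          // (Illustrative, assuming local-folder style URIs: with origURI
          //  "mailbox://nobody@Local%20Folders/Parent" and aNewURI
          //  "mailbox://nobody@Local%20Folders/Renamed", the descendant
          //  "mailbox://nobody@Local%20Folders/Parent/Sub" becomes
          //  "mailbox://nobody@Local%20Folders/Renamed/Sub".)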
2802          let newSubURI = aNewURI + oldSubURI.substring(origURI.length);
2803          this.indexer._datastore.renameFolder(oldSubURI, newSubURI);
2804        }
2805
2806        this.indexer._log.debug(
2807          "folder renamed: " + origURI + " to " + aNewURI
2808        );
2809      }
2810    },
2811
2812    /**
2813     * Handle folder renames, dispatching to our rename helper (which also
2814     *  takes care of any nested folder issues.)
2815     */
2816    folderRenamed(aOrigFolder, aNewFolder) {
2817      this._folderRenameHelper(aOrigFolder, aNewFolder.URI);
2818    },
2819
2820    /**
2821     * Helper used by folderCompactStart/folderReindexTriggered.
2822     */
2823    _reindexFolderHelper(folder, isCompacting) {
2824      // ignore folders we ignore...
2825      if (!GlodaMsgIndexer.shouldIndexFolder(folder)) {
2826        return;
2827      }
2828
2829      let glodaFolder = GlodaDatastore._mapFolder(folder);
2830      if (isCompacting) {
2831        glodaFolder.compacting = true;
2832      }
2833
2834      // Purge any explicit indexing of said folder.
2835      GlodaIndexer.purgeJobsUsingFilter(function(aJob) {
2836        return aJob.jobType == "folder" && aJob.id == folder.id;
2837      });
2838
2839      // Abort the active job if it's in the folder (this covers both
      //  event-driven indexing that happens to be in the folder as well as
      //  explicit folder indexing of the folder).
2842      if (GlodaMsgIndexer._indexingFolder == folder) {
2843        GlodaIndexer.killActiveJob();
2844      }
2845
2846      // Tell the PendingCommitTracker to throw away anything it is tracking
2847      //  about the folder.  We will pick up the pieces in the compaction
2848      //  pass.
2849      PendingCommitTracker.noteFolderDatabaseGettingBlownAway(folder);
2850
2851      // (We do not need to mark the folder dirty because if we were indexing
2852      //  it, it already must have been marked dirty.)
2853    },
2854
2855    /**
2856     * folderCompactStart: Mark the folder as compacting in our in-memory
2857     * representation.  This should keep any new indexing out of the folder
2858     * until it is done compacting.  Also, kill any active or existing jobs
2859     * to index the folder.
2860     */
2861    folderCompactStart(folder) {
2862      this._reindexFolderHelper(folder, true);
2863    },
2864
2865    /**
2866     * folderReindexTriggered: We do the same thing as folderCompactStart
2867     * but don't mark the folder as compacting.
2868     */
2869    folderReindexTriggered(folder) {
2870      this._reindexFolderHelper(folder, false);
2871    },
2872
2873    /**
2874     * folderCompactFinish: Mark the folder as done compacting in our
2875     * in-memory representation.  Assuming the folder was known to us and
2876     * not marked filthy, queue a compaction job.
2877     */
2878    folderCompactFinish(folder) {
2879      // ignore folders we ignore...
2880      if (!GlodaMsgIndexer.shouldIndexFolder(folder)) {
2881        return;
2882      }
2883
2884      let glodaFolder = GlodaDatastore._mapFolder(folder);
2885      glodaFolder.compacting = false;
2886      glodaFolder._setCompactedState(true);
2887
2888      // Queue compaction unless the folder was filthy (in which case there
2889      //  are no valid gloda-id's to update.)
2890      if (glodaFolder.dirtyStatus != glodaFolder.kFolderFilthy) {
2891        GlodaIndexer.indexJob(new IndexingJob("folderCompact", glodaFolder.id));
2892      }
2893
2894      // Queue indexing of the folder if it is dirty.  We are doing this
2895      //  mainly in case we were indexing it before the compaction started.
2896      //  It should be reasonably harmless if we weren't.
2897      // (It would probably be better to just make sure that there is an
2898      //  indexing sweep queued or active, and if it's already active that
2899      //  this folder is in the queue to be processed.)
2900      if (glodaFolder.dirtyStatus == glodaFolder.kFolderDirty) {
2901        GlodaIndexer.indexJob(new IndexingJob("folder", glodaFolder.id));
2902      }
2903    },
2904  },
2905
2906  /**
2907   * A nsIFolderListener (listening on nsIMsgMailSession so we get all of
2908   *  these events) PRIMARILY to get folder loaded notifications.  Because of
2909   *  deficiencies in the nsIMsgFolderListener's events at this time, we also
2910   *  get our folder-added and newsgroup notifications from here for now.  (This
2911   *  will be rectified.)
2912   */
2913  _folderListener: {
2914    indexer: null,
2915
2916    _init(aIndexer) {
2917      this.indexer = aIndexer;
2918    },
2919
2920    OnItemAdded(aParentItem, aItem) {},
2921    OnItemRemoved(aParentItem, aItem) {},
2922    OnItemPropertyChanged(aItem, aProperty, aOldValue, aNewValue) {},
2923    /**
2924     * Detect changes to folder flags and reset our indexing priority.  This
2925     * is important because (all?) folders start out without any flags and
2926     * then get their flags added to them.
2927     */
2928    OnItemIntPropertyChanged(aFolderItem, aProperty, aOldValue, aNewValue) {
2929      if (aProperty !== "FolderFlag") {
2930        return;
2931      }
2932      if (!GlodaMsgIndexer.shouldIndexFolder(aFolderItem)) {
2933        return;
2934      }
2935      // Only reset priority if folder Special Use changes.
2936      if (
2937        (aOldValue & Ci.nsMsgFolderFlags.SpecialUse) ==
2938        (aNewValue & Ci.nsMsgFolderFlags.SpecialUse)
2939      ) {
2940        return;
2941      }
2942      GlodaMsgIndexer.resetFolderIndexingPriority(aFolderItem);
2943    },
2944    OnItemBoolPropertyChanged(aItem, aProperty, aOldValue, aNewValue) {},
2945    OnItemUnicharPropertyChanged(aItem, aProperty, aOldValue, aNewValue) {},
2946    /**
2947     * Notice when user activity adds/removes tags or changes a message's
2948     *  status.
2949     */
2950    OnItemPropertyFlagChanged(aMsgHdr, aProperty, aOldValue, aNewValue) {
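      // ((aOldValue ^ aNewValue) is the set of flag bits that actually
      //  changed, which is what the checks below examine.)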
2951      if (
2952        aProperty == "Keywords" ||
        // We couldn't care less about the New flag changing.
2954        (aProperty == "Status" &&
2955          (aOldValue ^ aNewValue) != Ci.nsMsgMessageFlags.New &&
2956          // We do care about IMAP deletion, but msgsDeleted tells us that, so
2957          //  ignore IMAPDeleted too...
2958          (aOldValue ^ aNewValue) != Ci.nsMsgMessageFlags.IMAPDeleted) ||
2959        aProperty == "Flagged"
2960      ) {
2961        GlodaMsgIndexer._reindexChangedMessages([aMsgHdr], true);
2962      }
2963    },
2964
2965    /**
2966     * Get folder loaded notifications for folders that had to do some
2967     *  (asynchronous) processing before they could be opened.
2968     */
2969    OnItemEvent(aFolder, aEvent) {
2970      if (aEvent == "FolderLoaded") {
2971        this.indexer._onFolderLoaded(aFolder);
2972      }
2973    },
2974  },
2975
2976  /* ***** Rebuilding / Reindexing ***** */
2977  /**
2978   * Allow us to invalidate an outstanding folder traversal because the
2979   *  underlying database is going away.  We use other means for detecting
2980   *  modifications of the message (labeling, marked (un)read, starred, etc.)
2981   *
2982   * This is an nsIDBChangeListener listening to an nsIDBChangeAnnouncer.  To
   *  add ourselves, we get hold of an nsIMsgDatabase, QueryInterface it to the
   *  announcer, then call AddListener.
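   *
   * A minimal sketch of that registration pattern (hedged; it assumes
   *  nsIMsgDatabase inherits from nsIDBChangeAnnouncer, and that the listener
   *  object below is what gets registered):
   *
   *   msgFolder.msgDatabase
   *     .QueryInterface(Ci.nsIDBChangeAnnouncer)
   *     .AddListener(GlodaMsgIndexer._databaseAnnouncerListener);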
2985   */
2986  _databaseAnnouncerListener: {
2987    indexer: null,
2988    /**
     * XXX We really should enumerate the operations under which we expect this
     *  to occur.  We know it must be happening as the result of a ForceClosed
     *  call, but we don't have a comprehensive list of when that is expected.
     *  Some reasons:
2993     * - Compaction (although we should already have killed the job thanks to
2994     *    our compaction notification)
2995     * - UID validity rolls.
2996     * - Folder Rename
2997     * - Folder Delete
2998     * The fact that we already have the database open when getting this means
2999     *  that it had to be valid before we opened it, which hopefully rules out
3000     *  modification of the mbox file by an external process (since that is
3001     *  forbidden when we are running) and many other exotic things.
3002     *
3003     * So this really ends up just being a correctness / safety protection
3004     *  mechanism.  At least now that we have better compaction support.
3005     */
3006    onAnnouncerGoingAway(aDBChangeAnnouncer) {
3007      // The fact that we are getting called means we have an active folder and
3008      //  that we therefore are the active job.  As such, we must kill the
3009      //  active job.
3010      // XXX In the future, when we support interleaved event-driven indexing
3011      //  that bumps long-running indexing tasks, the semantics of this will
3012      //  have to change a bit since we will want to maintain being active in a
3013      //  folder even when bumped.  However, we will probably have a more
3014      //  complex notion of indexing contexts on a per-job basis.
3015      GlodaIndexer.killActiveJob();
3016    },
3017
3018    onHdrFlagsChanged(aHdrChanged, aOldFlags, aNewFlags, aInstigator) {},
3019    onHdrDeleted(aHdrChanged, aParentKey, aFlags, aInstigator) {},
3020    onHdrAdded(aHdrChanged, aParentKey, aFlags, aInstigator) {},
3021    onParentChanged(aKeyChanged, aOldParent, aNewParent, aInstigator) {},
3022    onReadChanged(aInstigator) {},
3023    onJunkScoreChanged(aInstigator) {},
3024    onHdrPropertyChanged(aHdrToChange, aPreChange, aStatus, aInstigator) {},
3025    onEvent(aDB, aEvent) {},
3026  },
3027
3028  /**
   * Given a list of Message-ID's, return a correspondingly ordered list of
   *  lists of messages with those Message-ID's.  So if you pass an array with
   *  three Message-ID's ["a", "b", "c"], you would get back an array containing
3032   *  3 lists, where the first list contains all the messages with a message-id
3033   *  of "a", and so forth.  The reason a list is returned rather than null/a
3034   *  message is that we accept the reality that we have multiple copies of
3035   *  messages with the same ID.
3036   * This call is asynchronous because it depends on previously created messages
3037   *  to be reflected in our results, which requires us to execute on the async
3038   *  thread where all our writes happen.  This also turns out to be a
3039   *  reasonable thing because we could imagine pathological cases where there
3040   *  could be a lot of message-id's and/or a lot of messages with those
3041   *  message-id's.
3042   *
3043   * The returned collection will include both 'ghost' messages (messages
3044   *  that exist for conversation-threading purposes only) as well as deleted
3045   *  messages in addition to the normal 'live' messages that non-privileged
3046   *  queries might return.
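   *
   * A minimal usage sketch (hedged; it assumes the callback is invoked with
   *  the per-Message-ID result lists in the same order as the input array):
   *
   *   GlodaMsgIndexer.getMessagesByMessageID(
   *     ["<a@example.com>", "<b@example.com>"],
   *     function(aResults) {
   *       // aResults[0] holds every message whose message-id header is
   *       //  "<a@example.com>"; an empty list means no match was found.
   *     },
   *     null
   *   );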
3047   */
3048  getMessagesByMessageID(aMessageIDs, aCallback, aCallbackThis) {
3049    let msgIDToIndex = {};
3050    let results = [];
3051    for (let iID = 0; iID < aMessageIDs.length; ++iID) {
3052      let msgID = aMessageIDs[iID];
3053      results.push([]);
3054      msgIDToIndex[msgID] = iID;
3055    }
3056
3057    // (Note: although we are performing a lookup with no validity constraints
3058    //  and using the same object-relational-mapper-ish layer used by things
3059    //  that do have constraints, we are not at risk of exposing deleted
3060    //  messages to other code and getting it confused.  The only way code
3061    //  can find a message is if it shows up in their queries or gets announced
3062    //  via GlodaCollectionManager.itemsAdded, neither of which will happen.)
3063    let query = Gloda.newQuery(Gloda.NOUN_MESSAGE, {
3064      noDbQueryValidityConstraints: true,
3065    });
3066    query.headerMessageID.apply(query, aMessageIDs);
3067    query.frozen = true;
3068
3069    let listener = new MessagesByMessageIdCallback(
3070      msgIDToIndex,
3071      results,
3072      aCallback,
3073      aCallbackThis
3074    );
3075    return query.getCollection(listener, null, { becomeNull: true });
3076  },
3077
3078  /**
3079   * A reference to MsgHdrToMimeMessage that unit testing can clobber when it
3080   *  wants to cause us to hang or inject a fault.  If you are not
3081   *  glodaTestHelper.js then _do not touch this_.
3082   */
3083  _MsgHdrToMimeMessageFunc: MsgHdrToMimeMessage,
3084  /**
3085   * Primary message indexing logic.  This method is mainly concerned with
3086   *  getting all the information about the message required for threading /
3087   *  conversation building and subsequent processing.  It is responsible for
3088   *  determining whether to reuse existing gloda messages or whether a new one
   *  should be created.  Most attribute handling happens in GlodaFundAttr.jsm
   *  and GlodaExplicitAttr.jsm.
3091   *
   * Prior to calling this method, the caller must have invoked
   *  |_indexerEnterFolder|, leaving us with the following invariants:
3095   *
3096   * @pre aMsgHdr.folder == this._indexingFolder
3097   * @pre aMsgHdr.folder.msgDatabase == this._indexingDatabase
3098   */
3099  *_indexMessage(aMsgHdr, aCallbackHandle) {
3100    this._log.debug(
3101      "*** Indexing message: " + aMsgHdr.messageKey + " : " + aMsgHdr.subject
3102    );
3103
3104    // If the message is offline, then get the message body as well
3105    let aMimeMsg;
3106    if (
3107      aMsgHdr.flags & Ci.nsMsgMessageFlags.Offline ||
3108      aMsgHdr.folder instanceof Ci.nsIMsgLocalMailFolder
3109    ) {
3110      this._MsgHdrToMimeMessageFunc(
3111        aMsgHdr,
3112        aCallbackHandle.callbackThis,
3113        aCallbackHandle.callback,
3114        false,
3115        { saneBodySize: true }
3116      );
3117      aMimeMsg = (yield this.kWorkAsync)[1];
3118    } else {
3119      this._log.debug("  * Message is not offline -- only headers indexed");
3120    }
3121
3122    this._log.debug("  * Got message, subject " + aMsgHdr.subject);
3123
3124    if (this._unitTestSuperVerbose) {
3125      if (aMimeMsg) {
3126        this._log.debug("  * Got Mime " + aMimeMsg.prettyString());
3127      } else {
3128        this._log.debug("  * NO MIME MESSAGE!!!\n");
3129      }
3130    }
3131
3132    // -- Find/create the conversation the message belongs to.
3133    // Our invariant is that all messages that exist in the database belong to
3134    //  a conversation.
3135
3136    // - See if any of the ancestors exist and have a conversationID...
3137    // (references are ordered from old [0] to new [n-1])
3138    let references = Array.from(range(0, aMsgHdr.numReferences)).map(i =>
3139      aMsgHdr.getStringReference(i)
3140    );
3141    // also see if we already know about the message...
3142    references.push(aMsgHdr.messageId);
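    // (Illustrative, assuming a simple reply chain: while indexing the reply
    //  to "<root@example.com>", references is ["<root@example.com>",
    //  "<reply@example.com>"], ordered oldest first and ending with our own
    //  message-id.)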
3143
3144    this.getMessagesByMessageID(
3145      references,
3146      aCallbackHandle.callback,
3147      aCallbackHandle.callbackThis
3148    );
3149    // (ancestorLists has a direct correspondence to the message ids)
3150    let ancestorLists = yield this.kWorkAsync;
3151
3152    this._log.debug("ancestors raw: " + ancestorLists);
3153    this._log.debug(
3154      "ref len: " + references.length + " anc len: " + ancestorLists.length
3155    );
3156    this._log.debug("references: " + references);
3157    this._log.debug("ancestors: " + ancestorLists);
3158
3159    // pull our current message lookup results off
3160    references.pop();
3161    let candidateCurMsgs = ancestorLists.pop();
3162
3163    let conversationID = null;
3164    let conversation = null;
3165    // -- figure out the conversation ID
    // if a clone of us already exists, just use its conversation ID
3167    if (candidateCurMsgs.length > 0) {
3168      conversationID = candidateCurMsgs[0].conversationID;
3169      conversation = candidateCurMsgs[0].conversation;
3170    } else {
3171      // otherwise check out our ancestors
3172      // (walk from closest to furthest ancestor)
3173      for (
3174        let iAncestor = ancestorLists.length - 1;
3175        iAncestor >= 0;
3176        --iAncestor
3177      ) {
3178        let ancestorList = ancestorLists[iAncestor];
3179
3180        if (ancestorList.length > 0) {
3181          // we only care about the first instance of the message because we are
3182          //  able to guarantee the invariant that all messages with the same
3183          //  message id belong to the same conversation.
3184          let ancestor = ancestorList[0];
3185          if (conversationID === null) {
3186            conversationID = ancestor.conversationID;
3187            conversation = ancestor.conversation;
3188          } else if (conversationID != ancestor.conversationID) {
3189            // XXX this inconsistency is known and understood and tracked by
3190            //  bug 478162 https://bugzilla.mozilla.org/show_bug.cgi?id=478162
3191            // this._log.error("Inconsistency in conversations invariant on " +
3192            //                ancestor.headerMessageID + ".  It has conv id " +
3193            //                ancestor.conversationID + " but expected " +
3194            //                conversationID + ". ID: " + ancestor.id);
3195          }
3196        }
3197      }
3198    }
3199
3200    // nobody had one?  create a new conversation
3201    if (conversationID === null) {
3202      // (the create method could issue the id, making the call return
3203      //  without waiting for the database...)
3204      conversation = this._datastore.createConversation(
3205        aMsgHdr.mime2DecodedSubject,
3206        null,
3207        null
3208      );
3209      conversationID = conversation.id;
3210    }
3211
3212    // Walk from furthest to closest ancestor, creating the ancestors that don't
3213    //  exist. (This is possible if previous messages that were consumed in this
3214    //  thread only had an in-reply-to or for some reason did not otherwise
3215    //  provide the full references chain.)
3216    for (let iAncestor = 0; iAncestor < ancestorLists.length; ++iAncestor) {
3217      let ancestorList = ancestorLists[iAncestor];
3218
3219      if (ancestorList.length == 0) {
3220        this._log.debug(
3221          "creating message with: null, " +
3222            conversationID +
3223            ", " +
3224            references[iAncestor] +
3225            ", null."
3226        );
3227        let ancestor = this._datastore.createMessage(
3228          null,
3229          null, // ghost
3230          conversationID,
3231          null,
3232          references[iAncestor],
3233          null, // no subject
3234          null, // no body
3235          null
3236        ); // no attachments
3237        this._datastore.insertMessage(ancestor);
3238        ancestorLists[iAncestor].push(ancestor);
3239      }
3240    }
3241    // now all our ancestors exist, though they may be ghost-like...
3242
    // Find out whether there's a ghost version of our message, or whether we
    //  have already indexed this message.
3245    let curMsg = null;
3246    this._log.debug(candidateCurMsgs.length + " candidate messages");
3247    for (let iCurCand = 0; iCurCand < candidateCurMsgs.length; iCurCand++) {
3248      let candMsg = candidateCurMsgs[iCurCand];
3249
3250      this._log.debug(
3251        "candidate folderID: " +
3252          candMsg.folderID +
3253          " messageKey: " +
3254          candMsg.messageKey
3255      );
3256
3257      if (candMsg.folderURI == this._indexingFolder.URI) {
3258        // if we are in the same folder and we have the same message key, we
3259        //  are definitely the same, stop looking.
3260        if (candMsg.messageKey == aMsgHdr.messageKey) {
3261          curMsg = candMsg;
3262          break;
3263        }
3264        // if (we are in the same folder and) the candidate message has a null
3265        //  message key, we treat it as our best option unless we find an exact
3266        //  key match. (this would happen because the 'move' notification case
3267        //  has to deal with not knowing the target message key.  this case
3268        //  will hopefully be somewhat improved in the future to not go through
3269        //  this path which mandates re-indexing of the message in its entirety)
3270        if (candMsg.messageKey === null) {
3271          curMsg = candMsg;
3272        } else if (
3273          curMsg === null &&
3274          !this._indexingDatabase.ContainsKey(candMsg.messageKey)
3275        ) {
3276          // (We are in the same folder and) the candidate message's underlying
3277          // message no longer exists/matches. Assume we are the same but
3278          // were betrayed by a re-indexing or something, but we have to make
3279          // sure a perfect match doesn't turn up.
3280          curMsg = candMsg;
3281        }
3282      } else if (curMsg === null && candMsg.folderID === null) {
3283        // a ghost/deleted message is fine
3284        curMsg = candMsg;
3285      }
3286    }
3287
3288    let attachmentNames = null;
3289    if (aMimeMsg) {
3290      attachmentNames = aMimeMsg.allAttachments
3291        .filter(att => att.isRealAttachment)
3292        .map(att => att.name);
3293    }
3294
3295    let isConceptuallyNew, isRecordNew, insertFulltext;
3296    if (curMsg === null) {
3297      curMsg = this._datastore.createMessage(
3298        aMsgHdr.folder,
3299        aMsgHdr.messageKey,
3300        conversationID,
3301        aMsgHdr.date,
3302        aMsgHdr.messageId
3303      );
3304      curMsg._conversation = conversation;
3305      isConceptuallyNew = isRecordNew = insertFulltext = true;
3306    } else {
3307      isRecordNew = false;
3308      // the message is conceptually new if it was a ghost or dead.
3309      isConceptuallyNew = curMsg._isGhost || curMsg._isDeleted;
3310      // insert fulltext if it was a ghost
3311      insertFulltext = curMsg._isGhost;
3312      curMsg._folderID = this._datastore._mapFolder(aMsgHdr.folder).id;
3313      curMsg._messageKey = aMsgHdr.messageKey;
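      // (aMsgHdr.date is a PRTime value in microseconds; dividing by 1000
      //  yields the milliseconds that the Date constructor expects.)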
3314      curMsg.date = new Date(aMsgHdr.date / 1000);
3315      // the message may have been deleted; tell it to make sure it's not.
3316      curMsg._ensureNotDeleted();
      // Note: we assume our matching logic is flawless; in particular, if this
      //  message was not a ghost, we assume the 'body' associated with the id
      //  is still exactly the same.  It is conceivable that there are cases
      //  where this is not true.
3321    }
3322
3323    if (aMimeMsg) {
3324      let bodyPlain = aMimeMsg.coerceBodyToPlaintext(aMsgHdr.folder);
3325      if (bodyPlain) {
3326        curMsg._bodyLines = bodyPlain.split(/\r?\n/);
3327        // curMsg._content gets set by GlodaFundAttr.jsm
3328      }
3329    }
3330
3331    // Mark the message as new (for the purposes of fulltext insertion)
3332    if (insertFulltext) {
3333      curMsg._isNew = true;
3334    }
3335
3336    curMsg._subject = aMsgHdr.mime2DecodedSubject;
3337    curMsg._attachmentNames = attachmentNames;
3338
3339    // curMsg._indexAuthor gets set by GlodaFundAttr.jsm
3340    // curMsg._indexRecipients gets set by GlodaFundAttr.jsm
3341
3342    // zero the notability so everything in grokNounItem can just increment
3343    curMsg.notability = 0;
3344
3345    yield aCallbackHandle.pushAndGo(
3346      Gloda.grokNounItem(
3347        curMsg,
3348        { header: aMsgHdr, mime: aMimeMsg, bodyLines: curMsg._bodyLines },
3349        isConceptuallyNew,
3350        isRecordNew,
3351        aCallbackHandle
3352      )
3353    );
3354
3355    delete curMsg._bodyLines;
3356    delete curMsg._content;
3357    delete curMsg._isNew;
3358    delete curMsg._indexAuthor;
3359    delete curMsg._indexRecipients;
3360
3361    // we want to update the header for messages only after the transaction
3362    //  irrevocably hits the disk.  otherwise we could get confused if the
3363    //  transaction rolls back or what not.
3364    PendingCommitTracker.track(aMsgHdr, curMsg.id);
3365
3366    yield this.kWorkDone;
3367  },
3368
3369  /**
3370   * Wipe a message out of existence from our index.  This is slightly more
3371   *  tricky than one would first expect because there are potentially
3372   *  attributes not immediately associated with this message that reference
3373   *  the message.  Not only that, but deletion of messages may leave a
3374   *  conversation possessing only ghost messages, which we don't want, so we
3375   *  need to nuke the moot conversation and its moot ghost messages.
3376   * For now, we are actually punting on that trickiness, and the exact
3377   *  nuances aren't defined yet because we have not decided whether to store
3378   *  such attributes redundantly.  For example, if we have subject-pred-object,
3379   *  we could actually store this as attributes (subject, id, object) and
3380   *  (object, id, subject).  In such a case, we could query on (subject, *)
3381   *  and use the results to delete the (object, id, subject) case.  If we
3382   *  don't redundantly store attributes, we can deal with the problem by
3383   *  collecting up all the attributes that accept a message as their object
3384   *  type and issuing a delete against that.  For example, delete (*, [1,2,3],
3385   *  message id).
3386   * (We are punting because we haven't implemented support for generating
3387   *  attributes like that yet.)
3388   *
3389   * @TODO: implement deletion of attributes that reference (deleted) messages
3390   */
3391  *_deleteMessage(aMessage, aCallbackHandle) {
3392    this._log.debug("*** Deleting message: " + aMessage);
3393
3394    // -- delete our attributes
3395    // delete the message's attributes (if we implement the cascade delete, that
3396    //  could do the honors for us... right now we define the trigger in our
3397    //  schema but the back-end ignores it)
3398    GlodaDatastore.clearMessageAttributes(aMessage);
3399
3400    // -- delete our message or ghost us, and maybe nuke the whole conversation
3401    // Look at the other messages in the conversation.
3402    // (Note: although we are performing a lookup with no validity constraints
3403    //  and using the same object-relational-mapper-ish layer used by things
3404    //  that do have constraints, we are not at risk of exposing deleted
3405    //  messages to other code and getting it confused.  The only way code
3406    //  can find a message is if it shows up in their queries or gets announced
3407    //  via GlodaCollectionManager.itemsAdded, neither of which will happen.)
3408    let convPrivQuery = Gloda.newQuery(Gloda.NOUN_MESSAGE, {
3409      noDbQueryValidityConstraints: true,
3410    });
3411    convPrivQuery.conversation(aMessage.conversation);
3412    let conversationCollection = convPrivQuery.getCollection(aCallbackHandle);
3413    yield this.kWorkAsync;
3414
3415    let conversationMsgs = conversationCollection.items;
3416
    // Count the number of ghost messages we see to determine whether we are
    //  the last message alive.
3419    let ghostCount = 0;
3420    let twinMessageExists = false;
3421    for (let convMsg of conversationMsgs) {
3422      // ignore our own message
3423      if (convMsg.id == aMessage.id) {
3424        continue;
3425      }
3426
3427      if (convMsg._isGhost) {
3428        ghostCount++;
3429      } else if (
3430        // This message is our (living) twin if it is not a ghost, not deleted,
3431        // and has the same message-id header.
3432        !convMsg._isDeleted &&
3433        convMsg.headerMessageID == aMessage.headerMessageID
3434      ) {
3435        twinMessageExists = true;
3436      }
3437    }
3438
3439    // -- If everyone else is a ghost, blow away the conversation.
    // If there are messages that are still alive, or deleted messages that
    //  _deleteMessage has not yet gotten to, then do not do this.  (We will
    //  eventually hit this case if they are all deleted.)
3443    if (conversationMsgs.length - 1 == ghostCount) {
3444      // - Obliterate each message
3445      for (let msg of conversationMsgs) {
3446        GlodaDatastore.deleteMessageByID(msg.id);
3447      }
3448      // - Obliterate the conversation
3449      GlodaDatastore.deleteConversationByID(aMessage.conversationID);
3450      // *no one* should hold a reference or use aMessage after this point,
      //  trash it so such ne'er-do-wells are made plain.
3452      aMessage._objectPurgedMakeYourselfUnpleasant();
3453    } else if (twinMessageExists) {
3454      // -- Ghost or purge us as appropriate
3455      // Purge us if we have a (living) twin; no ghost required.
3456      GlodaDatastore.deleteMessageByID(aMessage.id);
3457      // *no one* should hold a reference or use aMessage after this point,
      //  trash it so such ne'er-do-wells are made plain.
3459      aMessage._objectPurgedMakeYourselfUnpleasant();
3460    } else {
      // No twin exists, so a ghost is required; we become the ghost.
3462      aMessage._ghost();
3463      GlodaDatastore.updateMessage(aMessage);
3464      // ghosts don't have fulltext. purge it.
3465      GlodaDatastore.deleteMessageTextByID(aMessage.id);
3466    }
3467
3468    yield this.kWorkDone;
3469  },
3470};
3471GlodaIndexer.registerIndexer(GlodaMsgIndexer);
3472