1/* This Source Code Form is subject to the terms of the Mozilla Public
2 * License, v. 2.0. If a copy of the MPL was not distributed with this
3 * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
4
5"use strict";
6
7this.EXPORTED_SYMBOLS = ["FinderIterator"];
8
9const { interfaces: Ci, classes: Cc, utils: Cu } = Components;
10
11Cu.import("resource://gre/modules/Services.jsm");
12Cu.import("resource://gre/modules/Task.jsm");
13Cu.import("resource://gre/modules/Timer.jsm");
14Cu.import("resource://gre/modules/XPCOMUtils.jsm");
15
16XPCOMUtils.defineLazyModuleGetter(this, "NLP", "resource://gre/modules/NLP.jsm");
17
18const kDebug = false;
19const kIterationSizeMax = 100;
20const kTimeoutPref = "findbar.iteratorTimeout";
21
22/**
23 * FinderIterator singleton. See the documentation for the `start()` method to
24 * learn more.
25 */
26this.FinderIterator = {
27  _currentParams: null,
28  _listeners: new Map(),
29  _catchingUp: new Set(),
30  _previousParams: null,
31  _previousRanges: [],
32  _spawnId: 0,
33  _timeout: Services.prefs.getIntPref(kTimeoutPref),
34  _timer: null,
35  ranges: [],
36  running: false,
37
38  // Expose `kIterationSizeMax` to the outside world for unit tests to use.
39  get kIterationSizeMax() { return kIterationSizeMax },
40
41  get params() {
42    if (!this._currentParams && !this._previousParams)
43      return null;
44    return Object.assign({}, this._currentParams || this._previousParams);
45  },
46
47  /**
48   * Start iterating the active Finder docShell, using the options below. When
49   * it already started at the request of another consumer, we first yield the
50   * results we already collected before continuing onward to yield fresh results.
51   * We make sure to pause every `kIterationSizeMax` iterations to make sure we
52   * don't block the host process too long. In the case of a break like this, we
53   * yield `undefined`, instead of a range.
54   * Upon re-entrance after a break, we check if `stop()` was called during the
55   * break and if so, we stop iterating.
56   * Results are also passed to the `listener.onIteratorRangeFound` callback
57   * method, along with a flag that specifies if the result comes from the cache
58   * or is fresh. The callback also adheres to the `limit` flag.
59   * The returned promise is resolved when 1) the limit is reached, 2) when all
60   * the ranges have been found or 3) when `stop()` is called whilst iterating.
61   *
62   * @param {Number}  [options.allowDistance] Allowed edit distance between the
63   *                                          current word and `options.word`
64   *                                          when the iterator is already running
65   * @param {Boolean} options.caseSensitive   Whether to search in case sensitive
66   *                                          mode
67   * @param {Boolean} options.entireWord      Whether to search in entire-word mode
68   * @param {Finder}  options.finder          Currently active Finder instance
69   * @param {Number}  [options.limit]         Limit the amount of results to be
70   *                                          passed back. Optional, defaults to no
71   *                                          limit.
72   * @param {Boolean} [options.linksOnly]     Only yield ranges that are inside a
73   *                                          hyperlink (used by QuickFind).
74   *                                          Optional, defaults to `false`.
75   * @param {Object}  options.listener        Listener object that implements the
76   *                                          following callback functions:
77   *                                           - onIteratorRangeFound({nsIDOMRange} range);
78   *                                           - onIteratorReset();
79   *                                           - onIteratorRestart({Object} iterParams);
80   *                                           - onIteratorStart({Object} iterParams);
81   * @param {Boolean} [options.useCache]        Whether to allow results already
82   *                                            present in the cache or demand fresh.
83   *                                            Optional, defaults to `false`.
84   * @param {String}  options.word              Word to search for
85   * @return {Promise}
86   */
87  start({ allowDistance, caseSensitive, entireWord, finder, limit, linksOnly, listener, useCache, word }) {
88    // Take care of default values for non-required options.
89    if (typeof allowDistance != "number")
90      allowDistance = 0;
91    if (typeof limit != "number")
92      limit = -1;
93    if (typeof linksOnly != "boolean")
94      linksOnly = false;
95    if (typeof useCache != "boolean")
96      useCache = false;
97
98    // Validate the options.
99    if (typeof caseSensitive != "boolean")
100      throw new Error("Missing required option 'caseSensitive'");
101    if (typeof entireWord != "boolean")
102      throw new Error("Missing required option 'entireWord'");
103    if (!finder)
104      throw new Error("Missing required option 'finder'");
105    if (!word)
106      throw new Error("Missing required option 'word'");
107    if (typeof listener != "object" || !listener.onIteratorRangeFound)
108      throw new TypeError("Missing valid, required option 'listener'");
109
110    // If the listener was added before, make sure the promise is resolved before
111    // we replace it with another.
112    if (this._listeners.has(listener)) {
113      let { onEnd } = this._listeners.get(listener);
114      if (onEnd)
115        onEnd();
116    }
117
118    let window = finder._getWindow();
119    let resolver;
120    let promise = new Promise(resolve => resolver = resolve);
121    let iterParams = { caseSensitive, entireWord, linksOnly, useCache, window, word };
122
123    this._listeners.set(listener, { limit, onEnd: resolver });
124
125    // If we're not running anymore and we're requesting the previous result, use it.
126    if (!this.running && this._previousResultAvailable(iterParams)) {
127      this._yieldPreviousResult(listener, window);
128      return promise;
129    }
130
131    if (this.running) {
132      // Double-check if we're not running the iterator with a different set of
133      // parameters, otherwise report an error with the most common reason.
134      if (!this._areParamsEqual(this._currentParams, iterParams, allowDistance)) {
135        if (kDebug) {
136          Cu.reportError(`We're currently iterating over '${this._currentParams.word}', not '${word}'\n` +
137            new Error().stack);
138        }
139        this._listeners.delete(listener);
140        resolver();
141        return promise;
142      }
143
144      // if we're still running, yield the set we have built up this far.
145      this._yieldIntermediateResult(listener, window);
146
147      return promise;
148    }
149
150    // Start!
151    this.running = true;
152    this._currentParams = iterParams;
153    this._findAllRanges(finder, ++this._spawnId);
154
155    return promise;
156  },
157
158  /**
159   * Stop the currently running iterator as soon as possible and optionally cache
160   * the result for later.
161   *
162   * @param {Boolean} [cachePrevious] Whether to save the result for later.
163   *                                  Optional.
164   */
165  stop(cachePrevious = false) {
166    if (!this.running)
167      return;
168
169    if (this._timer) {
170      clearTimeout(this._timer);
171      this._timer = null;
172    }
173    if (this._runningFindResolver) {
174      this._runningFindResolver();
175      this._runningFindResolver = null;
176    }
177
178    if (cachePrevious) {
179      this._previousRanges = [].concat(this.ranges);
180      this._previousParams = Object.assign({}, this._currentParams);
181    } else {
182      this._previousRanges = [];
183      this._previousParams = null;
184    }
185
186    this._catchingUp.clear();
187    this._currentParams = null;
188    this.ranges = [];
189    this.running = false;
190
191    for (let [, { onEnd }] of this._listeners)
192      onEnd();
193  },
194
195  /**
196   * Stops the iteration that currently running, if it is, and start a new one
197   * with the exact same params as before.
198   *
199   * @param {Finder} finder Currently active Finder instance
200   */
201  restart(finder) {
202    // Capture current iterator params before we stop the show.
203    let iterParams = this.params;
204    if (!iterParams)
205      return;
206    this.stop();
207
208    // Restart manually.
209    this.running = true;
210    this._currentParams = iterParams;
211
212    this._findAllRanges(finder, ++this._spawnId);
213    this._notifyListeners("restart", iterParams);
214  },
215
216  /**
217   * Reset the internal state of the iterator. Typically this would be called
218   * when the docShell is not active anymore, which makes the current and cached
219   * previous result invalid.
220   * If the iterator is running, it will be stopped as soon as possible.
221   */
222  reset() {
223    if (this._timer) {
224      clearTimeout(this._timer);
225      this._timer = null;
226    }
227    if (this._runningFindResolver) {
228      this._runningFindResolver();
229      this._runningFindResolver = null;
230    }
231
232    this._catchingUp.clear();
233    this._currentParams = this._previousParams = null;
234    this._previousRanges = [];
235    this.ranges = [];
236    this.running = false;
237
238    this._notifyListeners("reset");
239    for (let [, { onEnd }] of this._listeners)
240      onEnd();
241    this._listeners.clear();
242  },
243
244  /**
245   * Check if the currently running iterator parameters are the same as the ones
246   * passed through the arguments. When `true`, we can keep it running as-is and
247   * the consumer should stop the iterator when `false`.
248   *
249   * @param {Boolean}  options.caseSensitive Whether to search in case sensitive
250   *                                         mode
251   * @param {Boolean}  options.entireWord    Whether to search in entire-word mode
252   * @param  {Boolean} options.linksOnly     Whether to search for the word to be
253   *                                         present in links only
254   * @param  {String}  options.word          The word being searched for
255   * @return {Boolean}
256   */
257  continueRunning({ caseSensitive, entireWord, linksOnly, word }) {
258    return (this.running &&
259      this._currentParams.caseSensitive === caseSensitive &&
260      this._currentParams.entireWord === entireWord &&
261      this._currentParams.linksOnly === linksOnly &&
262      this._currentParams.word == word);
263  },
264
265  /**
266   * The default mode of operation of the iterator is to not accept duplicate
267   * listeners, resolve the promise of the older listeners and replace it with
268   * the new listener.
269   * Consumers may opt-out of this behavior by using this check and not call
270   * start().
271   *
272   * @param  {Object} paramSet Property bag with the same signature as you would
273   *                           pass into `start()`
274   * @return {Boolean}
275   */
276  isAlreadyRunning(paramSet) {
277    return (this.running &&
278      this._areParamsEqual(this._currentParams, paramSet) &&
279      this._listeners.has(paramSet.listener));
280  },
281
282  /**
283   * Safely notify all registered listeners that an event has occurred.
284   *
285   * @param {String}   callback    Name of the callback to invoke
286   * @param {mixed}    [params]    Optional argument that will be passed to the
287   *                               callback
288   * @param {Iterable} [listeners] Set of listeners to notify. Optional, defaults
289   *                               to `this._listeners.keys()`.
290   */
291  _notifyListeners(callback, params, listeners = this._listeners.keys()) {
292    callback = "onIterator" + callback.charAt(0).toUpperCase() + callback.substr(1);
293    for (let listener of listeners) {
294      try {
295        listener[callback](params);
296      } catch (ex) {
297        Cu.reportError("FinderIterator Error: " + ex);
298      }
299    }
300  },
301
302  /**
303   * Internal; check if an iteration request is available in the previous result
304   * that we cached.
305   *
306   * @param  {Boolean} options.caseSensitive Whether to search in case sensitive
307   *                                         mode
308   * @param  {Boolean} options.entireWord    Whether to search in entire-word mode
309   * @param  {Boolean} options.linksOnly     Whether to search for the word to be
310   *                                         present in links only
311   * @param  {Boolean} options.useCache      Whether the consumer wants to use the
312   *                                         cached previous result at all
313   * @param  {String}  options.word          The word being searched for
314   * @return {Boolean}
315   */
316  _previousResultAvailable({ caseSensitive, entireWord, linksOnly, useCache, word }) {
317    return !!(useCache &&
318      this._areParamsEqual(this._previousParams, { caseSensitive, entireWord, linksOnly, word }) &&
319      this._previousRanges.length);
320  },
321
322  /**
323   * Internal; compare if two sets of iterator parameters are equivalent.
324   *
325   * @param  {Object} paramSet1       First set of params (left hand side)
326   * @param  {Object} paramSet2       Second set of params (right hand side)
327   * @param  {Number} [allowDistance] Allowed edit distance between the two words.
328   *                                  Optional, defaults to '0', which means 'no
329   *                                  distance'.
330   * @return {Boolean}
331   */
332  _areParamsEqual(paramSet1, paramSet2, allowDistance = 0) {
333    return (!!paramSet1 && !!paramSet2 &&
334      paramSet1.caseSensitive === paramSet2.caseSensitive &&
335      paramSet1.entireWord === paramSet2.entireWord &&
336      paramSet1.linksOnly === paramSet2.linksOnly &&
337      paramSet1.window === paramSet2.window &&
338      NLP.levenshtein(paramSet1.word, paramSet2.word) <= allowDistance);
339  },
340
341  /**
342   * Internal; iterate over a predefined set of ranges that have been collected
343   * before.
344   * Also here, we make sure to pause every `kIterationSizeMax` iterations to
345   * make sure we don't block the host process too long. In the case of a break
346   * like this, we yield `undefined`, instead of a range.
347   *
348   * @param {Object}       listener    Listener object
349   * @param {Array}        rangeSource Set of ranges to iterate over
350   * @param {nsIDOMWindow} window      The window object is only really used
351   *                                   for access to `setTimeout`
352   * @param {Boolean}      [withPause] Whether to pause after each `kIterationSizeMax`
353   *                                   number of ranges yielded. Optional, defaults
354   *                                   to `true`.
355   * @yield {nsIDOMRange}
356   */
357  _yieldResult: function* (listener, rangeSource, window, withPause = true) {
358    // We keep track of the number of iterations to allow a short pause between
359    // every `kIterationSizeMax` number of iterations.
360    let iterCount = 0;
361    let { limit, onEnd } = this._listeners.get(listener);
362    let ranges = rangeSource.slice(0, limit > -1 ? limit : undefined);
363    for (let range of ranges) {
364      try {
365        range.startContainer;
366      } catch (ex) {
367        // Don't yield dead objects, so use the escape hatch.
368        if (ex.message.includes("dead object"))
369          return;
370      }
371
372      // Pass a flag that is `true` when we're returning the result from a
373      // cached previous iteration.
374      listener.onIteratorRangeFound(range, !this.running);
375      yield range;
376
377      if (withPause && ++iterCount >= kIterationSizeMax) {
378        iterCount = 0;
379        // Make sure to save the current limit for later.
380        this._listeners.set(listener, { limit, onEnd });
381        // Sleep for the rest of this cycle.
382        yield new Promise(resolve => window.setTimeout(resolve, 0));
383        // After a sleep, the set of ranges may have updated.
384        ranges = rangeSource.slice(0, limit > -1 ? limit : undefined);
385      }
386
387      if (limit !== -1 && --limit === 0) {
388        // We've reached our limit; no need to do more work.
389        this._listeners.delete(listener);
390        onEnd();
391        return;
392      }
393    }
394
395    // Save the updated limit globally.
396    this._listeners.set(listener, { limit, onEnd });
397  },
398
399  /**
400   * Internal; iterate over the set of previously found ranges. Meanwhile it'll
401   * mark the listener as 'catching up', meaning it will not receive fresh
402   * results from a running iterator.
403   *
404   * @param {Object}       listener Listener object
405   * @param {nsIDOMWindow} window   The window object is only really used
406   *                                for access to `setTimeout`
407   * @yield {nsIDOMRange}
408   */
409  _yieldPreviousResult: Task.async(function* (listener, window) {
410    this._notifyListeners("start", this.params, [listener]);
411    this._catchingUp.add(listener);
412    yield* this._yieldResult(listener, this._previousRanges, window);
413    this._catchingUp.delete(listener);
414    let { onEnd } = this._listeners.get(listener);
415    if (onEnd)
416      onEnd();
417  }),
418
419  /**
420   * Internal; iterate over the set of already found ranges. Meanwhile it'll
421   * mark the listener as 'catching up', meaning it will not receive fresh
422   * results from the running iterator.
423   *
424   * @param {Object}       listener Listener object
425   * @param {nsIDOMWindow} window   The window object is only really used
426   *                                for access to `setTimeout`
427   * @yield {nsIDOMRange}
428   */
429  _yieldIntermediateResult: Task.async(function* (listener, window) {
430    this._notifyListeners("start", this.params, [listener]);
431    this._catchingUp.add(listener);
432    yield* this._yieldResult(listener, this.ranges, window, false);
433    this._catchingUp.delete(listener);
434  }),
435
436  /**
437   * Internal; see the documentation of the start() method above.
438   *
439   * @param {Finder}       finder  Currently active Finder instance
440   * @param {Number}       spawnId Since `stop()` is synchronous and this method
441   *                               is not, this identifier is used to learn if
442   *                               it's supposed to still continue after a pause.
443   * @yield {nsIDOMRange}
444   */
445  _findAllRanges: Task.async(function* (finder, spawnId) {
446    if (this._timeout) {
447      if (this._timer)
448        clearTimeout(this._timer);
449      if (this._runningFindResolver)
450        this._runningFindResolver();
451
452      let timeout = this._timeout;
453      let searchTerm = this._currentParams.word;
454      // Wait a little longer when the first or second character is typed into
455      // the findbar.
456      if (searchTerm.length == 1)
457        timeout *= 4;
458      else if (searchTerm.length == 2)
459        timeout *= 2;
460      yield new Promise(resolve => {
461        this._runningFindResolver = resolve;
462        this._timer = setTimeout(resolve, timeout);
463      });
464      this._timer = this._runningFindResolver = null;
465      // During the timeout, we could have gotten the signal to stop iterating.
466      // Make sure we do here.
467      if (!this.running || spawnId !== this._spawnId)
468        return;
469    }
470
471    this._notifyListeners("start", this.params);
472
473    let { linksOnly, window, word } = this._currentParams;
474    // First we collect all frames we need to search through, whilst making sure
475    // that the parent window gets dibs.
476    let frames = [window].concat(this._collectFrames(window, finder));
477    let iterCount = 0;
478    for (let frame of frames) {
479      for (let range of this._iterateDocument(this._currentParams, frame)) {
480        // Between iterations, for example after a sleep of one cycle, we could
481        // have gotten the signal to stop iterating. Make sure we do here.
482        if (!this.running || spawnId !== this._spawnId)
483          return;
484
485        // Deal with links-only mode here.
486        if (linksOnly && !this._rangeStartsInLink(range))
487          continue;
488
489        this.ranges.push(range);
490
491        // Call each listener with the range we just found.
492        for (let [listener, { limit, onEnd }] of this._listeners) {
493          if (this._catchingUp.has(listener))
494            continue;
495
496          listener.onIteratorRangeFound(range);
497
498          if (limit !== -1 && --limit === 0) {
499            // We've reached our limit; no need to do more work for this listener.
500            this._listeners.delete(listener);
501            onEnd();
502            continue;
503          }
504
505          // Save the updated limit globally.
506          this._listeners.set(listener, { limit, onEnd });
507        }
508
509        yield range;
510
511        if (++iterCount >= kIterationSizeMax) {
512          iterCount = 0;
513          // Sleep for the rest of this cycle.
514          yield new Promise(resolve => window.setTimeout(resolve, 0));
515        }
516      }
517    }
518
519    // When the iterating has finished, make sure we reset and save the state
520    // properly.
521    this.stop(true);
522  }),
523
524  /**
525   * Internal; basic wrapper around nsIFind that provides a generator yielding
526   * a range each time an occurence of `word` string is found.
527   *
528   * @param {Boolean}      options.caseSensitive Whether to search in case
529   *                                             sensitive mode
530   * @param {Boolean}      options.entireWord    Whether to search in entire-word
531   *                                             mode
532   * @param {String}       options.word          The word to search for
533   * @param {nsIDOMWindow} window                The window to search in
534   * @yield {nsIDOMRange}
535   */
536  _iterateDocument: function* ({ caseSensitive, entireWord, word }, window) {
537    let doc = window.document;
538    let body = (doc instanceof Ci.nsIDOMHTMLDocument && doc.body) ?
539               doc.body : doc.documentElement;
540
541    if (!body)
542      return;
543
544    let searchRange = doc.createRange();
545    searchRange.selectNodeContents(body);
546
547    let startPt = searchRange.cloneRange();
548    startPt.collapse(true);
549
550    let endPt = searchRange.cloneRange();
551    endPt.collapse(false);
552
553    let retRange = null;
554
555    let nsIFind = Cc["@mozilla.org/embedcomp/rangefind;1"]
556                    .createInstance()
557                    .QueryInterface(Ci.nsIFind);
558    nsIFind.caseSensitive = caseSensitive;
559    nsIFind.entireWord = entireWord;
560
561    while ((retRange = nsIFind.Find(word, searchRange, startPt, endPt))) {
562      yield retRange;
563      startPt = retRange.cloneRange();
564      startPt.collapse(false);
565    }
566  },
567
568  /**
569   * Internal; helper method for the iterator that recursively collects all
570   * visible (i)frames inside a window.
571   *
572   * @param  {nsIDOMWindow} window The window to extract the (i)frames from
573   * @param  {Finder}       finder The Finder instance
574   * @return {Array}        Stack of frames to iterate over
575   */
576  _collectFrames(window, finder) {
577    let frames = [];
578    if (!("frames" in window) || !window.frames.length)
579      return frames;
580
581    // Casting `window.frames` to an Iterator doesn't work, so we're stuck with
582    // a plain, old for-loop.
583    for (let i = 0, l = window.frames.length; i < l; ++i) {
584      let frame = window.frames[i];
585      // Don't count matches in hidden frames.
586      let frameEl = frame && frame.frameElement;
587      if (!frameEl)
588        continue;
589      // Construct a range around the frame element to check its visiblity.
590      let range = window.document.createRange();
591      range.setStart(frameEl, 0);
592      range.setEnd(frameEl, 0);
593      if (!finder._fastFind.isRangeVisible(range, this._getDocShell(range), true))
594        continue;
595      // All conditions pass, so push the current frame and its children on the
596      // stack.
597      frames.push(frame, ...this._collectFrames(frame, finder));
598    }
599
600    return frames;
601  },
602
603  /**
604   * Internal; helper method to extract the docShell reference from a Window or
605   * Range object.
606   *
607   * @param  {nsIDOMRange} windowOrRange Window object to query. May also be a
608   *                                     Range, from which the owner window will
609   *                                     be queried.
610   * @return {nsIDocShell}
611   */
612  _getDocShell(windowOrRange) {
613    let window = windowOrRange;
614    // Ranges may also be passed in, so fetch its window.
615    if (windowOrRange instanceof Ci.nsIDOMRange)
616      window = windowOrRange.startContainer.ownerDocument.defaultView;
617    return window.QueryInterface(Ci.nsIInterfaceRequestor)
618                 .getInterface(Ci.nsIWebNavigation)
619                 .QueryInterface(Ci.nsIDocShell);
620  },
621
622  /**
623   * Internal; determines whether a range is inside a link.
624   *
625   * @param  {nsIDOMRange} range the range to check
626   * @return {Boolean}     True if the range starts in a link
627   */
628  _rangeStartsInLink(range) {
629    let isInsideLink = false;
630    let node = range.startContainer;
631
632    if (node.nodeType == node.ELEMENT_NODE) {
633      if (node.hasChildNodes) {
634        let childNode = node.item(range.startOffset);
635        if (childNode)
636          node = childNode;
637      }
638    }
639
640    const XLink_NS = "http://www.w3.org/1999/xlink";
641    const HTMLAnchorElement = (node.ownerDocument || node).defaultView.HTMLAnchorElement;
642    do {
643      if (node instanceof HTMLAnchorElement) {
644        isInsideLink = node.hasAttribute("href");
645        break;
646      } else if (typeof node.hasAttributeNS == "function" &&
647                 node.hasAttributeNS(XLink_NS, "href")) {
648        isInsideLink = (node.getAttributeNS(XLink_NS, "type") == "simple");
649        break;
650      }
651
652      node = node.parentNode;
653    } while (node);
654
655    return isInsideLink;
656  }
657};
658