/* This Source Code Form is subject to the terms of the Mozilla Public
 * License, v. 2.0. If a copy of the MPL was not distributed with this file,
 * You can obtain one at http://mozilla.org/MPL/2.0/. */

"use strict";

var EXPORTED_SYMBOLS = ["LanguageDetector"];

const { clearTimeout, setTimeout } = ChromeUtils.import(
  "resource://gre/modules/Timer.jsm"
);

// Since Emscripten can handle heap growth, but not heap shrinkage, we
// need to refresh the worker after we've processed a particularly large
// string in order to prevent unnecessary resident memory growth.
//
// These values define the cut-off string length and the idle timeout
// (in milliseconds) before destroying a worker. Once a string of the
// maximum size has been processed, the worker is marked for
// destruction, and is terminated as soon as it has been idle for the
// given timeout.
//
// 1.5MB. This is the approximate string length that forces heap growth
// for a 2MB heap.
//
// NOTE(review): these (and workerManager below) are declared with `var`,
// presumably so they stay reachable/adjustable from outside this module
// (e.g. by tests) -- confirm before tightening them to `const`.
var LARGE_STRING = 1.5 * 1024 * 1024;
var IDLE_TIMEOUT = 10 * 1000;

const WORKER_URL = "resource:///modules/translation/cld-worker.js";

/**
 * Owns the detection worker: creates it on demand, pairs each posted
 * request with the worker's replies in FIFO order, and terminates the
 * worker once it has been idle after handling a large input.
 */
var workerManager = {
  // Pending requests, oldest first. Each entry is `{ resolve }`; the entry
  // at the head is resolved with the next non-"ready" message received
  // from the worker.
  detectionQueue: [],

  /**
   * Post a detection request to the worker and wait for its reply.
   *
   * @param {Object} aParams
   *        The request, posted to the worker as-is. Its `text.length` is
   *        read after the reply arrives to decide whether to schedule
   *        worker termination.
   * @returns {Promise}
   *          Resolves with the `data` of the worker message paired with
   *          this request.
   */
  detectLanguage(aParams) {
    return this.workerReady
      .then(worker => {
        return new Promise(resolve => {
          // Queue the resolver before posting so the reply handler always
          // finds it.
          this.detectionQueue.push({ resolve });
          worker.postMessage(aParams);
        });
      })
      .then(result => {
        // We have our asynchronous result from the worker.
        //
        // Determine if our input was large enough to trigger heap growth,
        // or if we're already waiting to destroy the worker when it's
        // idle. If so, schedule termination after the idle timeout.
        if (aParams.text.length >= LARGE_STRING || this._idleTimeout != null) {
          this.flushWorker();
        }

        return result;
      });
  },

  // The live Worker instance, or null when none has been created (or it
  // has been terminated).
  _worker: null,
  // Cached promise returned by `workerReady`; null when no worker exists.
  _workerReadyPromise: null,

  /**
   * Lazily create the worker.
   *
   * @returns {Promise<Worker>}
   *          Resolves with the worker once it has posted the "ready"
   *          message. The same promise is returned until the worker is
   *          torn down by `_flushWorker`.
   */
  get workerReady() {
    if (!this._workerReadyPromise) {
      this._workerReadyPromise = new Promise(resolve => {
        const worker = new Worker(WORKER_URL);
        worker.onmessage = aMsg => {
          // Strict comparison: only the exact string sentinel counts as
          // the ready signal; anything else is a detection result.
          if (aMsg.data === "ready") {
            resolve(worker);
            return;
          }

          const pending = this.detectionQueue.shift();
          if (!pending) {
            // Nothing is waiting for this message. Fail with a clear
            // error instead of an opaque TypeError on `undefined`.
            throw new Error(
              "LanguageDetector: received a worker result with no pending request"
            );
          }
          pending.resolve(aMsg.data);
        };
        // NOTE(review): no `onerror` handler is installed. If the worker
        // fails while handling a request, that request's entry is never
        // settled and stays at the head of the queue -- confirm whether
        // failure handling is needed here.
        this._worker = worker;
      });
    }

    return this._workerReadyPromise;
  },

  // Holds the ID of the current pending idle cleanup setTimeout.
  _idleTimeout: null,

  // Schedule the current worker to be terminated after the idle timeout.
  // Any previously scheduled termination is cancelled first, so each call
  // restarts the idle countdown.
  flushWorker() {
    if (this._idleTimeout != null) {
      clearTimeout(this._idleTimeout);
    }

    this._idleTimeout = setTimeout(this._flushWorker.bind(this), IDLE_TIMEOUT);
  },

  // Immediately terminate the worker, as long as there are no pending
  // results. Otherwise, reschedule termination until after the next
  // idle timeout.
  _flushWorker() {
    if (this.detectionQueue.length) {
      this.flushWorker();
    } else {
      if (this._worker) {
        this._worker.terminate();
      }

      // Reset all state so the next `workerReady` access starts a fresh
      // worker.
      this._worker = null;
      this._workerReadyPromise = null;
      this._idleTimeout = null;
    }
  },
};

var LanguageDetector = {
  /**
   * Detect the language of a given string.
   *
   * The argument may be either a string containing the text to analyze,
   * or an object with the following properties:
   *
   *  - 'text' The text to analyze.
   *
   *  - 'isHTML' (optional) A boolean, indicating whether the text
   *      should be analyzed as HTML rather than plain text.
   *
   *  - 'language' (optional) A string indicating the expected language.
   *      For text extracted from HTTP documents, this is expected to
   *      come from the Content-Language header.
   *
   *  - 'tld' (optional) A string indicating the top-level domain of the
   *      document the text was extracted from.
   *
   *  - 'encoding' (optional) A string describing the encoding of the
   *      document the string was extracted from. Note that, regardless
   *      of the value of this property, the 'text' property must be a
   *      UTF-16 JavaScript string.
   *
   * @returns {Promise<Object>}
   * @resolves When detection is finished, with an object containing
   * these fields:
   *  - 'language' (string with a language code)
   *  - 'confident' (boolean) Whether the detector is confident of the
   *      result.
   *  - 'languages' (array) An array of up to three elements, containing
   *      the most prevalent languages detected. It contains a
   *      'languageCode' property, containing the ISO language code of
   *      the language, and a 'percent' property, describing the
   *      approximate percentage of the input which is in that language.
   *      For text of an unknown language, the result may contain an
   *      entry with the language code 'un', indicating the percent of
   *      the text which is unknown.
   */
  detectLanguage(aParams) {
    // Normalize the bare-string form into the object form without
    // reassigning the parameter.
    const params = typeof aParams === "string" ? { text: aParams } : aParams;

    return workerManager.detectLanguage(params);
  },
};