1/* -*- Mode: IDL; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
2/* This Source Code Form is subject to the terms of the Mozilla Public
3 * License, v. 2.0. If a copy of the MPL was not distributed with this file,
4 * You can obtain one at http://mozilla.org/MPL/2.0/. */
5
6#include "nsISupports.idl"
7
/* Numeric type of the nsISpeechService.SERVICETYPE_* constants and of the
 * nsISpeechService.serviceType attribute. */
8typedef unsigned short SpeechServiceType;
9
10/**
11 * A callback is implemented by the service. For direct audio services, it is
12 * required to implement these, although it could be helpful to use the
13 * cancel method for shutting down the speech resources.
 *
 * NOTE(review): "required" above reads like a typo for "not required", since
 * the "although" clause only makes sense if implementing the callback is
 * optional for direct audio services -- confirm before relying on it.
14 */
15[scriptable, uuid(c576de0c-8a3d-4570-be7e-9876d3e5bed2)]
16interface nsISpeechTaskCallback : nsISupports
17{
18  /**
19   * The user or application has paused the speech.
20   */
21  void onPause();
22
23  /**
24   * The user or application has resumed the speech.
25   */
26  void onResume();
27
28  /**
29   * The user or application has canceled the speech.
30   */
31  void onCancel();
32
33  /**
34   * The user or application has changed the volume of this speech.
35   * This is only used on indirect audio service type.
   *
   * @param aVolume the new volume value.
36   */
37  void onVolumeChanged(in float aVolume);
38};
39
40
41/**
42 * A task is associated with a single utterance. It is provided by the browser
43 * to the service in the speak() method.
44 */
45[scriptable, builtinclass, uuid(ad59949c-2437-4b35-8eeb-d760caab75c5)]
46interface nsISpeechTask : nsISupports
47{
48  /**
49   * Prepare browser for speech.
50   *
51   * @param aCallback callback object for mid-speech operations.
52   * @param aChannels number of audio channels. Only required
53   *                    in direct audio services
54   * @param aRate     audio rate. Only required in direct audio services
55   */
56  [optional_argc] void setup(in nsISpeechTaskCallback aCallback,
57                               [optional] in uint32_t aChannels,
58                               [optional] in uint32_t aRate);
59
60  /**
61   * Send audio data to browser.
62   *
63   * @param aData     an Int16Array with PCM-16 audio data.
64   * @param aLandmarks an array of sample offset and landmark pairs.
65   *                     Used for emitting boundary and mark events.
66   */
67  [implicit_jscontext]
68  void sendAudio(in jsval aData, in jsval aLandmarks);
69
  /**
   * Native-only ([noscript]) method taking a raw array of shorts.
   * Presumably the C++ counterpart of sendAudio() -- TODO confirm.
   *
   * @param aData    array of short values, aDataLen elements long.
   * @param aDataLen number of elements in aData.
   */
70  [noscript]
71  void sendAudioNative([array, size_is(aDataLen)] in short aData, in unsigned long aDataLen);
72
73  /**
74   * Dispatch start event.
75   */
76  void dispatchStart();
77
78  /**
79   * Dispatch end event.
80   *
81   * @param aElapsedTime time in seconds since speech has started.
82   * @param aCharIndex   offset of spoken characters.
83   */
84  void dispatchEnd(in float aElapsedTime, in unsigned long aCharIndex);
85
86  /**
87   * Dispatch pause event.
88   *
89   * @param aElapsedTime time in seconds since speech has started.
90   * @param aCharIndex   offset of spoken characters.
91   */
92  void dispatchPause(in float aElapsedTime, in unsigned long aCharIndex);
93
94  /**
95   * Dispatch resume event.
96   *
97   * @param aElapsedTime time in seconds since speech has started.
98   * @param aCharIndex   offset of spoken characters.
99   */
100  void dispatchResume(in float aElapsedTime, in unsigned long aCharIndex);
101
102  /**
103   * Dispatch error event.
104   *
105   * @param aElapsedTime time in seconds since speech has started.
106   * @param aCharIndex   offset of spoken characters.
107   */
108  void dispatchError(in float aElapsedTime, in unsigned long aCharIndex);
109
110  /**
111   * Dispatch boundary event.
112   *
113   * @param aName        name of boundary, 'word' or 'sentence'
114   * @param aElapsedTime time in seconds since speech has started.
115   * @param aCharIndex   offset of spoken characters.
116   */
117  void dispatchBoundary(in DOMString aName, in float aElapsedTime,
118                        in unsigned long aCharIndex);
119
120  /**
121   * Dispatch mark event.
122   *
123   * @param aName        mark identifier.
124   * @param aElapsedTime time in seconds since speech has started.
125   * @param aCharIndex   offset of spoken characters.
126   */
127  void dispatchMark(in DOMString aName, in float aElapsedTime, in unsigned long aCharIndex);
128};
129
130/**
131 * The main interface of a speech synthesis service.
132 *
133 * A service's speak method could be implemented in two ways:
134 *  1. Indirect audio - the service is responsible for outputting audio.
135 *    The service calls the nsISpeechTask.dispatch* methods directly. Starting
136 *    with dispatchStart() and ending with dispatchEnd() or dispatchError().
137 *
138 *  2. Direct audio - the service provides us with PCM-16 data, and we output it.
139 *    The service does not call the dispatch task methods directly. Instead,
140 *    audio information is provided at setup(), and audio data is sent with
141 *    sendAudio(). The utterance is terminated with an empty sendAudio().
142 */
143[scriptable, uuid(9b7d59db-88ff-43d0-b6ee-9f63d042d08f)]
144interface nsISpeechService : nsISupports
145{
146  /**
147   * Speak the given text using the voice identified by the given uri. See
148   * W3C Speech API spec for information about pitch and rate.
149   * https://dvcs.w3.org/hg/speech-api/raw-file/tip/speechapi.html#utterance-attributes
150   *
151   * @param aText   text to utter.
152   * @param aUri    unique voice identifier.
153   * @param aVolume volume to speak voice in. Only relevant for indirect audio.
154   * @param aRate   rate to speak voice in.
155   * @param aPitch  pitch to speak voice in.
156   * @param aTask  task instance for utterance, used for sending events or audio
157   *                 data back to browser.
158   */
159  void speak(in DOMString aText, in DOMString aUri,
160             in float aVolume, in float aRate, in float aPitch,
161             in nsISpeechTask aTask);
162
  /**
   * Service type values, matching the two ways of implementing speak()
   * described in the interface comment (direct vs. indirect audio).
   */
163  const SpeechServiceType SERVICETYPE_DIRECT_AUDIO = 1;
164  const SpeechServiceType SERVICETYPE_INDIRECT_AUDIO = 2;
165
  /**
   * The type of this service; expected to be one of the SERVICETYPE_*
   * constants.
   */
166  readonly attribute SpeechServiceType serviceType;
167};
168
169%{C++
170// Name of the service category that speech services can use in order to be
171// started up as a component.
172#define NS_SPEECH_SYNTH_STARTED "speech-synth-started"
173%}
174