1 /* 2 * Copyright 2008-2014 Arsen Chaloyan 3 * 4 * Licensed under the Apache License, Version 2.0 (the "License"); 5 * you may not use this file except in compliance with the License. 6 * You may obtain a copy of the License at 7 * 8 * http://www.apache.org/licenses/LICENSE-2.0 9 * 10 * Unless required by applicable law or agreed to in writing, software 11 * distributed under the License is distributed on an "AS IS" BASIS, 12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 * See the License for the specific language governing permissions and 14 * limitations under the License. 15 * 16 * $Id: mrcp_recog_header.h 2136 2014-07-04 06:33:36Z achaloyan@gmail.com $ 17 */ 18 19 #ifndef MRCP_RECOG_HEADER_H 20 #define MRCP_RECOG_HEADER_H 21 22 /** 23 * @file mrcp_recog_header.h 24 * @brief MRCP Recognizer Header 25 */ 26 27 #include "mrcp_types.h" 28 #include "mrcp_header_accessor.h" 29 30 APT_BEGIN_EXTERN_C 31 32 /** MRCP recognizer header fields */ 33 typedef enum { 34 RECOGNIZER_HEADER_CONFIDENCE_THRESHOLD, 35 RECOGNIZER_HEADER_SENSITIVITY_LEVEL, 36 RECOGNIZER_HEADER_SPEED_VS_ACCURACY, 37 RECOGNIZER_HEADER_N_BEST_LIST_LENGTH, 38 RECOGNIZER_HEADER_NO_INPUT_TIMEOUT, 39 RECOGNIZER_HEADER_RECOGNITION_TIMEOUT, 40 RECOGNIZER_HEADER_WAVEFORM_URI, 41 RECOGNIZER_HEADER_COMPLETION_CAUSE, 42 RECOGNIZER_HEADER_RECOGNIZER_CONTEXT_BLOCK, 43 RECOGNIZER_HEADER_START_INPUT_TIMERS, 44 RECOGNIZER_HEADER_SPEECH_COMPLETE_TIMEOUT, 45 RECOGNIZER_HEADER_SPEECH_INCOMPLETE_TIMEOUT, 46 RECOGNIZER_HEADER_DTMF_INTERDIGIT_TIMEOUT, 47 RECOGNIZER_HEADER_DTMF_TERM_TIMEOUT, 48 RECOGNIZER_HEADER_DTMF_TERM_CHAR, 49 RECOGNIZER_HEADER_FAILED_URI, 50 RECOGNIZER_HEADER_FAILED_URI_CAUSE, 51 RECOGNIZER_HEADER_SAVE_WAVEFORM, 52 RECOGNIZER_HEADER_NEW_AUDIO_CHANNEL, 53 RECOGNIZER_HEADER_SPEECH_LANGUAGE, 54 55 /** Additional header fields for MRCP v2 */ 56 RECOGNIZER_HEADER_INPUT_TYPE, 57 RECOGNIZER_HEADER_INPUT_WAVEFORM_URI, 58 RECOGNIZER_HEADER_COMPLETION_REASON, 59 RECOGNIZER_HEADER_MEDIA_TYPE, 60 RECOGNIZER_HEADER_VER_BUFFER_UTTERANCE, 61 RECOGNIZER_HEADER_RECOGNITION_MODE, 62 RECOGNIZER_HEADER_CANCEL_IF_QUEUE, 63 RECOGNIZER_HEADER_HOTWORD_MAX_DURATION, 64 RECOGNIZER_HEADER_HOTWORD_MIN_DURATION, 65 RECOGNIZER_HEADER_INTERPRET_TEXT, 66 RECOGNIZER_HEADER_DTMF_BUFFER_TIME, 67 RECOGNIZER_HEADER_CLEAR_DTMF_BUFFER, 68 RECOGNIZER_HEADER_EARLY_NO_MATCH, 69 RECOGNIZER_HEADER_NUM_MIN_CONSISTENT_PRONUNCIATIONS, 70 RECOGNIZER_HEADER_CONSISTENCY_THRESHOLD, 71 RECOGNIZER_HEADER_CLASH_THRESHOLD, 72 RECOGNIZER_HEADER_PERSONAL_GRAMMAR_URI, 73 RECOGNIZER_HEADER_ENROLL_UTTERANCE, 74 RECOGNIZER_HEADER_PHRASE_ID, 75 RECOGNIZER_HEADER_PHRASE_NL, 76 RECOGNIZER_HEADER_WEIGHT, 77 RECOGNIZER_HEADER_SAVE_BEST_WAVEFORM, 78 RECOGNIZER_HEADER_NEW_PHRASE_ID, 79 RECOGNIZER_HEADER_CONFUSABLE_PHRASES_URI, 80 RECOGNIZER_HEADER_ABORT_PHRASE_ENROLLMENT, 81 82 RECOGNIZER_HEADER_COUNT 83 } mrcp_recognizer_header_id; 84 85 86 /** MRCP recognizer completion-cause */ 87 typedef enum { 88 RECOGNIZER_COMPLETION_CAUSE_SUCCESS = 0, 89 RECOGNIZER_COMPLETION_CAUSE_NO_MATCH = 1, 90 RECOGNIZER_COMPLETION_CAUSE_NO_INPUT_TIMEOUT = 2, 91 RECOGNIZER_COMPLETION_CAUSE_RECOGNITION_TIMEOUT = 3, 92 RECOGNIZER_COMPLETION_CAUSE_GRAM_LOAD_FAILURE = 4, 93 RECOGNIZER_COMPLETION_CAUSE_GRAM_COMP_FAILURE = 5, 94 RECOGNIZER_COMPLETION_CAUSE_ERROR = 6, 95 RECOGNIZER_COMPLETION_CAUSE_SPEECH_TOO_EARLY = 7, 96 RECOGNIZER_COMPLETION_CAUSE_TOO_MUCH_SPEECH_TIMEOUT = 8, 97 RECOGNIZER_COMPLETION_CAUSE_URI_FAILURE = 9, 98 RECOGNIZER_COMPLETION_CAUSE_LANGUAGE_UNSUPPORTED = 10, 99 100 /** Additional completion-cause for MRCP v2 */ 101 RECOGNIZER_COMPLETION_CAUSE_CANCELLED = 11, 102 RECOGNIZER_COMPLETION_CAUSE_SEMANTICS_FAILURE = 12, 103 RECOGNIZER_COMPLETION_CAUSE_PARTIAL_MATCH = 13, 104 RECOGNIZER_COMPLETION_CAUSE_PARTIAL_MATCH_MAXTIME = 14, 105 RECOGNIZER_COMPLETION_CAUSE_NO_MATCH_MAXTIME = 15, 106 RECOGNIZER_COMPLETION_CAUSE_GRAM_DEFINITION_FAILURE = 16, 107 108 RECOGNIZER_COMPLETION_CAUSE_COUNT = 17, 109 RECOGNIZER_COMPLETION_CAUSE_UNKNOWN = RECOGNIZER_COMPLETION_CAUSE_COUNT 110 } mrcp_recog_completion_cause_e; 111 112 113 114 /** MRCP recognizer-header declaration */ 115 typedef struct mrcp_recog_header_t mrcp_recog_header_t; 116 117 /** MRCP recognizer-header */ 118 struct mrcp_recog_header_t { 119 /** Tells the recognizer resource what confidence level the client considers a 120 successful match */ 121 float confidence_threshold; 122 /** To filter out background noise and not mistake it for speech */ 123 float sensitivity_level; 124 /** Tunable towards Performance or Accuracy */ 125 float speed_vs_accuracy; 126 /** The client, by setting this header, can ask the recognition resource 127 to send it more than 1 alternative */ 128 apr_size_t n_best_list_length; 129 /** The client can use the no-input-timeout header to set this timeout */ 130 apr_size_t no_input_timeout; 131 /** The client can use the recognition-timeout header to set this timeout */ 132 apr_size_t recognition_timeout; 133 /** MUST be present in the RECOGNITION-COMPLETE event if the Save-Waveform 134 header was set to true */ 135 apt_str_t waveform_uri; 136 /** MUST be part of a RECOGNITION-COMPLETE, event coming from 137 the recognizer resource to the client */ 138 mrcp_recog_completion_cause_e completion_cause; 139 /** MAY be sent as part of the SET-PARAMS or GET-PARAMS request */ 140 apt_str_t recognizer_context_block; 141 /** MAY be sent as part of the RECOGNIZE request. A value of false tells 142 the recognizer to start recognition, but not to start the no-input timer yet */ 143 apt_bool_t start_input_timers; 144 /** Specifies the length of silence required following user 145 speech before the speech recognizer finalizes a result */ 146 apr_size_t speech_complete_timeout; 147 /** Specifies the required length of silence following user 148 speech after which a recognizer finalizes a result */ 149 apr_size_t speech_incomplete_timeout; 150 /** Specifies the inter-digit timeout value to use when 151 recognizing DTMF input */ 152 apr_size_t dtmf_interdigit_timeout; 153 /** Specifies the terminating timeout to use when 154 recognizing DTMF input*/ 155 apr_size_t dtmf_term_timeout; 156 /** Specifies the terminating DTMF character for DTMF input 157 recognition */ 158 char dtmf_term_char; 159 /** When a recognizer needs to fetch or access a URI and the access fails 160 the server SHOULD provide the failed URI in this header in the method response*/ 161 apt_str_t failed_uri; 162 /** When a recognizer method needs a recognizer to fetch or access a URI 163 and the access fails the server MUST provide the URI specific or 164 protocol specific response code for the URI in the Failed-URI header */ 165 apt_str_t failed_uri_cause; 166 /** Allows the client to request the recognizer resource to 167 save the audio input to the recognizer */ 168 apt_bool_t save_waveform; 169 /** MAY be specified in a RECOGNIZE request and allows the 170 client to tell the server that, from this point on, further input 171 audio comes from a different audio source */ 172 apt_bool_t new_audio_channel; 173 /** Specifies the language of recognition grammar data within 174 a session or request, if it is not specified within the data */ 175 apt_str_t speech_language; 176 177 /** Additional header fields for MRCP v2 */ 178 /** Specifies if the input that caused a barge-in was DTMF or speech */ 179 apt_str_t input_type; 180 /** Optional header specifies a URI pointing to audio content to be 181 processed by the RECOGNIZE operation */ 182 apt_str_t input_waveform_uri; 183 /** MAY be specified in a RECOGNITION-COMPLETE event coming from 184 the recognizer resource to the client */ 185 apt_str_t completion_reason; 186 /** Tells the server resource the Media Type in which to store captured 187 audio such as the one captured and returned by the Waveform-URI header */ 188 apt_str_t media_type; 189 /** Lets the client request the server to buffer the 190 utterance associated with this recognition request into a buffer 191 available to a co-resident verification resource */ 192 apt_bool_t ver_buffer_utterance; 193 /** Specifies what mode the RECOGNIZE method will operate in */ 194 apt_str_t recognition_mode; 195 /** Specifies what will happen if the client attempts to 196 invoke another RECOGNIZE method when this RECOGNIZE request is 197 already in progress for the resource*/ 198 apt_bool_t cancel_if_queue; 199 /** Specifies the maximum length of an utterance (in seconds) that will 200 be considered for Hotword recognition */ 201 apr_size_t hotword_max_duration; 202 /** Specifies the minimum length of an utterance (in seconds) that will 203 be considered for Hotword recognition */ 204 apr_size_t hotword_min_duration; 205 /** Provides a pointer to the text for which a natural language interpretation is desired */ 206 apt_str_t interpret_text; 207 /** MAY be specified in a GET-PARAMS or SET-PARAMS method and 208 is used to specify the size in time, in milliseconds, of the 209 typeahead buffer for the recognizer */ 210 apr_size_t dtmf_buffer_time; 211 /** MAY be specified in a RECOGNIZE method and is used to 212 tell the recognizer to clear the DTMF type-ahead buffer before 213 starting the recognize */ 214 apt_bool_t clear_dtmf_buffer; 215 /** MAY be specified in a RECOGNIZE method and is used to 216 tell the recognizer that it MUST not wait for the end of speech 217 before processing the collected speech to match active grammars */ 218 apt_bool_t early_no_match; 219 /** MAY be specified in a START-PHRASE-ENROLLMENT, "SET-PARAMS", or 220 "GET-PARAMS" method and is used to specify the minimum number of 221 consistent pronunciations that must be obtained to voice enroll a new phrase */ 222 apr_size_t num_min_consistent_pronunciations; 223 /** MAY be sent as part of the START-PHRASE-ENROLLMENT,"SET-PARAMS", or 224 "GET-PARAMS" method and is used during voice-enrollment to specify how similar 225 to a previously enrolled pronunciation of the same phrase an utterance needs 226 to be in order to be considered "consistent" */ 227 float consistency_threshold; 228 /** MAY be sent as part of the START-PHRASE-ENROLLMENT, SET-PARAMS, or 229 "GET-PARAMS" method and is used during voice-enrollment to specify 230 how similar the pronunciations of two different phrases can be 231 before they are considered to be clashing */ 232 float clash_threshold; 233 /** Specifies the speaker-trained grammar to be used or 234 referenced during enrollment operations */ 235 apt_str_t personal_grammar_uri; 236 /** MAY be specified in the RECOGNIZE method. If this header 237 is set to "true" and an Enrollment is active, the RECOGNIZE command 238 MUST add the collected utterance to the personal grammar that is 239 being enrolled */ 240 apt_bool_t enroll_utterance; 241 /** Identifies a phrase in an existing personal grammar for which 242 enrollment is desired. It is also returned to the client in the 243 RECOGNIZE complete event */ 244 apt_str_t phrase_id; 245 /** Specifies the interpreted text to be returned when the 246 phrase is recognized */ 247 apt_str_t phrase_nl; 248 /** Represents the occurrence likelihood of a phrase in an enrolled grammar */ 249 float weight; 250 /** Allows the client to request the recognizer resource to 251 save the audio stream for the best repetition of the phrase that was 252 used during the enrollment session */ 253 apt_bool_t save_best_waveform; 254 /** Replaces the id used to identify the phrase in a personal grammar */ 255 apt_str_t new_phrase_id; 256 /** Specifies a grammar that defines invalid phrases for enrollment */ 257 apt_str_t confusable_phrases_uri; 258 /** Can optionally be specified in the END-PHRASE-ENROLLMENT 259 method to abort the phrase enrollment, rather than committing the 260 phrase to the personal grammar */ 261 apt_bool_t abort_phrase_enrollment; 262 }; 263 264 265 /** Get recognizer header vtable */ 266 const mrcp_header_vtable_t* mrcp_recog_header_vtable_get(mrcp_version_e version); 267 268 /** Get recognizer completion cause string */ 269 MRCP_DECLARE(const apt_str_t*) mrcp_recog_completion_cause_get(mrcp_recog_completion_cause_e completion_cause, mrcp_version_e version); 270 271 APT_END_EXTERN_C 272 273 #endif /* MRCP_RECOG_HEADER_H */ 274