1"""Generated message classes for speech version v1. 2 3Converts audio to text by applying powerful neural network models. 4""" 5# NOTE: This file is autogenerated and should not be edited by hand. 6 7from __future__ import absolute_import 8 9from apitools.base.protorpclite import messages as _messages 10from apitools.base.py import encoding 11from apitools.base.py import extra_types 12 13 14package = 'speech' 15 16 17class ListOperationsResponse(_messages.Message): 18 r"""The response message for Operations.ListOperations. 19 20 Fields: 21 nextPageToken: The standard List next-page token. 22 operations: A list of operations that matches the specified filter in the 23 request. 24 """ 25 26 nextPageToken = _messages.StringField(1) 27 operations = _messages.MessageField('Operation', 2, repeated=True) 28 29 30class LongRunningRecognizeMetadata(_messages.Message): 31 r"""Describes the progress of a long-running `LongRunningRecognize` call. It 32 is included in the `metadata` field of the `Operation` returned by the 33 `GetOperation` call of the `google::longrunning::Operations` service. 34 35 Fields: 36 lastUpdateTime: Time of the most recent processing update. 37 progressPercent: Approximate percentage of audio processed thus far. 38 Guaranteed to be 100 when the audio is fully processed and the results 39 are available. 40 startTime: Time when the request was received. 41 uri: Output only. The URI of the audio file being transcribed. Empty if 42 the audio was sent as byte content. 43 """ 44 45 lastUpdateTime = _messages.StringField(1) 46 progressPercent = _messages.IntegerField(2, variant=_messages.Variant.INT32) 47 startTime = _messages.StringField(3) 48 uri = _messages.StringField(4) 49 50 51class LongRunningRecognizeRequest(_messages.Message): 52 r"""The top-level message sent by the client for the `LongRunningRecognize` 53 method. 54 55 Fields: 56 audio: Required. The audio data to be recognized. 57 config: Required. Provides information to the recognizer that specifies 58 how to process the request. 59 """ 60 61 audio = _messages.MessageField('RecognitionAudio', 1) 62 config = _messages.MessageField('RecognitionConfig', 2) 63 64 65class LongRunningRecognizeResponse(_messages.Message): 66 r"""The only message returned to the client by the `LongRunningRecognize` 67 method. It contains the result as zero or more sequential 68 `SpeechRecognitionResult` messages. It is included in the `result.response` 69 field of the `Operation` returned by the `GetOperation` call of the 70 `google::longrunning::Operations` service. 71 72 Fields: 73 results: Sequential list of transcription results corresponding to 74 sequential portions of audio. 75 """ 76 77 results = _messages.MessageField('SpeechRecognitionResult', 1, repeated=True) 78 79 80class Operation(_messages.Message): 81 r"""This resource represents a long-running operation that is the result of 82 a network API call. 83 84 Messages: 85 MetadataValue: Service-specific metadata associated with the operation. It 86 typically contains progress information and common metadata such as 87 create time. Some services might not provide such metadata. Any method 88 that returns a long-running operation should document the metadata type, 89 if any. 90 ResponseValue: The normal response of the operation in case of success. If 91 the original method returns no data on success, such as `Delete`, the 92 response is `google.protobuf.Empty`. If the original method is standard 93 `Get`/`Create`/`Update`, the response should be the resource. For other 94 methods, the response should have the type `XxxResponse`, where `Xxx` is 95 the original method name. For example, if the original method name is 96 `TakeSnapshot()`, the inferred response type is `TakeSnapshotResponse`. 97 98 Fields: 99 done: If the value is `false`, it means the operation is still in 100 progress. If `true`, the operation is completed, and either `error` or 101 `response` is available. 102 error: The error result of the operation in case of failure or 103 cancellation. 104 metadata: Service-specific metadata associated with the operation. It 105 typically contains progress information and common metadata such as 106 create time. Some services might not provide such metadata. Any method 107 that returns a long-running operation should document the metadata type, 108 if any. 109 name: The server-assigned name, which is only unique within the same 110 service that originally returns it. If you use the default HTTP mapping, 111 the `name` should be a resource name ending with 112 `operations/{unique_id}`. 113 response: The normal response of the operation in case of success. If the 114 original method returns no data on success, such as `Delete`, the 115 response is `google.protobuf.Empty`. If the original method is standard 116 `Get`/`Create`/`Update`, the response should be the resource. For other 117 methods, the response should have the type `XxxResponse`, where `Xxx` is 118 the original method name. For example, if the original method name is 119 `TakeSnapshot()`, the inferred response type is `TakeSnapshotResponse`. 120 """ 121 122 @encoding.MapUnrecognizedFields('additionalProperties') 123 class MetadataValue(_messages.Message): 124 r"""Service-specific metadata associated with the operation. It typically 125 contains progress information and common metadata such as create time. 126 Some services might not provide such metadata. Any method that returns a 127 long-running operation should document the metadata type, if any. 128 129 Messages: 130 AdditionalProperty: An additional property for a MetadataValue object. 131 132 Fields: 133 additionalProperties: Properties of the object. Contains field @type 134 with type URL. 135 """ 136 137 class AdditionalProperty(_messages.Message): 138 r"""An additional property for a MetadataValue object. 139 140 Fields: 141 key: Name of the additional property. 142 value: A extra_types.JsonValue attribute. 143 """ 144 145 key = _messages.StringField(1) 146 value = _messages.MessageField('extra_types.JsonValue', 2) 147 148 additionalProperties = _messages.MessageField('AdditionalProperty', 1, repeated=True) 149 150 @encoding.MapUnrecognizedFields('additionalProperties') 151 class ResponseValue(_messages.Message): 152 r"""The normal response of the operation in case of success. If the 153 original method returns no data on success, such as `Delete`, the response 154 is `google.protobuf.Empty`. If the original method is standard 155 `Get`/`Create`/`Update`, the response should be the resource. For other 156 methods, the response should have the type `XxxResponse`, where `Xxx` is 157 the original method name. For example, if the original method name is 158 `TakeSnapshot()`, the inferred response type is `TakeSnapshotResponse`. 159 160 Messages: 161 AdditionalProperty: An additional property for a ResponseValue object. 162 163 Fields: 164 additionalProperties: Properties of the object. Contains field @type 165 with type URL. 166 """ 167 168 class AdditionalProperty(_messages.Message): 169 r"""An additional property for a ResponseValue object. 170 171 Fields: 172 key: Name of the additional property. 173 value: A extra_types.JsonValue attribute. 174 """ 175 176 key = _messages.StringField(1) 177 value = _messages.MessageField('extra_types.JsonValue', 2) 178 179 additionalProperties = _messages.MessageField('AdditionalProperty', 1, repeated=True) 180 181 done = _messages.BooleanField(1) 182 error = _messages.MessageField('Status', 2) 183 metadata = _messages.MessageField('MetadataValue', 3) 184 name = _messages.StringField(4) 185 response = _messages.MessageField('ResponseValue', 5) 186 187 188class RecognitionAudio(_messages.Message): 189 r"""Contains audio data in the encoding specified in the 190 `RecognitionConfig`. Either `content` or `uri` must be supplied. Supplying 191 both or neither returns google.rpc.Code.INVALID_ARGUMENT. See [content 192 limits](https://cloud.google.com/speech-to-text/quotas#content). 193 194 Fields: 195 content: The audio data bytes encoded as specified in `RecognitionConfig`. 196 Note: as with all bytes fields, proto buffers use a pure binary 197 representation, whereas JSON representations use base64. 198 uri: URI that points to a file that contains audio data bytes as specified 199 in `RecognitionConfig`. The file must not be compressed (for example, 200 gzip). Currently, only Google Cloud Storage URIs are supported, which 201 must be specified in the following format: 202 `gs://bucket_name/object_name` (other URI formats return 203 google.rpc.Code.INVALID_ARGUMENT). For more information, see [Request 204 URIs](https://cloud.google.com/storage/docs/reference-uris). 205 """ 206 207 content = _messages.BytesField(1) 208 uri = _messages.StringField(2) 209 210 211class RecognitionConfig(_messages.Message): 212 r"""Provides information to the recognizer that specifies how to process the 213 request. 214 215 Enums: 216 EncodingValueValuesEnum: Encoding of audio data sent in all 217 `RecognitionAudio` messages. This field is optional for `FLAC` and `WAV` 218 audio files and required for all other audio formats. For details, see 219 AudioEncoding. 220 221 Fields: 222 audioChannelCount: The number of channels in the input audio data. ONLY 223 set this for MULTI-CHANNEL recognition. Valid values for LINEAR16 and 224 FLAC are `1`-`8`. Valid values for OGG_OPUS are '1'-'254'. Valid value 225 for MULAW, AMR, AMR_WB and SPEEX_WITH_HEADER_BYTE is only `1`. If `0` or 226 omitted, defaults to one channel (mono). Note: We only recognize the 227 first channel by default. To perform independent recognition on each 228 channel set `enable_separate_recognition_per_channel` to 'true'. 229 diarizationConfig: Config to enable speaker diarization and set additional 230 parameters to make diarization better suited for your application. Note: 231 When this is enabled, we send all the words from the beginning of the 232 audio for the top alternative in every consecutive STREAMING responses. 233 This is done in order to improve our speaker tags as our models learn to 234 identify the speakers in the conversation over time. For non-streaming 235 requests, the diarization results will be provided only in the top 236 alternative of the FINAL SpeechRecognitionResult. 237 enableAutomaticPunctuation: If 'true', adds punctuation to recognition 238 result hypotheses. This feature is only available in select languages. 239 Setting this for requests in other languages has no effect at all. The 240 default 'false' value does not add punctuation to result hypotheses. 241 enableSeparateRecognitionPerChannel: This needs to be set to `true` 242 explicitly and `audio_channel_count` > 1 to get each channel recognized 243 separately. The recognition result will contain a `channel_tag` field to 244 state which channel that result belongs to. If this is not true, we will 245 only recognize the first channel. The request is billed cumulatively for 246 all channels recognized: `audio_channel_count` multiplied by the length 247 of the audio. 248 enableWordTimeOffsets: If `true`, the top result includes a list of words 249 and the start and end time offsets (timestamps) for those words. If 250 `false`, no word-level time offset information is returned. The default 251 is `false`. 252 encoding: Encoding of audio data sent in all `RecognitionAudio` messages. 253 This field is optional for `FLAC` and `WAV` audio files and required for 254 all other audio formats. For details, see AudioEncoding. 255 languageCode: Required. The language of the supplied audio as a 256 [BCP-47](https://www.rfc-editor.org/rfc/bcp/bcp47.txt) language tag. 257 Example: "en-US". See [Language 258 Support](https://cloud.google.com/speech-to-text/docs/languages) for a 259 list of the currently supported language codes. 260 maxAlternatives: Maximum number of recognition hypotheses to be returned. 261 Specifically, the maximum number of `SpeechRecognitionAlternative` 262 messages within each `SpeechRecognitionResult`. The server may return 263 fewer than `max_alternatives`. Valid values are `0`-`30`. A value of `0` 264 or `1` will return a maximum of one. If omitted, will return a maximum 265 of one. 266 metadata: Metadata regarding this request. 267 model: Which model to select for the given request. Select the model best 268 suited to your domain to get best results. If a model is not explicitly 269 specified, then we auto-select a model based on the parameters in the 270 RecognitionConfig. *Model* *Description* command_and_search Best for 271 short queries such as voice commands or voice search. phone_call Best 272 for audio that originated from a phone call (typically recorded at an 273 8khz sampling rate). video Best for audio that originated from from 274 video or includes multiple speakers. Ideally the audio is recorded at a 275 16khz or greater sampling rate. This is a premium model that costs more 276 than the standard rate. default Best for audio that is not one of the 277 specific audio models. For example, long-form audio. Ideally the audio 278 is high-fidelity, recorded at a 16khz or greater sampling rate. 279 profanityFilter: If set to `true`, the server will attempt to filter out 280 profanities, replacing all but the initial character in each filtered 281 word with asterisks, e.g. "f***". If set to `false` or omitted, 282 profanities won't be filtered out. 283 sampleRateHertz: Sample rate in Hertz of the audio data sent in all 284 `RecognitionAudio` messages. Valid values are: 8000-48000. 16000 is 285 optimal. For best results, set the sampling rate of the audio source to 286 16000 Hz. If that's not possible, use the native sample rate of the 287 audio source (instead of re-sampling). This field is optional for FLAC 288 and WAV audio files, but is required for all other audio formats. For 289 details, see AudioEncoding. 290 speechContexts: Array of SpeechContext. A means to provide context to 291 assist the speech recognition. For more information, see [speech 292 adaptation](https://cloud.google.com/speech-to-text/docs/context- 293 strength). 294 useEnhanced: Set to true to use an enhanced model for speech recognition. 295 If `use_enhanced` is set to true and the `model` field is not set, then 296 an appropriate enhanced model is chosen if an enhanced model exists for 297 the audio. If `use_enhanced` is true and an enhanced version of the 298 specified model does not exist, then the speech is recognized using the 299 standard version of the specified model. 300 """ 301 302 class EncodingValueValuesEnum(_messages.Enum): 303 r"""Encoding of audio data sent in all `RecognitionAudio` messages. This 304 field is optional for `FLAC` and `WAV` audio files and required for all 305 other audio formats. For details, see AudioEncoding. 306 307 Values: 308 ENCODING_UNSPECIFIED: Not specified. 309 LINEAR16: Uncompressed 16-bit signed little-endian samples (Linear PCM). 310 FLAC: `FLAC` (Free Lossless Audio Codec) is the recommended encoding 311 because it is lossless--therefore recognition is not compromised--and 312 requires only about half the bandwidth of `LINEAR16`. `FLAC` stream 313 encoding supports 16-bit and 24-bit samples, however, not all fields 314 in `STREAMINFO` are supported. 315 MULAW: 8-bit samples that compand 14-bit audio samples using G.711 316 PCMU/mu-law. 317 AMR: Adaptive Multi-Rate Narrowband codec. `sample_rate_hertz` must be 318 8000. 319 AMR_WB: Adaptive Multi-Rate Wideband codec. `sample_rate_hertz` must be 320 16000. 321 OGG_OPUS: Opus encoded audio frames in Ogg container 322 ([OggOpus](https://wiki.xiph.org/OggOpus)). `sample_rate_hertz` must 323 be one of 8000, 12000, 16000, 24000, or 48000. 324 SPEEX_WITH_HEADER_BYTE: Although the use of lossy encodings is not 325 recommended, if a very low bitrate encoding is required, `OGG_OPUS` is 326 highly preferred over Speex encoding. The [Speex](https://speex.org/) 327 encoding supported by Cloud Speech API has a header byte in each 328 block, as in MIME type `audio/x-speex-with-header-byte`. It is a 329 variant of the RTP Speex encoding defined in [RFC 330 5574](https://tools.ietf.org/html/rfc5574). The stream is a sequence 331 of blocks, one block per RTP packet. Each block starts with a byte 332 containing the length of the block, in bytes, followed by one or more 333 frames of Speex data, padded to an integral number of bytes (octets) 334 as specified in RFC 5574. In other words, each RTP header is replaced 335 with a single byte containing the block length. Only Speex wideband is 336 supported. `sample_rate_hertz` must be 16000. 337 """ 338 ENCODING_UNSPECIFIED = 0 339 LINEAR16 = 1 340 FLAC = 2 341 MULAW = 3 342 AMR = 4 343 AMR_WB = 5 344 OGG_OPUS = 6 345 SPEEX_WITH_HEADER_BYTE = 7 346 347 audioChannelCount = _messages.IntegerField(1, variant=_messages.Variant.INT32) 348 diarizationConfig = _messages.MessageField('SpeakerDiarizationConfig', 2) 349 enableAutomaticPunctuation = _messages.BooleanField(3) 350 enableSeparateRecognitionPerChannel = _messages.BooleanField(4) 351 enableWordTimeOffsets = _messages.BooleanField(5) 352 encoding = _messages.EnumField('EncodingValueValuesEnum', 6) 353 languageCode = _messages.StringField(7) 354 maxAlternatives = _messages.IntegerField(8, variant=_messages.Variant.INT32) 355 metadata = _messages.MessageField('RecognitionMetadata', 9) 356 model = _messages.StringField(10) 357 profanityFilter = _messages.BooleanField(11) 358 sampleRateHertz = _messages.IntegerField(12, variant=_messages.Variant.INT32) 359 speechContexts = _messages.MessageField('SpeechContext', 13, repeated=True) 360 useEnhanced = _messages.BooleanField(14) 361 362 363class RecognitionMetadata(_messages.Message): 364 r"""Description of audio data to be recognized. 365 366 Enums: 367 InteractionTypeValueValuesEnum: The use case most closely describing the 368 audio content to be recognized. 369 MicrophoneDistanceValueValuesEnum: The audio type that most closely 370 describes the audio being recognized. 371 OriginalMediaTypeValueValuesEnum: The original media the speech was 372 recorded on. 373 RecordingDeviceTypeValueValuesEnum: The type of device the speech was 374 recorded with. 375 376 Fields: 377 audioTopic: Description of the content. Eg. "Recordings of federal supreme 378 court hearings from 2012". 379 industryNaicsCodeOfAudio: The industry vertical to which this speech 380 recognition request most closely applies. This is most indicative of the 381 topics contained in the audio. Use the 6-digit NAICS code to identify 382 the industry vertical - see https://www.naics.com/search/. 383 interactionType: The use case most closely describing the audio content to 384 be recognized. 385 microphoneDistance: The audio type that most closely describes the audio 386 being recognized. 387 originalMediaType: The original media the speech was recorded on. 388 originalMimeType: Mime type of the original audio file. For example 389 `audio/m4a`, `audio/x-alaw-basic`, `audio/mp3`, `audio/3gpp`. A list of 390 possible audio mime types is maintained at 391 http://www.iana.org/assignments/media-types/media-types.xhtml#audio 392 recordingDeviceName: The device used to make the recording. Examples 393 'Nexus 5X' or 'Polycom SoundStation IP 6000' or 'POTS' or 'VoIP' or 394 'Cardioid Microphone'. 395 recordingDeviceType: The type of device the speech was recorded with. 396 """ 397 398 class InteractionTypeValueValuesEnum(_messages.Enum): 399 r"""The use case most closely describing the audio content to be 400 recognized. 401 402 Values: 403 INTERACTION_TYPE_UNSPECIFIED: Use case is either unknown or is something 404 other than one of the other values below. 405 DISCUSSION: Multiple people in a conversation or discussion. For example 406 in a meeting with two or more people actively participating. Typically 407 all the primary people speaking would be in the same room (if not, see 408 PHONE_CALL) 409 PRESENTATION: One or more persons lecturing or presenting to others, 410 mostly uninterrupted. 411 PHONE_CALL: A phone-call or video-conference in which two or more 412 people, who are not in the same room, are actively participating. 413 VOICEMAIL: A recorded message intended for another person to listen to. 414 PROFESSIONALLY_PRODUCED: Professionally produced audio (eg. TV Show, 415 Podcast). 416 VOICE_SEARCH: Transcribe spoken questions and queries into text. 417 VOICE_COMMAND: Transcribe voice commands, such as for controlling a 418 device. 419 DICTATION: Transcribe speech to text to create a written document, such 420 as a text-message, email or report. 421 """ 422 INTERACTION_TYPE_UNSPECIFIED = 0 423 DISCUSSION = 1 424 PRESENTATION = 2 425 PHONE_CALL = 3 426 VOICEMAIL = 4 427 PROFESSIONALLY_PRODUCED = 5 428 VOICE_SEARCH = 6 429 VOICE_COMMAND = 7 430 DICTATION = 8 431 432 class MicrophoneDistanceValueValuesEnum(_messages.Enum): 433 r"""The audio type that most closely describes the audio being recognized. 434 435 Values: 436 MICROPHONE_DISTANCE_UNSPECIFIED: Audio type is not known. 437 NEARFIELD: The audio was captured from a closely placed microphone. Eg. 438 phone, dictaphone, or handheld microphone. Generally if there speaker 439 is within 1 meter of the microphone. 440 MIDFIELD: The speaker if within 3 meters of the microphone. 441 FARFIELD: The speaker is more than 3 meters away from the microphone. 442 """ 443 MICROPHONE_DISTANCE_UNSPECIFIED = 0 444 NEARFIELD = 1 445 MIDFIELD = 2 446 FARFIELD = 3 447 448 class OriginalMediaTypeValueValuesEnum(_messages.Enum): 449 r"""The original media the speech was recorded on. 450 451 Values: 452 ORIGINAL_MEDIA_TYPE_UNSPECIFIED: Unknown original media type. 453 AUDIO: The speech data is an audio recording. 454 VIDEO: The speech data originally recorded on a video. 455 """ 456 ORIGINAL_MEDIA_TYPE_UNSPECIFIED = 0 457 AUDIO = 1 458 VIDEO = 2 459 460 class RecordingDeviceTypeValueValuesEnum(_messages.Enum): 461 r"""The type of device the speech was recorded with. 462 463 Values: 464 RECORDING_DEVICE_TYPE_UNSPECIFIED: The recording device is unknown. 465 SMARTPHONE: Speech was recorded on a smartphone. 466 PC: Speech was recorded using a personal computer or tablet. 467 PHONE_LINE: Speech was recorded over a phone line. 468 VEHICLE: Speech was recorded in a vehicle. 469 OTHER_OUTDOOR_DEVICE: Speech was recorded outdoors. 470 OTHER_INDOOR_DEVICE: Speech was recorded indoors. 471 """ 472 RECORDING_DEVICE_TYPE_UNSPECIFIED = 0 473 SMARTPHONE = 1 474 PC = 2 475 PHONE_LINE = 3 476 VEHICLE = 4 477 OTHER_OUTDOOR_DEVICE = 5 478 OTHER_INDOOR_DEVICE = 6 479 480 audioTopic = _messages.StringField(1) 481 industryNaicsCodeOfAudio = _messages.IntegerField(2, variant=_messages.Variant.UINT32) 482 interactionType = _messages.EnumField('InteractionTypeValueValuesEnum', 3) 483 microphoneDistance = _messages.EnumField('MicrophoneDistanceValueValuesEnum', 4) 484 originalMediaType = _messages.EnumField('OriginalMediaTypeValueValuesEnum', 5) 485 originalMimeType = _messages.StringField(6) 486 recordingDeviceName = _messages.StringField(7) 487 recordingDeviceType = _messages.EnumField('RecordingDeviceTypeValueValuesEnum', 8) 488 489 490class RecognizeRequest(_messages.Message): 491 r"""The top-level message sent by the client for the `Recognize` method. 492 493 Fields: 494 audio: Required. The audio data to be recognized. 495 config: Required. Provides information to the recognizer that specifies 496 how to process the request. 497 """ 498 499 audio = _messages.MessageField('RecognitionAudio', 1) 500 config = _messages.MessageField('RecognitionConfig', 2) 501 502 503class RecognizeResponse(_messages.Message): 504 r"""The only message returned to the client by the `Recognize` method. It 505 contains the result as zero or more sequential `SpeechRecognitionResult` 506 messages. 507 508 Fields: 509 results: Sequential list of transcription results corresponding to 510 sequential portions of audio. 511 """ 512 513 results = _messages.MessageField('SpeechRecognitionResult', 1, repeated=True) 514 515 516class SpeakerDiarizationConfig(_messages.Message): 517 r"""Config to enable speaker diarization. 518 519 Fields: 520 enableSpeakerDiarization: If 'true', enables speaker detection for each 521 recognized word in the top alternative of the recognition result using a 522 speaker_tag provided in the WordInfo. 523 maxSpeakerCount: Maximum number of speakers in the conversation. This 524 range gives you more flexibility by allowing the system to automatically 525 determine the correct number of speakers. If not set, the default value 526 is 6. 527 minSpeakerCount: Minimum number of speakers in the conversation. This 528 range gives you more flexibility by allowing the system to automatically 529 determine the correct number of speakers. If not set, the default value 530 is 2. 531 speakerTag: Output only. Unused. 532 """ 533 534 enableSpeakerDiarization = _messages.BooleanField(1) 535 maxSpeakerCount = _messages.IntegerField(2, variant=_messages.Variant.INT32) 536 minSpeakerCount = _messages.IntegerField(3, variant=_messages.Variant.INT32) 537 speakerTag = _messages.IntegerField(4, variant=_messages.Variant.INT32) 538 539 540class SpeechContext(_messages.Message): 541 r"""Provides "hints" to the speech recognizer to favor specific words and 542 phrases in the results. 543 544 Fields: 545 phrases: A list of strings containing words and phrases "hints" so that 546 the speech recognition is more likely to recognize them. This can be 547 used to improve the accuracy for specific words and phrases, for 548 example, if specific commands are typically spoken by the user. This can 549 also be used to add additional words to the vocabulary of the 550 recognizer. See [usage limits](https://cloud.google.com/speech-to- 551 text/quotas#content). List items can also be set to classes for groups 552 of words that represent common concepts that occur in natural language. 553 For example, rather than providing phrase hints for every month of the 554 year, using the $MONTH class improves the likelihood of correctly 555 transcribing audio that includes months. 556 """ 557 558 phrases = _messages.StringField(1, repeated=True) 559 560 561class SpeechOperationsGetRequest(_messages.Message): 562 r"""A SpeechOperationsGetRequest object. 563 564 Fields: 565 name: The name of the operation resource. 566 """ 567 568 name = _messages.StringField(1, required=True) 569 570 571class SpeechOperationsListRequest(_messages.Message): 572 r"""A SpeechOperationsListRequest object. 573 574 Fields: 575 filter: The standard list filter. 576 name: The name of the operation's parent resource. 577 pageSize: The standard list page size. 578 pageToken: The standard list page token. 579 """ 580 581 filter = _messages.StringField(1) 582 name = _messages.StringField(2) 583 pageSize = _messages.IntegerField(3, variant=_messages.Variant.INT32) 584 pageToken = _messages.StringField(4) 585 586 587class SpeechRecognitionAlternative(_messages.Message): 588 r"""Alternative hypotheses (a.k.a. n-best list). 589 590 Fields: 591 confidence: The confidence estimate between 0.0 and 1.0. A higher number 592 indicates an estimated greater likelihood that the recognized words are 593 correct. This field is set only for the top alternative of a non- 594 streaming result or, of a streaming result where `is_final=true`. This 595 field is not guaranteed to be accurate and users should not rely on it 596 to be always provided. The default of 0.0 is a sentinel value indicating 597 `confidence` was not set. 598 transcript: Transcript text representing the words that the user spoke. 599 words: A list of word-specific information for each recognized word. Note: 600 When `enable_speaker_diarization` is true, you will see all the words 601 from the beginning of the audio. 602 """ 603 604 confidence = _messages.FloatField(1, variant=_messages.Variant.FLOAT) 605 transcript = _messages.StringField(2) 606 words = _messages.MessageField('WordInfo', 3, repeated=True) 607 608 609class SpeechRecognitionResult(_messages.Message): 610 r"""A speech recognition result corresponding to a portion of the audio. 611 612 Fields: 613 alternatives: May contain one or more recognition hypotheses (up to the 614 maximum specified in `max_alternatives`). These alternatives are ordered 615 in terms of accuracy, with the top (first) alternative being the most 616 probable, as ranked by the recognizer. 617 channelTag: For multi-channel audio, this is the channel number 618 corresponding to the recognized result for the audio from that channel. 619 For audio_channel_count = N, its output values can range from '1' to 620 'N'. 621 """ 622 623 alternatives = _messages.MessageField('SpeechRecognitionAlternative', 1, repeated=True) 624 channelTag = _messages.IntegerField(2, variant=_messages.Variant.INT32) 625 626 627class StandardQueryParameters(_messages.Message): 628 r"""Query parameters accepted by all methods. 629 630 Enums: 631 FXgafvValueValuesEnum: V1 error format. 632 AltValueValuesEnum: Data format for response. 633 634 Fields: 635 f__xgafv: V1 error format. 636 access_token: OAuth access token. 637 alt: Data format for response. 638 callback: JSONP 639 fields: Selector specifying which fields to include in a partial response. 640 key: API key. Your API key identifies your project and provides you with 641 API access, quota, and reports. Required unless you provide an OAuth 2.0 642 token. 643 oauth_token: OAuth 2.0 token for the current user. 644 prettyPrint: Returns response with indentations and line breaks. 645 quotaUser: Available to use for quota purposes for server-side 646 applications. Can be any arbitrary string assigned to a user, but should 647 not exceed 40 characters. 648 trace: A tracing token of the form "token:<tokenid>" to include in api 649 requests. 650 uploadType: Legacy upload protocol for media (e.g. "media", "multipart"). 651 upload_protocol: Upload protocol for media (e.g. "raw", "multipart"). 652 """ 653 654 class AltValueValuesEnum(_messages.Enum): 655 r"""Data format for response. 656 657 Values: 658 json: Responses with Content-Type of application/json 659 media: Media download with context-dependent Content-Type 660 proto: Responses with Content-Type of application/x-protobuf 661 """ 662 json = 0 663 media = 1 664 proto = 2 665 666 class FXgafvValueValuesEnum(_messages.Enum): 667 r"""V1 error format. 668 669 Values: 670 _1: v1 error format 671 _2: v2 error format 672 """ 673 _1 = 0 674 _2 = 1 675 676 f__xgafv = _messages.EnumField('FXgafvValueValuesEnum', 1) 677 access_token = _messages.StringField(2) 678 alt = _messages.EnumField('AltValueValuesEnum', 3, default='json') 679 callback = _messages.StringField(4) 680 fields = _messages.StringField(5) 681 key = _messages.StringField(6) 682 oauth_token = _messages.StringField(7) 683 prettyPrint = _messages.BooleanField(8, default=True) 684 quotaUser = _messages.StringField(9) 685 trace = _messages.StringField(10) 686 uploadType = _messages.StringField(11) 687 upload_protocol = _messages.StringField(12) 688 689 690class Status(_messages.Message): 691 r"""The `Status` type defines a logical error model that is suitable for 692 different programming environments, including REST APIs and RPC APIs. It is 693 used by [gRPC](https://github.com/grpc). Each `Status` message contains 694 three pieces of data: error code, error message, and error details. You can 695 find out more about this error model and how to work with it in the [API 696 Design Guide](https://cloud.google.com/apis/design/errors). 697 698 Messages: 699 DetailsValueListEntry: A DetailsValueListEntry object. 700 701 Fields: 702 code: The status code, which should be an enum value of google.rpc.Code. 703 details: A list of messages that carry the error details. There is a 704 common set of message types for APIs to use. 705 message: A developer-facing error message, which should be in English. Any 706 user-facing error message should be localized and sent in the 707 google.rpc.Status.details field, or localized by the client. 708 """ 709 710 @encoding.MapUnrecognizedFields('additionalProperties') 711 class DetailsValueListEntry(_messages.Message): 712 r"""A DetailsValueListEntry object. 713 714 Messages: 715 AdditionalProperty: An additional property for a DetailsValueListEntry 716 object. 717 718 Fields: 719 additionalProperties: Properties of the object. Contains field @type 720 with type URL. 721 """ 722 723 class AdditionalProperty(_messages.Message): 724 r"""An additional property for a DetailsValueListEntry object. 725 726 Fields: 727 key: Name of the additional property. 728 value: A extra_types.JsonValue attribute. 729 """ 730 731 key = _messages.StringField(1) 732 value = _messages.MessageField('extra_types.JsonValue', 2) 733 734 additionalProperties = _messages.MessageField('AdditionalProperty', 1, repeated=True) 735 736 code = _messages.IntegerField(1, variant=_messages.Variant.INT32) 737 details = _messages.MessageField('DetailsValueListEntry', 2, repeated=True) 738 message = _messages.StringField(3) 739 740 741class WordInfo(_messages.Message): 742 r"""Word-specific information for recognized words. 743 744 Fields: 745 endTime: Time offset relative to the beginning of the audio, and 746 corresponding to the end of the spoken word. This field is only set if 747 `enable_word_time_offsets=true` and only in the top hypothesis. This is 748 an experimental feature and the accuracy of the time offset can vary. 749 speakerTag: Output only. A distinct integer value is assigned for every 750 speaker within the audio. This field specifies which one of those 751 speakers was detected to have spoken this word. Value ranges from '1' to 752 diarization_speaker_count. speaker_tag is set if 753 enable_speaker_diarization = 'true' and only in the top alternative. 754 startTime: Time offset relative to the beginning of the audio, and 755 corresponding to the start of the spoken word. This field is only set if 756 `enable_word_time_offsets=true` and only in the top hypothesis. This is 757 an experimental feature and the accuracy of the time offset can vary. 758 word: The word corresponding to this set of information. 759 """ 760 761 endTime = _messages.StringField(1) 762 speakerTag = _messages.IntegerField(2, variant=_messages.Variant.INT32) 763 startTime = _messages.StringField(3) 764 word = _messages.StringField(4) 765 766 767encoding.AddCustomJsonFieldMapping( 768 StandardQueryParameters, 'f__xgafv', '$.xgafv') 769encoding.AddCustomJsonEnumMapping( 770 StandardQueryParameters.FXgafvValueValuesEnum, '_1', '1') 771encoding.AddCustomJsonEnumMapping( 772 StandardQueryParameters.FXgafvValueValuesEnum, '_2', '2') 773