1"""Generated message classes for speech version v1.
2
3Converts audio to text by applying powerful neural network models.
4"""
5# NOTE: This file is autogenerated and should not be edited by hand.
6
7from __future__ import absolute_import
8
9from apitools.base.protorpclite import messages as _messages
10from apitools.base.py import encoding
11from apitools.base.py import extra_types
12
13
14package = 'speech'
15
16
class ListOperationsResponse(_messages.Message):
  r"""The response message for Operations.ListOperations.

  Fields:
    nextPageToken: The standard List next-page token.
    operations: A list of operations that matches the specified filter in the
      request.
  """

  nextPageToken = _messages.StringField(1)
  operations = _messages.MessageField('Operation', 2, repeated=True)


class LongRunningRecognizeMetadata(_messages.Message):
  r"""Describes the progress of a long-running `LongRunningRecognize` call. It
  is included in the `metadata` field of the `Operation` returned by the
  `GetOperation` call of the `google::longrunning::Operations` service.

  Fields:
    lastUpdateTime: Time of the most recent processing update.
    progressPercent: Approximate percentage of audio processed thus far.
      Guaranteed to be 100 when the audio is fully processed and the results
      are available.
    startTime: Time when the request was received.
    uri: Output only. The URI of the audio file being transcribed. Empty if
      the audio was sent as byte content.
  """

  lastUpdateTime = _messages.StringField(1)
  progressPercent = _messages.IntegerField(2, variant=_messages.Variant.INT32)
  startTime = _messages.StringField(3)
  uri = _messages.StringField(4)


class LongRunningRecognizeRequest(_messages.Message):
  r"""The top-level message sent by the client for the `LongRunningRecognize`
  method.

  Fields:
    audio: Required. The audio data to be recognized.
    config: Required. Provides information to the recognizer that specifies
      how to process the request.
  """

  audio = _messages.MessageField('RecognitionAudio', 1)
  config = _messages.MessageField('RecognitionConfig', 2)


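# Illustrative sketch (not part of the generated API): how a client might
# assemble a LongRunningRecognizeRequest for audio stored in Cloud Storage,
# e.g. 'gs://bucket_name/object_name'. The helper name is an assumption for
# the example; RecognitionAudio and RecognitionConfig are defined later in
# this module and are only looked up when the function is called.
def _example_build_long_running_request(gcs_uri):
  """Illustrative only: long-running request for audio at a gs:// URI."""
  config = RecognitionConfig(
      encoding=RecognitionConfig.EncodingValueValuesEnum.FLAC,
      languageCode='en-US')
  # `uri` and `content` are mutually exclusive; here the audio lives in GCS.
  audio = RecognitionAudio(uri=gcs_uri)
  return LongRunningRecognizeRequest(config=config, audio=audio)

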
class LongRunningRecognizeResponse(_messages.Message):
  r"""The only message returned to the client by the `LongRunningRecognize`
  method. It contains the result as zero or more sequential
  `SpeechRecognitionResult` messages. It is included in the `result.response`
  field of the `Operation` returned by the `GetOperation` call of the
  `google::longrunning::Operations` service.

  Fields:
    results: Sequential list of transcription results corresponding to
      sequential portions of audio.
  """

  results = _messages.MessageField('SpeechRecognitionResult', 1, repeated=True)


class Operation(_messages.Message):
  r"""This resource represents a long-running operation that is the result of
  a network API call.

  Messages:
    MetadataValue: Service-specific metadata associated with the operation. It
      typically contains progress information and common metadata such as
      create time. Some services might not provide such metadata. Any method
      that returns a long-running operation should document the metadata type,
      if any.
    ResponseValue: The normal response of the operation in case of success. If
      the original method returns no data on success, such as `Delete`, the
      response is `google.protobuf.Empty`. If the original method is standard
      `Get`/`Create`/`Update`, the response should be the resource. For other
      methods, the response should have the type `XxxResponse`, where `Xxx` is
      the original method name. For example, if the original method name is
      `TakeSnapshot()`, the inferred response type is `TakeSnapshotResponse`.

  Fields:
    done: If the value is `false`, it means the operation is still in
      progress. If `true`, the operation is completed, and either `error` or
      `response` is available.
    error: The error result of the operation in case of failure or
      cancellation.
    metadata: Service-specific metadata associated with the operation. It
      typically contains progress information and common metadata such as
      create time. Some services might not provide such metadata. Any method
      that returns a long-running operation should document the metadata type,
      if any.
    name: The server-assigned name, which is only unique within the same
      service that originally returns it. If you use the default HTTP mapping,
      the `name` should be a resource name ending with
      `operations/{unique_id}`.
    response: The normal response of the operation in case of success. If the
      original method returns no data on success, such as `Delete`, the
      response is `google.protobuf.Empty`. If the original method is standard
      `Get`/`Create`/`Update`, the response should be the resource. For other
      methods, the response should have the type `XxxResponse`, where `Xxx` is
      the original method name. For example, if the original method name is
      `TakeSnapshot()`, the inferred response type is `TakeSnapshotResponse`.
  """

  @encoding.MapUnrecognizedFields('additionalProperties')
  class MetadataValue(_messages.Message):
    r"""Service-specific metadata associated with the operation. It typically
    contains progress information and common metadata such as create time.
    Some services might not provide such metadata. Any method that returns a
    long-running operation should document the metadata type, if any.

    Messages:
      AdditionalProperty: An additional property for a MetadataValue object.

    Fields:
      additionalProperties: Properties of the object. Contains field @type
        with type URL.
    """

    class AdditionalProperty(_messages.Message):
      r"""An additional property for a MetadataValue object.

      Fields:
        key: Name of the additional property.
        value: An extra_types.JsonValue attribute.
      """

      key = _messages.StringField(1)
      value = _messages.MessageField('extra_types.JsonValue', 2)

    additionalProperties = _messages.MessageField('AdditionalProperty', 1, repeated=True)

  @encoding.MapUnrecognizedFields('additionalProperties')
  class ResponseValue(_messages.Message):
    r"""The normal response of the operation in case of success. If the
    original method returns no data on success, such as `Delete`, the response
    is `google.protobuf.Empty`. If the original method is standard
    `Get`/`Create`/`Update`, the response should be the resource. For other
    methods, the response should have the type `XxxResponse`, where `Xxx` is
    the original method name. For example, if the original method name is
    `TakeSnapshot()`, the inferred response type is `TakeSnapshotResponse`.

    Messages:
      AdditionalProperty: An additional property for a ResponseValue object.

    Fields:
      additionalProperties: Properties of the object. Contains field @type
        with type URL.
    """

    class AdditionalProperty(_messages.Message):
      r"""An additional property for a ResponseValue object.

      Fields:
        key: Name of the additional property.
        value: An extra_types.JsonValue attribute.
      """

      key = _messages.StringField(1)
      value = _messages.MessageField('extra_types.JsonValue', 2)

    additionalProperties = _messages.MessageField('AdditionalProperty', 1, repeated=True)

  done = _messages.BooleanField(1)
  error = _messages.MessageField('Status', 2)
  metadata = _messages.MessageField('MetadataValue', 3)
  name = _messages.StringField(4)
  response = _messages.MessageField('ResponseValue', 5)


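# Illustrative sketch (not part of the generated API): polling code typically
# receives an Operation as JSON and wants the typed metadata and response
# declared earlier in this module. The round-trip through
# encoding.MessageToJson / encoding.JsonToMessage shown here is one way to
# re-interpret the loosely typed MetadataValue / ResponseValue fields; the
# helper name is an assumption for the example.
def _example_unpack_operation(operation_json):
  """Illustrative only: returns (progress_percent, response_or_None)."""
  op = encoding.JsonToMessage(Operation, operation_json)
  progress = None
  if op.metadata is not None:
    metadata = encoding.JsonToMessage(
        LongRunningRecognizeMetadata, encoding.MessageToJson(op.metadata))
    progress = metadata.progressPercent
  if op.done and op.response is not None:
    response = encoding.JsonToMessage(
        LongRunningRecognizeResponse, encoding.MessageToJson(op.response))
    return progress, response
  return progress, None

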
class RecognitionAudio(_messages.Message):
  r"""Contains audio data in the encoding specified in the
  `RecognitionConfig`. Either `content` or `uri` must be supplied. Supplying
  both or neither returns google.rpc.Code.INVALID_ARGUMENT. See [content
  limits](https://cloud.google.com/speech-to-text/quotas#content).

  Fields:
    content: The audio data bytes encoded as specified in `RecognitionConfig`.
      Note: as with all bytes fields, proto buffers use a pure binary
      representation, whereas JSON representations use base64.
    uri: URI that points to a file that contains audio data bytes as specified
      in `RecognitionConfig`. The file must not be compressed (for example,
      gzip). Currently, only Google Cloud Storage URIs are supported, which
      must be specified in the following format:
      `gs://bucket_name/object_name` (other URI formats return
      google.rpc.Code.INVALID_ARGUMENT). For more information, see [Request
      URIs](https://cloud.google.com/storage/docs/reference-uris).
  """

  content = _messages.BytesField(1)
  uri = _messages.StringField(2)


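# Illustrative sketch (not part of the generated API): the docstring above
# says exactly one of `content` or `uri` may be set, so a small helper can
# enforce that choice client-side before the server rejects the request. The
# helper name is an assumption for the example.
def _example_make_recognition_audio(content=None, uri=None):
  """Illustrative only: builds a RecognitionAudio from bytes or a gs:// URI."""
  if (content is None) == (uri is None):
    raise ValueError('Supply exactly one of content or uri.')
  if content is not None:
    # Raw audio bytes; apitools base64-encodes BytesField values in JSON.
    return RecognitionAudio(content=content)
  return RecognitionAudio(uri=uri)

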
class RecognitionConfig(_messages.Message):
  r"""Provides information to the recognizer that specifies how to process the
  request.

  Enums:
    EncodingValueValuesEnum: Encoding of audio data sent in all
      `RecognitionAudio` messages. This field is optional for `FLAC` and `WAV`
      audio files and required for all other audio formats. For details, see
      AudioEncoding.

  Fields:
    audioChannelCount: The number of channels in the input audio data. ONLY
      set this for MULTI-CHANNEL recognition. Valid values for LINEAR16 and
      FLAC are `1`-`8`. Valid values for OGG_OPUS are `1`-`254`. The only
      valid value for MULAW, AMR, AMR_WB and SPEEX_WITH_HEADER_BYTE is `1`.
      If `0` or omitted, defaults to one channel (mono). Note: We only
      recognize the first channel by default. To perform independent
      recognition on each channel, set
      `enable_separate_recognition_per_channel` to `true`.
    diarizationConfig: Config to enable speaker diarization and set additional
      parameters to make diarization better suited for your application. Note:
      When this is enabled, we send all the words from the beginning of the
      audio for the top alternative in each consecutive STREAMING response.
      This is done in order to improve our speaker tags as our models learn to
      identify the speakers in the conversation over time. For non-streaming
      requests, the diarization results will be provided only in the top
      alternative of the FINAL SpeechRecognitionResult.
    enableAutomaticPunctuation: If `true`, adds punctuation to recognition
      result hypotheses. This feature is only available in select languages.
      Setting this for requests in other languages has no effect at all. The
      default `false` value does not add punctuation to result hypotheses.
    enableSeparateRecognitionPerChannel: This needs to be set to `true`
      explicitly and `audio_channel_count` > 1 to get each channel recognized
      separately. The recognition result will contain a `channel_tag` field to
      state which channel that result belongs to. If this is not true, we will
      only recognize the first channel. The request is billed cumulatively for
      all channels recognized: `audio_channel_count` multiplied by the length
      of the audio.
    enableWordTimeOffsets: If `true`, the top result includes a list of words
      and the start and end time offsets (timestamps) for those words. If
      `false`, no word-level time offset information is returned. The default
      is `false`.
    encoding: Encoding of audio data sent in all `RecognitionAudio` messages.
      This field is optional for `FLAC` and `WAV` audio files and required for
      all other audio formats. For details, see AudioEncoding.
    languageCode: Required. The language of the supplied audio as a
      [BCP-47](https://www.rfc-editor.org/rfc/bcp/bcp47.txt) language tag.
      Example: "en-US". See [Language
      Support](https://cloud.google.com/speech-to-text/docs/languages) for a
      list of the currently supported language codes.
    maxAlternatives: Maximum number of recognition hypotheses to be returned.
      Specifically, the maximum number of `SpeechRecognitionAlternative`
      messages within each `SpeechRecognitionResult`. The server may return
      fewer than `max_alternatives`. Valid values are `0`-`30`. A value of `0`
      or `1` will return a maximum of one. If omitted, a maximum of one is
      returned.
    metadata: Metadata regarding this request.
    model: Which model to select for the given request. Select the model best
      suited to your domain to get best results. If a model is not explicitly
      specified, then we auto-select a model based on the parameters in the
      RecognitionConfig.
        command_and_search: Best for short queries such as voice commands or
          voice search.
        phone_call: Best for audio that originated from a phone call
          (typically recorded at an 8khz sampling rate).
        video: Best for audio that originated from video or includes multiple
          speakers. Ideally the audio is recorded at a 16khz or greater
          sampling rate. This is a premium model that costs more than the
          standard rate.
        default: Best for audio that is not one of the specific audio models.
          For example, long-form audio. Ideally the audio is high-fidelity,
          recorded at a 16khz or greater sampling rate.
    profanityFilter: If set to `true`, the server will attempt to filter out
      profanities, replacing all but the initial character in each filtered
      word with asterisks, e.g. "f***". If set to `false` or omitted,
      profanities won't be filtered out.
    sampleRateHertz: Sample rate in Hertz of the audio data sent in all
      `RecognitionAudio` messages. Valid values are: 8000-48000. 16000 is
      optimal. For best results, set the sampling rate of the audio source to
      16000 Hz. If that's not possible, use the native sample rate of the
      audio source (instead of re-sampling). This field is optional for FLAC
      and WAV audio files, but is required for all other audio formats. For
      details, see AudioEncoding.
    speechContexts: Array of SpeechContext. A means to provide context to
      assist the speech recognition. For more information, see [speech
      adaptation](https://cloud.google.com/speech-to-text/docs/context-
      strength).
    useEnhanced: Set to true to use an enhanced model for speech recognition.
      If `use_enhanced` is set to true and the `model` field is not set, then
      an appropriate enhanced model is chosen if an enhanced model exists for
      the audio. If `use_enhanced` is true and an enhanced version of the
      specified model does not exist, then the speech is recognized using the
      standard version of the specified model.
  """

  class EncodingValueValuesEnum(_messages.Enum):
    r"""Encoding of audio data sent in all `RecognitionAudio` messages. This
    field is optional for `FLAC` and `WAV` audio files and required for all
    other audio formats. For details, see AudioEncoding.

    Values:
      ENCODING_UNSPECIFIED: Not specified.
      LINEAR16: Uncompressed 16-bit signed little-endian samples (Linear PCM).
      FLAC: `FLAC` (Free Lossless Audio Codec) is the recommended encoding
        because it is lossless--therefore recognition is not compromised--and
        requires only about half the bandwidth of `LINEAR16`. `FLAC` stream
        encoding supports 16-bit and 24-bit samples, however, not all fields
        in `STREAMINFO` are supported.
      MULAW: 8-bit samples that compand 14-bit audio samples using G.711
        PCMU/mu-law.
      AMR: Adaptive Multi-Rate Narrowband codec. `sample_rate_hertz` must be
        8000.
      AMR_WB: Adaptive Multi-Rate Wideband codec. `sample_rate_hertz` must be
        16000.
      OGG_OPUS: Opus encoded audio frames in Ogg container
        ([OggOpus](https://wiki.xiph.org/OggOpus)). `sample_rate_hertz` must
        be one of 8000, 12000, 16000, 24000, or 48000.
      SPEEX_WITH_HEADER_BYTE: Although the use of lossy encodings is not
        recommended, if a very low bitrate encoding is required, `OGG_OPUS` is
        highly preferred over Speex encoding. The [Speex](https://speex.org/)
        encoding supported by Cloud Speech API has a header byte in each
        block, as in MIME type `audio/x-speex-with-header-byte`. It is a
        variant of the RTP Speex encoding defined in [RFC
        5574](https://tools.ietf.org/html/rfc5574). The stream is a sequence
        of blocks, one block per RTP packet. Each block starts with a byte
        containing the length of the block, in bytes, followed by one or more
        frames of Speex data, padded to an integral number of bytes (octets)
        as specified in RFC 5574. In other words, each RTP header is replaced
        with a single byte containing the block length. Only Speex wideband is
        supported. `sample_rate_hertz` must be 16000.
    """
    ENCODING_UNSPECIFIED = 0
    LINEAR16 = 1
    FLAC = 2
    MULAW = 3
    AMR = 4
    AMR_WB = 5
    OGG_OPUS = 6
    SPEEX_WITH_HEADER_BYTE = 7

  audioChannelCount = _messages.IntegerField(1, variant=_messages.Variant.INT32)
  diarizationConfig = _messages.MessageField('SpeakerDiarizationConfig', 2)
  enableAutomaticPunctuation = _messages.BooleanField(3)
  enableSeparateRecognitionPerChannel = _messages.BooleanField(4)
  enableWordTimeOffsets = _messages.BooleanField(5)
  encoding = _messages.EnumField('EncodingValueValuesEnum', 6)
  languageCode = _messages.StringField(7)
  maxAlternatives = _messages.IntegerField(8, variant=_messages.Variant.INT32)
  metadata = _messages.MessageField('RecognitionMetadata', 9)
  model = _messages.StringField(10)
  profanityFilter = _messages.BooleanField(11)
  sampleRateHertz = _messages.IntegerField(12, variant=_messages.Variant.INT32)
  speechContexts = _messages.MessageField('SpeechContext', 13, repeated=True)
  useEnhanced = _messages.BooleanField(14)


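# Illustrative sketch (not part of the generated API): a RecognitionConfig
# for 16 kHz LINEAR16 audio with word time offsets and automatic punctuation
# enabled, roughly following the field descriptions above. The helper name
# and the chosen values are assumptions for the example.
def _example_build_recognition_config():
  """Illustrative only: builds a RecognitionConfig for LINEAR16 audio."""
  return RecognitionConfig(
      encoding=RecognitionConfig.EncodingValueValuesEnum.LINEAR16,
      sampleRateHertz=16000,
      languageCode='en-US',
      maxAlternatives=1,
      enableWordTimeOffsets=True,
      enableAutomaticPunctuation=True)

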
class RecognitionMetadata(_messages.Message):
  r"""Description of audio data to be recognized.

  Enums:
    InteractionTypeValueValuesEnum: The use case most closely describing the
      audio content to be recognized.
    MicrophoneDistanceValueValuesEnum: The audio type that most closely
      describes the audio being recognized.
    OriginalMediaTypeValueValuesEnum: The original media the speech was
      recorded on.
    RecordingDeviceTypeValueValuesEnum: The type of device the speech was
      recorded with.

  Fields:
    audioTopic: Description of the content. E.g. "Recordings of federal
      supreme court hearings from 2012".
    industryNaicsCodeOfAudio: The industry vertical to which this speech
      recognition request most closely applies. This is most indicative of the
      topics contained in the audio. Use the 6-digit NAICS code to identify
      the industry vertical - see https://www.naics.com/search/.
    interactionType: The use case most closely describing the audio content to
      be recognized.
    microphoneDistance: The audio type that most closely describes the audio
      being recognized.
    originalMediaType: The original media the speech was recorded on.
    originalMimeType: Mime type of the original audio file. For example
      `audio/m4a`, `audio/x-alaw-basic`, `audio/mp3`, `audio/3gpp`. A list of
      possible audio mime types is maintained at
      http://www.iana.org/assignments/media-types/media-types.xhtml#audio
    recordingDeviceName: The device used to make the recording. Examples
      'Nexus 5X' or 'Polycom SoundStation IP 6000' or 'POTS' or 'VoIP' or
      'Cardioid Microphone'.
    recordingDeviceType: The type of device the speech was recorded with.
  """

  class InteractionTypeValueValuesEnum(_messages.Enum):
    r"""The use case most closely describing the audio content to be
    recognized.

    Values:
      INTERACTION_TYPE_UNSPECIFIED: Use case is either unknown or is something
        other than one of the other values below.
      DISCUSSION: Multiple people in a conversation or discussion. For example
        in a meeting with two or more people actively participating. Typically
        all the primary people speaking would be in the same room (if not, see
        PHONE_CALL)
      PRESENTATION: One or more persons lecturing or presenting to others,
        mostly uninterrupted.
      PHONE_CALL: A phone-call or video-conference in which two or more
        people, who are not in the same room, are actively participating.
      VOICEMAIL: A recorded message intended for another person to listen to.
      PROFESSIONALLY_PRODUCED: Professionally produced audio (e.g. TV Show,
        Podcast).
      VOICE_SEARCH: Transcribe spoken questions and queries into text.
      VOICE_COMMAND: Transcribe voice commands, such as for controlling a
        device.
      DICTATION: Transcribe speech to text to create a written document, such
        as a text-message, email or report.
    """
    INTERACTION_TYPE_UNSPECIFIED = 0
    DISCUSSION = 1
    PRESENTATION = 2
    PHONE_CALL = 3
    VOICEMAIL = 4
    PROFESSIONALLY_PRODUCED = 5
    VOICE_SEARCH = 6
    VOICE_COMMAND = 7
    DICTATION = 8

  class MicrophoneDistanceValueValuesEnum(_messages.Enum):
    r"""The audio type that most closely describes the audio being recognized.

    Values:
      MICROPHONE_DISTANCE_UNSPECIFIED: Audio type is not known.
      NEARFIELD: The audio was captured from a closely placed microphone. E.g.
        phone, dictaphone, or handheld microphone. Generally, the speaker is
        within 1 meter of the microphone.
      MIDFIELD: The speaker is within 3 meters of the microphone.
      FARFIELD: The speaker is more than 3 meters away from the microphone.
    """
    MICROPHONE_DISTANCE_UNSPECIFIED = 0
    NEARFIELD = 1
    MIDFIELD = 2
    FARFIELD = 3

  class OriginalMediaTypeValueValuesEnum(_messages.Enum):
    r"""The original media the speech was recorded on.

    Values:
      ORIGINAL_MEDIA_TYPE_UNSPECIFIED: Unknown original media type.
      AUDIO: The speech data is an audio recording.
      VIDEO: The speech data was originally recorded on a video.
    """
    ORIGINAL_MEDIA_TYPE_UNSPECIFIED = 0
    AUDIO = 1
    VIDEO = 2

  class RecordingDeviceTypeValueValuesEnum(_messages.Enum):
    r"""The type of device the speech was recorded with.

    Values:
      RECORDING_DEVICE_TYPE_UNSPECIFIED: The recording device is unknown.
      SMARTPHONE: Speech was recorded on a smartphone.
      PC: Speech was recorded using a personal computer or tablet.
      PHONE_LINE: Speech was recorded over a phone line.
      VEHICLE: Speech was recorded in a vehicle.
      OTHER_OUTDOOR_DEVICE: Speech was recorded outdoors.
      OTHER_INDOOR_DEVICE: Speech was recorded indoors.
    """
    RECORDING_DEVICE_TYPE_UNSPECIFIED = 0
    SMARTPHONE = 1
    PC = 2
    PHONE_LINE = 3
    VEHICLE = 4
    OTHER_OUTDOOR_DEVICE = 5
    OTHER_INDOOR_DEVICE = 6

  audioTopic = _messages.StringField(1)
  industryNaicsCodeOfAudio = _messages.IntegerField(2, variant=_messages.Variant.UINT32)
  interactionType = _messages.EnumField('InteractionTypeValueValuesEnum', 3)
  microphoneDistance = _messages.EnumField('MicrophoneDistanceValueValuesEnum', 4)
  originalMediaType = _messages.EnumField('OriginalMediaTypeValueValuesEnum', 5)
  originalMimeType = _messages.StringField(6)
  recordingDeviceName = _messages.StringField(7)
  recordingDeviceType = _messages.EnumField('RecordingDeviceTypeValueValuesEnum', 8)


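# Illustrative sketch (not part of the generated API): populating the
# RecognitionMetadata enums for a phone-call use case, mirroring the enum
# values defined above. The helper name is an assumption for the example.
def _example_build_recognition_metadata():
  """Illustrative only: describes near-field phone-call audio."""
  return RecognitionMetadata(
      interactionType=(
          RecognitionMetadata.InteractionTypeValueValuesEnum.PHONE_CALL),
      microphoneDistance=(
          RecognitionMetadata.MicrophoneDistanceValueValuesEnum.NEARFIELD),
      originalMediaType=(
          RecognitionMetadata.OriginalMediaTypeValueValuesEnum.AUDIO),
      recordingDeviceType=(
          RecognitionMetadata.RecordingDeviceTypeValueValuesEnum.PHONE_LINE),
      originalMimeType='audio/mp3')

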
class RecognizeRequest(_messages.Message):
  r"""The top-level message sent by the client for the `Recognize` method.

  Fields:
    audio: Required. The audio data to be recognized.
    config: Required. Provides information to the recognizer that specifies
      how to process the request.
  """

  audio = _messages.MessageField('RecognitionAudio', 1)
  config = _messages.MessageField('RecognitionConfig', 2)


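# Illustrative sketch (not part of the generated API): combining the pieces
# above into a synchronous RecognizeRequest and serializing it to the JSON
# body a client would send for a Recognize call. The helper name is an
# assumption; encoding.MessageToJson is the standard apitools serializer.
def _example_recognize_request_json(audio_bytes):
  """Illustrative only: returns the JSON body for a Recognize call."""
  request = RecognizeRequest(
      config=RecognitionConfig(
          encoding=RecognitionConfig.EncodingValueValuesEnum.LINEAR16,
          sampleRateHertz=16000,
          languageCode='en-US'),
      audio=RecognitionAudio(content=audio_bytes))
  return encoding.MessageToJson(request)

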
class RecognizeResponse(_messages.Message):
  r"""The only message returned to the client by the `Recognize` method. It
  contains the result as zero or more sequential `SpeechRecognitionResult`
  messages.

  Fields:
    results: Sequential list of transcription results corresponding to
      sequential portions of audio.
  """

  results = _messages.MessageField('SpeechRecognitionResult', 1, repeated=True)


class SpeakerDiarizationConfig(_messages.Message):
  r"""Config to enable speaker diarization.

  Fields:
    enableSpeakerDiarization: If 'true', enables speaker detection for each
      recognized word in the top alternative of the recognition result using a
      speaker_tag provided in the WordInfo.
    maxSpeakerCount: Maximum number of speakers in the conversation. This
      range gives you more flexibility by allowing the system to automatically
      determine the correct number of speakers. If not set, the default value
      is 6.
    minSpeakerCount: Minimum number of speakers in the conversation. This
      range gives you more flexibility by allowing the system to automatically
      determine the correct number of speakers. If not set, the default value
      is 2.
    speakerTag: Output only. Unused.
  """

  enableSpeakerDiarization = _messages.BooleanField(1)
  maxSpeakerCount = _messages.IntegerField(2, variant=_messages.Variant.INT32)
  minSpeakerCount = _messages.IntegerField(3, variant=_messages.Variant.INT32)
  speakerTag = _messages.IntegerField(4, variant=_messages.Variant.INT32)


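# Illustrative sketch (not part of the generated API): attaching a
# SpeakerDiarizationConfig to a RecognitionConfig so that WordInfo.speakerTag
# is populated in the top alternative, per the docstrings above. The helper
# name and speaker counts are assumptions for the example.
def _example_diarization_config(min_speakers=2, max_speakers=6):
  """Illustrative only: builds a diarization-enabled RecognitionConfig."""
  return RecognitionConfig(
      languageCode='en-US',
      diarizationConfig=SpeakerDiarizationConfig(
          enableSpeakerDiarization=True,
          minSpeakerCount=min_speakers,
          maxSpeakerCount=max_speakers))

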
class SpeechContext(_messages.Message):
  r"""Provides "hints" to the speech recognizer to favor specific words and
  phrases in the results.

  Fields:
    phrases: A list of strings containing words and phrases "hints" so that
      the speech recognition is more likely to recognize them. This can be
      used to improve the accuracy for specific words and phrases, for
      example, if specific commands are typically spoken by the user. This can
      also be used to add additional words to the vocabulary of the
      recognizer. See [usage limits](https://cloud.google.com/speech-to-
      text/quotas#content). List items can also be set to classes for groups
      of words that represent common concepts that occur in natural language.
      For example, rather than providing phrase hints for every month of the
      year, using the $MONTH class improves the likelihood of correctly
      transcribing audio that includes months.
  """

  phrases = _messages.StringField(1, repeated=True)


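# Illustrative sketch (not part of the generated API): providing phrase
# hints, including a class token like the $MONTH example described above, to
# bias recognition. The helper name and sample phrases are assumptions.
def _example_speech_contexts():
  """Illustrative only: returns speech contexts for a scheduling domain."""
  return [
      SpeechContext(phrases=['weather', 'forecast']),
      # A class token stands in for a whole group of words, e.g. month names.
      SpeechContext(phrases=['$MONTH']),
  ]

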
class SpeechOperationsGetRequest(_messages.Message):
  r"""A SpeechOperationsGetRequest object.

  Fields:
    name: The name of the operation resource.
  """

  name = _messages.StringField(1, required=True)


class SpeechOperationsListRequest(_messages.Message):
  r"""A SpeechOperationsListRequest object.

  Fields:
    filter: The standard list filter.
    name: The name of the operation's parent resource.
    pageSize: The standard list page size.
    pageToken: The standard list page token.
  """

  filter = _messages.StringField(1)
  name = _messages.StringField(2)
  pageSize = _messages.IntegerField(3, variant=_messages.Variant.INT32)
  pageToken = _messages.StringField(4)


class SpeechRecognitionAlternative(_messages.Message):
  r"""Alternative hypotheses (a.k.a. n-best list).

  Fields:
    confidence: The confidence estimate between 0.0 and 1.0. A higher number
      indicates an estimated greater likelihood that the recognized words are
      correct. This field is set only for the top alternative of a non-
      streaming result or of a streaming result where `is_final=true`. This
      field is not guaranteed to be accurate and users should not rely on it
      to be always provided. The default of 0.0 is a sentinel value indicating
      `confidence` was not set.
    transcript: Transcript text representing the words that the user spoke.
    words: A list of word-specific information for each recognized word. Note:
      When `enable_speaker_diarization` is true, you will see all the words
      from the beginning of the audio.
  """

  confidence = _messages.FloatField(1, variant=_messages.Variant.FLOAT)
  transcript = _messages.StringField(2)
  words = _messages.MessageField('WordInfo', 3, repeated=True)


class SpeechRecognitionResult(_messages.Message):
  r"""A speech recognition result corresponding to a portion of the audio.

  Fields:
    alternatives: May contain one or more recognition hypotheses (up to the
      maximum specified in `max_alternatives`). These alternatives are ordered
      in terms of accuracy, with the top (first) alternative being the most
      probable, as ranked by the recognizer.
    channelTag: For multi-channel audio, this is the channel number
      corresponding to the recognized result for the audio from that channel.
      For audio_channel_count = N, its output values can range from '1' to
      'N'.
  """

  alternatives = _messages.MessageField('SpeechRecognitionAlternative', 1, repeated=True)
  channelTag = _messages.IntegerField(2, variant=_messages.Variant.INT32)


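# Illustrative sketch (not part of the generated API): stitching the top
# alternative of each sequential result into one transcript, following the
# ordering described above. The helper name is an assumption for the example.
def _example_full_transcript(response):
  """Illustrative only: joins top-alternative transcripts of a response.

  Works for both RecognizeResponse and LongRunningRecognizeResponse, which
  share the `results` field.
  """
  pieces = []
  for result in response.results:
    if result.alternatives:
      # alternatives[0] is the most probable hypothesis.
      pieces.append(result.alternatives[0].transcript)
  return ' '.join(pieces)

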
class StandardQueryParameters(_messages.Message):
  r"""Query parameters accepted by all methods.

  Enums:
    FXgafvValueValuesEnum: V1 error format.
    AltValueValuesEnum: Data format for response.

  Fields:
    f__xgafv: V1 error format.
    access_token: OAuth access token.
    alt: Data format for response.
    callback: JSONP
    fields: Selector specifying which fields to include in a partial response.
    key: API key. Your API key identifies your project and provides you with
      API access, quota, and reports. Required unless you provide an OAuth 2.0
      token.
    oauth_token: OAuth 2.0 token for the current user.
    prettyPrint: Returns response with indentations and line breaks.
    quotaUser: Available to use for quota purposes for server-side
      applications. Can be any arbitrary string assigned to a user, but should
      not exceed 40 characters.
    trace: A tracing token of the form "token:<tokenid>" to include in API
      requests.
    uploadType: Legacy upload protocol for media (e.g. "media", "multipart").
    upload_protocol: Upload protocol for media (e.g. "raw", "multipart").
  """

  class AltValueValuesEnum(_messages.Enum):
    r"""Data format for response.

    Values:
      json: Responses with Content-Type of application/json
      media: Media download with context-dependent Content-Type
      proto: Responses with Content-Type of application/x-protobuf
    """
    json = 0
    media = 1
    proto = 2

  class FXgafvValueValuesEnum(_messages.Enum):
    r"""V1 error format.

    Values:
      _1: v1 error format
      _2: v2 error format
    """
    _1 = 0
    _2 = 1

  f__xgafv = _messages.EnumField('FXgafvValueValuesEnum', 1)
  access_token = _messages.StringField(2)
  alt = _messages.EnumField('AltValueValuesEnum', 3, default='json')
  callback = _messages.StringField(4)
  fields = _messages.StringField(5)
  key = _messages.StringField(6)
  oauth_token = _messages.StringField(7)
  prettyPrint = _messages.BooleanField(8, default=True)
  quotaUser = _messages.StringField(9)
  trace = _messages.StringField(10)
  uploadType = _messages.StringField(11)
  upload_protocol = _messages.StringField(12)


class Status(_messages.Message):
  r"""The `Status` type defines a logical error model that is suitable for
  different programming environments, including REST APIs and RPC APIs. It is
  used by [gRPC](https://github.com/grpc). Each `Status` message contains
  three pieces of data: error code, error message, and error details. You can
  find out more about this error model and how to work with it in the [API
  Design Guide](https://cloud.google.com/apis/design/errors).

  Messages:
    DetailsValueListEntry: A DetailsValueListEntry object.

  Fields:
    code: The status code, which should be an enum value of google.rpc.Code.
    details: A list of messages that carry the error details. There is a
      common set of message types for APIs to use.
    message: A developer-facing error message, which should be in English. Any
      user-facing error message should be localized and sent in the
      google.rpc.Status.details field, or localized by the client.
  """

  @encoding.MapUnrecognizedFields('additionalProperties')
  class DetailsValueListEntry(_messages.Message):
    r"""A DetailsValueListEntry object.

    Messages:
      AdditionalProperty: An additional property for a DetailsValueListEntry
        object.

    Fields:
      additionalProperties: Properties of the object. Contains field @type
        with type URL.
    """

    class AdditionalProperty(_messages.Message):
      r"""An additional property for a DetailsValueListEntry object.

      Fields:
        key: Name of the additional property.
        value: An extra_types.JsonValue attribute.
      """

      key = _messages.StringField(1)
      value = _messages.MessageField('extra_types.JsonValue', 2)

    additionalProperties = _messages.MessageField('AdditionalProperty', 1, repeated=True)

  code = _messages.IntegerField(1, variant=_messages.Variant.INT32)
  details = _messages.MessageField('DetailsValueListEntry', 2, repeated=True)
  message = _messages.StringField(3)


class WordInfo(_messages.Message):
  r"""Word-specific information for recognized words.

  Fields:
    endTime: Time offset relative to the beginning of the audio, and
      corresponding to the end of the spoken word. This field is only set if
      `enable_word_time_offsets=true` and only in the top hypothesis. This is
      an experimental feature and the accuracy of the time offset can vary.
    speakerTag: Output only. A distinct integer value is assigned for every
      speaker within the audio. This field specifies which one of those
      speakers was detected to have spoken this word. Value ranges from '1' to
      diarization_speaker_count. speaker_tag is set if
      enable_speaker_diarization = 'true' and only in the top alternative.
    startTime: Time offset relative to the beginning of the audio, and
      corresponding to the start of the spoken word. This field is only set if
      `enable_word_time_offsets=true` and only in the top hypothesis. This is
      an experimental feature and the accuracy of the time offset can vary.
    word: The word corresponding to this set of information.
  """

  endTime = _messages.StringField(1)
  speakerTag = _messages.IntegerField(2, variant=_messages.Variant.INT32)
  startTime = _messages.StringField(3)
  word = _messages.StringField(4)


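# Illustrative sketch (not part of the generated API): listing word timings
# from the top alternative of a result when `enable_word_time_offsets` was
# requested. startTime/endTime arrive as duration strings such as "1.300s"
# in the JSON API; the helper name is an assumption for the example.
def _example_word_timings(result):
  """Illustrative only: yields (word, startTime, endTime, speakerTag)."""
  if not result.alternatives:
    return
  for info in result.alternatives[0].words:
    yield (info.word, info.startTime, info.endTime, info.speakerTag)

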
encoding.AddCustomJsonFieldMapping(
    StandardQueryParameters, 'f__xgafv', '$.xgafv')
encoding.AddCustomJsonEnumMapping(
    StandardQueryParameters.FXgafvValueValuesEnum, '_1', '1')
encoding.AddCustomJsonEnumMapping(
    StandardQueryParameters.FXgafvValueValuesEnum, '_2', '2')

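
# Illustrative sketch (not part of the generated API): the mappings above let
# the Python-safe field name `f__xgafv` and enum names `_1`/`_2` serialize
# under their wire forms, so the JSON produced below uses the key "$.xgafv"
# with the value "1". The helper name is an assumption for the example.
def _example_xgafv_query_params():
  """Illustrative only: shows the custom JSON mapping for `$.xgafv`."""
  params = StandardQueryParameters(
      f__xgafv=StandardQueryParameters.FXgafvValueValuesEnum._1)
  return encoding.MessageToJson(params)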