--- /dev/null
+// Copyright 2018 Google LLC
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+syntax = "proto3";
+
+package google.cloud.dialogflow.v2beta1;
+
+import "google/api/annotations.proto";
+import "google/cloud/dialogflow/v2beta1/audio_config.proto";
+import "google/cloud/dialogflow/v2beta1/context.proto";
+import "google/cloud/dialogflow/v2beta1/intent.proto";
+import "google/cloud/dialogflow/v2beta1/session_entity_type.proto";
+import "google/protobuf/struct.proto";
+import "google/rpc/status.proto";
+import "google/type/latlng.proto";
+
+option cc_enable_arenas = true;
+option csharp_namespace = "Google.Cloud.Dialogflow.V2beta1";
+option go_package = "google.golang.org/genproto/googleapis/cloud/dialogflow/v2beta1;dialogflow";
+option java_multiple_files = true;
+option java_outer_classname = "SessionProto";
+option java_package = "com.google.cloud.dialogflow.v2beta1";
+option objc_class_prefix = "DF";
+
+// A session represents an interaction with a user. You retrieve user input
+// and pass it to the
+// [DetectIntent][google.cloud.dialogflow.v2beta1.Sessions.DetectIntent] (or
+// [StreamingDetectIntent][google.cloud.dialogflow.v2beta1.Sessions.StreamingDetectIntent])
+// method to determine user intent and respond.
+service Sessions {
+ // Processes a natural language query and returns structured, actionable data
+ // as a result. This method is not idempotent, because it may cause contexts
+ // and session entity types to be updated, which in turn might affect
+ // results of future queries.
+ rpc DetectIntent(DetectIntentRequest) returns (DetectIntentResponse) {
+ option (google.api.http) = {
+ post: "/v2beta1/{session=projects/*/agent/sessions/*}:detectIntent"
+ body: "*"
+ additional_bindings {
+ post: "/v2beta1/{session=projects/*/agent/environments/*/users/*/sessions/*}:detectIntent"
+ body: "*"
+ }
+ };
+ }
+
+ // Processes a natural language query in audio format in a streaming fashion
+ // and returns structured, actionable data as a result. This method is only
+ // available via the gRPC API (not REST).
+ rpc StreamingDetectIntent(stream StreamingDetectIntentRequest)
+ returns (stream StreamingDetectIntentResponse) {}
+}
+
+// The request to detect user's intent.
+message DetectIntentRequest {
+ // Required. The name of the session this query is sent to. Format:
+ // `projects/<Project ID>/agent/sessions/<Session ID>`, or
+ // `projects/<Project ID>/agent/environments/<Environment ID>/users/<User
+ // ID>/sessions/<Session ID>`. If `Environment ID` is not specified, we assume
+ // default 'draft' environment. If `User ID` is not specified, we are using
+ // "-". It’s up to the API caller to choose an appropriate `Session ID` and
+ // `User Id`. They can be a random numbers or some type of user and session
+ // identifiers (preferably hashed). The length of the `Session ID` and
+ // `User ID` must not exceed 36 characters.
+ string session = 1;
+
+ // Optional. The parameters of this query.
+ QueryParameters query_params = 2;
+
+ // Required. The input specification. It can be set to:
+ //
+ // 1. an audio config
+ // which instructs the speech recognizer how to process the speech audio,
+ //
+ // 2. a conversational query in the form of text, or
+ //
+ // 3. an event that specifies which intent to trigger.
+ QueryInput query_input = 3;
+
+ // Optional. Instructs the speech synthesizer how to generate the output
+ // audio. If this field is not set and agent-level speech synthesizer is not
+ // configured, no output audio is generated.
+ OutputAudioConfig output_audio_config = 4;
+
+ // Optional. The natural language speech audio to be processed. This field
+ // should be populated iff `query_input` is set to an input audio config.
+ // A single request can contain up to 1 minute of speech audio data.
+ bytes input_audio = 5;
+}
+
+// The message returned from the DetectIntent method.
+message DetectIntentResponse {
+ // The unique identifier of the response. It can be used to
+ // locate a response in the training example set or for reporting issues.
+ string response_id = 1;
+
+ // The selected results of the conversational query or event processing.
+ // See `alternative_query_results` for additional potential results.
+ QueryResult query_result = 2;
+
+ // If Knowledge Connectors are enabled, there could be more than one result
+ // returned for a given query or event, and this field will contain all
+ // results except for the top one, which is captured in query_result. The
+ // alternative results are ordered by decreasing
+ // `QueryResult.intent_detection_confidence`. If Knowledge Connectors are
+ // disabled, this field will be empty until multiple responses for regular
+ // intents are supported, at which point those additional results will be
+ // surfaced here.
+ repeated QueryResult alternative_query_results = 5;
+
+ // Specifies the status of the webhook request. `webhook_status`
+ // is never populated in webhook requests.
+ google.rpc.Status webhook_status = 3;
+
+ // The audio data bytes encoded as specified in the request.
+ bytes output_audio = 4;
+
+ // Instructs the speech synthesizer how to generate the output audio. This
+ // field is populated from the agent-level speech synthesizer configuration,
+ // if enabled.
+ OutputAudioConfig output_audio_config = 6;
+}
+
+// Represents the parameters of the conversational query.
+message QueryParameters {
+ // Optional. The time zone of this conversational query from the
+ // [time zone database](https://www.iana.org/time-zones), e.g.,
+ // America/New_York, Europe/Paris. If not provided, the time zone specified in
+ // agent settings is used.
+ string time_zone = 1;
+
+ // Optional. The geo location of this conversational query.
+ google.type.LatLng geo_location = 2;
+
+ // Optional. The collection of contexts to be activated before this query is
+ // executed.
+ repeated Context contexts = 3;
+
+ // Optional. Specifies whether to delete all contexts in the current session
+ // before the new ones are activated.
+ bool reset_contexts = 4;
+
+ // Optional. The collection of session entity types to replace or extend
+ // developer entities with for this query only. The entity synonyms apply
+ // to all languages.
+ repeated SessionEntityType session_entity_types = 5;
+
+ // Optional. This field can be used to pass custom data into the webhook
+ // associated with the agent. Arbitrary JSON objects are supported.
+ google.protobuf.Struct payload = 6;
+
+ // Optional. KnowledgeBases to get alternative results from. If not set, the
+ // KnowledgeBases enabled in the agent (through UI) will be used.
+ // Format: `projects/<Project ID>/knowledgeBases/<Knowledge Base ID>`.
+ //
+ // Note: This field is `repeated` for forward compatibility, currently only
+ // the first one is supported, we may return an error if multiple
+ // KnowledgeBases are specified.
+ repeated string knowledge_base_names = 12;
+
+ // Optional. Configures the type of sentiment analysis to perform. If not
+ // provided, sentiment analysis is not performed.
+ // Note: Sentiment Analysis is only currently available for Enterprise Edition
+ // agents.
+ SentimentAnalysisRequestConfig sentiment_analysis_request_config = 10;
+}
+
+// Represents the query input. It can contain either:
+//
+// 1. An audio config which
+// instructs the speech recognizer how to process the speech audio.
+//
+// 2. A conversational query in the form of text,.
+//
+// 3. An event that specifies which intent to trigger.
+message QueryInput {
+ // Required. The input specification.
+ oneof input {
+ // Instructs the speech recognizer how to process the speech audio.
+ InputAudioConfig audio_config = 1;
+
+ // The natural language text to be processed.
+ TextInput text = 2;
+
+ // The event to be processed.
+ EventInput event = 3;
+ }
+}
+
+// Represents the result of conversational query or event processing.
+message QueryResult {
+ // The original conversational query text:
+ // - If natural language text was provided as input, `query_text` contains
+ // a copy of the input.
+ // - If natural language speech audio was provided as input, `query_text`
+ // contains the speech recognition result. If speech recognizer produced
+ // multiple alternatives, a particular one is picked.
+ // - If an event was provided as input, `query_text` is not set.
+ string query_text = 1;
+
+ // The language that was triggered during intent detection.
+ // See [Language Support](https://dialogflow.com/docs/reference/language)
+ // for a list of the currently supported language codes.
+ string language_code = 15;
+
+ // The Speech recognition confidence between 0.0 and 1.0. A higher number
+ // indicates an estimated greater likelihood that the recognized words are
+ // correct. The default of 0.0 is a sentinel value indicating that confidence
+ // was not set.
+ //
+ // This field is not guaranteed to be accurate or set. In particular this
+ // field isn't set for StreamingDetectIntent since the streaming endpoint has
+ // separate confidence estimates per portion of the audio in
+ // StreamingRecognitionResult.
+ float speech_recognition_confidence = 2;
+
+ // The action name from the matched intent.
+ string action = 3;
+
+ // The collection of extracted parameters.
+ google.protobuf.Struct parameters = 4;
+
+ // This field is set to:
+ // - `false` if the matched intent has required parameters and not all of
+ // the required parameter values have been collected.
+ // - `true` if all required parameter values have been collected, or if the
+ // matched intent doesn't contain any required parameters.
+ bool all_required_params_present = 5;
+
+ // The text to be pronounced to the user or shown on the screen.
+ string fulfillment_text = 6;
+
+ // The collection of rich messages to present to the user.
+ repeated Intent.Message fulfillment_messages = 7;
+
+ // If the query was fulfilled by a webhook call, this field is set to the
+ // value of the `source` field returned in the webhook response.
+ string webhook_source = 8;
+
+ // If the query was fulfilled by a webhook call, this field is set to the
+ // value of the `payload` field returned in the webhook response.
+ google.protobuf.Struct webhook_payload = 9;
+
+ // The collection of output contexts. If applicable,
+ // `output_contexts.parameters` contains entries with name
+ // `<parameter name>.original` containing the original parameter values
+ // before the query.
+ repeated Context output_contexts = 10;
+
+ // The intent that matched the conversational query. Some, not
+ // all fields are filled in this message, including but not limited to:
+ // `name`, `display_name` and `webhook_state`.
+ Intent intent = 11;
+
+ // The intent detection confidence. Values range from 0.0
+ // (completely uncertain) to 1.0 (completely certain).
+ // If there are `multiple knowledge_answers` messages, this value is set to
+ // the greatest `knowledgeAnswers.match_confidence` value in the list.
+ float intent_detection_confidence = 12;
+
+ // The free-form diagnostic info. For example, this field
+ // could contain webhook call latency.
+ google.protobuf.Struct diagnostic_info = 14;
+
+ // The sentiment analysis result, which depends on the
+ // `sentiment_analysis_request_config` specified in the request.
+ SentimentAnalysisResult sentiment_analysis_result = 17;
+
+ // The result from Knowledge Connector (if any), ordered by decreasing
+ // `KnowledgeAnswers.match_confidence`.
+ KnowledgeAnswers knowledge_answers = 18;
+}
+
+// Represents the result of querying a Knowledge base.
+message KnowledgeAnswers {
+ // An answer from Knowledge Connector.
+ message Answer {
+ // Represents the system's confidence that this knowledge answer is a good
+ // match for this conversational query.
+ enum MatchConfidenceLevel {
+ // Not specified.
+ MATCH_CONFIDENCE_LEVEL_UNSPECIFIED = 0;
+
+ // Indicates that the confidence is low.
+ LOW = 1;
+
+ // Indicates our confidence is medium.
+ MEDIUM = 2;
+
+ // Indicates our confidence is high.
+ HIGH = 3;
+ }
+
+ // Indicates which Knowledge Document this answer was extracted from.
+ // Format: `projects/<Project ID>/knowledgeBases/<Knowledge Base
+ // ID>/documents/<Document ID>`.
+ string source = 1;
+
+ // The corresponding FAQ question if the answer was extracted from a FAQ
+ // Document, empty otherwise.
+ string faq_question = 2;
+
+ // The piece of text from the `source` knowledge base document that answers
+ // this conversational query.
+ string answer = 3;
+
+ // The system's confidence level that this knowledge answer is a good match
+ // for this conversational query.
+ // NOTE: The confidence level for a given `<query, answer>` pair may change
+ // without notice, as it depends on models that are constantly being
+ // improved. However, it will change less frequently than the confidence
+ // score below, and should be preferred for referencing the quality of an
+ // answer.
+ MatchConfidenceLevel match_confidence_level = 4;
+
+ // The system's confidence score that this Knowledge answer is a good match
+ // for this converstational query, range from 0.0 (completely uncertain)
+ // to 1.0 (completely certain).
+ // Note: The confidence score is likely to vary somewhat (possibly even for
+ // identical requests), as the underlying model is under constant
+ // improvement, we may deprecate it in the future. We recommend using
+ // `match_confidence_level` which should be generally more stable.
+ float match_confidence = 5;
+ }
+
+ // A list of answers from Knowledge Connector.
+ repeated Answer answers = 1;
+}
+
+// The top-level message sent by the client to the
+// `StreamingDetectIntent` method.
+//
+// Multiple request messages should be sent in order:
+//
+// 1. The first message must contain `session`, `query_input` plus optionally
+// `query_params` and/or `single_utterance`. If the client wants to receive
+// an audio response, it should also contain `output_audio_config`.
+// The message must not contain `input_audio`.
+//
+// 2. If `query_input` was set to a streaming input audio config,
+// all subsequent messages must contain only `input_audio`.
+// Otherwise, finish the request stream.
+message StreamingDetectIntentRequest {
+ // Required. The name of the session the query is sent to.
+ // Format of the session name:
+ // `projects/<Project ID>/agent/sessions/<Session ID>`, or
+ // `projects/<Project ID>/agent/environments/<Environment ID>/users/<User
+ // ID>/sessions/<Session ID>`. If `Environment ID` is not specified, we assume
+ // default 'draft' environment. If `User ID` is not specified, we are using
+ // "-". It’s up to the API caller to choose an appropriate `Session ID` and
+ // `User Id`. They can be a random numbers or some type of user and session
+ // identifiers (preferably hashed). The length of the `Session ID` and
+ // `User ID` must not exceed 36 characters.
+ string session = 1;
+
+ // Optional. The parameters of this query.
+ QueryParameters query_params = 2;
+
+ // Required. The input specification. It can be set to:
+ //
+ // 1. an audio config which instructs the speech recognizer how to process
+ // the speech audio,
+ //
+ // 2. a conversational query in the form of text, or
+ //
+ // 3. an event that specifies which intent to trigger.
+ QueryInput query_input = 3;
+
+ // Optional. If `false` (default), recognition does not cease until the
+ // client closes the stream.
+ // If `true`, the recognizer will detect a single spoken utterance in input
+ // audio. Recognition ceases when it detects the audio's voice has
+ // stopped or paused. In this case, once a detected intent is received, the
+ // client should close the stream and start a new request with a new stream as
+ // needed.
+ // This setting is ignored when `query_input` is a piece of text or an event.
+ bool single_utterance = 4;
+
+ // Optional. Instructs the speech synthesizer how to generate the output
+ // audio. If this field is not set and agent-level speech synthesizer is not
+ // configured, no output audio is generated.
+ OutputAudioConfig output_audio_config = 5;
+
+ // Optional. The input audio content to be recognized. Must be sent if
+ // `query_input` was set to a streaming input audio config. The complete audio
+ // over all streaming messages must not exceed 1 minute.
+ bytes input_audio = 6;
+}
+
+// The top-level message returned from the
+// `StreamingDetectIntent` method.
+//
+// Multiple response messages can be returned in order:
+//
+// 1. If the input was set to streaming audio, the first one or more messages
+// contain `recognition_result`. Each `recognition_result` represents a more
+// complete transcript of what the user said. The last `recognition_result`
+// has `is_final` set to `true`.
+//
+// 2. The next message contains `response_id`, `query_result`,
+// `alternative_query_results` and optionally `webhook_status` if a WebHook
+// was called.
+//
+// 3. If `output_audio_config` was specified in the request or agent-level
+// speech synthesizer is configured, all subsequent messages contain
+// `output_audio` and `output_audio_config`.
+message StreamingDetectIntentResponse {
+ // The unique identifier of the response. It can be used to
+ // locate a response in the training example set or for reporting issues.
+ string response_id = 1;
+
+ // The result of speech recognition.
+ StreamingRecognitionResult recognition_result = 2;
+
+ // The selected results of the conversational query or event processing.
+ // See `alternative_query_results` for additional potential results.
+ QueryResult query_result = 3;
+
+ // If Knowledge Connectors are enabled, there could be more than one result
+ // returned for a given query or event, and this field will contain all
+ // results except for the top one, which is captured in query_result. The
+ // alternative results are ordered by decreasing
+ // `QueryResult.intent_detection_confidence`. If Knowledge Connectors are
+ // disabled, this field will be empty until multiple responses for regular
+ // intents are supported, at which point those additional results will be
+ // surfaced here.
+ repeated QueryResult alternative_query_results = 7;
+
+ // Specifies the status of the webhook request.
+ google.rpc.Status webhook_status = 4;
+
+ // The audio data bytes encoded as specified in the request.
+ bytes output_audio = 5;
+
+ // Instructs the speech synthesizer how to generate the output audio. This
+ // field is populated from the agent-level speech synthesizer configuration,
+ // if enabled.
+ OutputAudioConfig output_audio_config = 6;
+}
+
+// Contains a speech recognition result corresponding to a portion of the audio
+// that is currently being processed or an indication that this is the end
+// of the single requested utterance.
+//
+// Example:
+//
+// 1. transcript: "tube"
+//
+// 2. transcript: "to be a"
+//
+// 3. transcript: "to be"
+//
+// 4. transcript: "to be or not to be"
+// is_final: true
+//
+// 5. transcript: " that's"
+//
+// 6. transcript: " that is"
+//
+// 7. recognition_event_type: `RECOGNITION_EVENT_END_OF_SINGLE_UTTERANCE`
+//
+// 8. transcript: " that is the question"
+// is_final: true
+//
+// Only two of the responses contain final results (#4 and #8 indicated by
+// `is_final: true`). Concatenating these generates the full transcript: "to be
+// or not to be that is the question".
+//
+// In each response we populate:
+//
+// * for `MESSAGE_TYPE_TRANSCRIPT`: `transcript` and possibly `is_final`.
+//
+// * for `MESSAGE_TYPE_END_OF_SINGLE_UTTERANCE`: only `event_type`.
+message StreamingRecognitionResult {
+ // Type of the response message.
+ enum MessageType {
+ // Not specified. Should never be used.
+ MESSAGE_TYPE_UNSPECIFIED = 0;
+
+ // Message contains a (possibly partial) transcript.
+ TRANSCRIPT = 1;
+
+ // Event indicates that the server has detected the end of the user's speech
+ // utterance and expects no additional speech. Therefore, the server will
+ // not process additional audio (although it may subsequently return
+ // additional results). The client should stop sending additional audio
+ // data, half-close the gRPC connection, and wait for any additional results
+ // until the server closes the gRPC connection. This message is only sent if
+ // `single_utterance` was set to `true`, and is not used otherwise.
+ END_OF_SINGLE_UTTERANCE = 2;
+ }
+
+ // Type of the result message.
+ MessageType message_type = 1;
+
+ // Transcript text representing the words that the user spoke.
+ // Populated if and only if `event_type` = `RECOGNITION_EVENT_TRANSCRIPT`.
+ string transcript = 2;
+
+ // The default of 0.0 is a sentinel value indicating `confidence` was not set.
+ // If `false`, the `StreamingRecognitionResult` represents an
+ // interim result that may change. If `true`, the recognizer will not return
+ // any further hypotheses about this piece of the audio. May only be populated
+ // for `event_type` = `RECOGNITION_EVENT_TRANSCRIPT`.
+ bool is_final = 3;
+
+ // The Speech confidence between 0.0 and 1.0 for the current portion of audio.
+ // A higher number indicates an estimated greater likelihood that the
+ // recognized words are correct. The default of 0.0 is a sentinel value
+ // indicating that confidence was not set.
+ //
+ // This field is typically only provided if `is_final` is true and you should
+ // not rely on it being accurate or even set.
+ float confidence = 4;
+}
+
+// Instructs the speech recognizer how to process the audio content.
+message InputAudioConfig {
+ // Required. Audio encoding of the audio content to process.
+ AudioEncoding audio_encoding = 1;
+
+ // Required. Sample rate (in Hertz) of the audio content sent in the query.
+ // Refer to
+ // [Cloud Speech API
+ // documentation](https://cloud.google.com/speech-to-text/docs/basics) for
+ // more details.
+ int32 sample_rate_hertz = 2;
+
+ // Required. The language of the supplied audio. Dialogflow does not do
+ // translations. See [Language
+ // Support](https://dialogflow.com/docs/languages) for a list of the
+ // currently supported language codes. Note that queries in the same session
+ // do not necessarily need to specify the same language.
+ string language_code = 3;
+
+ // Optional. The collection of phrase hints which are used to boost accuracy
+ // of speech recognition.
+ // Refer to
+ // [Cloud Speech API
+ // documentation](https://cloud.google.com/speech-to-text/docs/basics#phrase-hints)
+ // for more details.
+ repeated string phrase_hints = 4;
+
+ // Optional. Which Speech model to select for the given request. Select the
+ // model best suited to your domain to get best results. If a model is not
+ // explicitly specified, then we auto-select a model based on the parameters
+ // in the InputAudioConfig.
+ // If enhanced speech model is enabled for the agent and an enhanced
+ // version of the specified model for the language does not exist, then the
+ // speech is recognized using the standard version of the specified model.
+ // Refer to
+ // [Cloud Speech API
+ // documentation](https://cloud.google.com/speech-to-text/docs/basics#select-model)
+ // for more details.
+ string model = 7;
+}
+
+// Represents the natural language text to be processed.
+message TextInput {
+ // Required. The UTF-8 encoded natural language text to be processed.
+ // Text length must not exceed 256 bytes.
+ string text = 1;
+
+ // Required. The language of this conversational query. See [Language
+ // Support](https://dialogflow.com/docs/languages) for a list of the
+ // currently supported language codes. Note that queries in the same session
+ // do not necessarily need to specify the same language.
+ string language_code = 2;
+}
+
+// Events allow for matching intents by event name instead of the natural
+// language input. For instance, input `<event: { name: “welcome_event”,
+// parameters: { name: “Sam” } }>` can trigger a personalized welcome response.
+// The parameter `name` may be used by the agent in the response:
+// `“Hello #welcome_event.name! What can I do for you today?”`.
+message EventInput {
+ // Required. The unique identifier of the event.
+ string name = 1;
+
+ // Optional. The collection of parameters associated with the event.
+ google.protobuf.Struct parameters = 2;
+
+ // Required. The language of this query. See [Language
+ // Support](https://dialogflow.com/docs/languages) for a list of the
+ // currently supported language codes. Note that queries in the same session
+ // do not necessarily need to specify the same language.
+ string language_code = 3;
+}
+
+// Configures the types of sentiment analysis to perform.
+message SentimentAnalysisRequestConfig {
+ // Optional. Instructs the service to perform sentiment analysis on
+ // `query_text`. If not provided, sentiment analysis is not performed on
+ // `query_text`.
+ bool analyze_query_text_sentiment = 1;
+}
+
+// The result of sentiment analysis as configured by
+// `sentiment_analysis_request_config`.
+message SentimentAnalysisResult {
+ // The sentiment analysis result for `query_text`.
+ Sentiment query_text_sentiment = 1;
+}
+
+// The sentiment, such as positive/negative feeling or association, for a unit
+// of analysis, such as the query text.
+message Sentiment {
+ // Sentiment score between -1.0 (negative sentiment) and 1.0 (positive
+ // sentiment).
+ float score = 1;
+
+ // A non-negative number in the [0, +inf) range, which represents the absolute
+ // magnitude of sentiment, regardless of score (positive or negative).
+ float magnitude = 2;
+}
+
+// Audio encoding of the audio content sent in the conversational query request.
+// Refer to the
+// [Cloud Speech API
+// documentation](https://cloud.google.com/speech-to-text/docs/basics) for more
+// details.
+enum AudioEncoding {
+ // Not specified.
+ AUDIO_ENCODING_UNSPECIFIED = 0;
+
+ // Uncompressed 16-bit signed little-endian samples (Linear PCM).
+ AUDIO_ENCODING_LINEAR_16 = 1;
+
+ // [`FLAC`](https://xiph.org/flac/documentation.html) (Free Lossless Audio
+ // Codec) is the recommended encoding because it is lossless (therefore
+ // recognition is not compromised) and requires only about half the
+ // bandwidth of `LINEAR16`. `FLAC` stream encoding supports 16-bit and
+ // 24-bit samples, however, not all fields in `STREAMINFO` are supported.
+ AUDIO_ENCODING_FLAC = 2;
+
+ // 8-bit samples that compand 14-bit audio samples using G.711 PCMU/mu-law.
+ AUDIO_ENCODING_MULAW = 3;
+
+ // Adaptive Multi-Rate Narrowband codec. `sample_rate_hertz` must be 8000.
+ AUDIO_ENCODING_AMR = 4;
+
+ // Adaptive Multi-Rate Wideband codec. `sample_rate_hertz` must be 16000.
+ AUDIO_ENCODING_AMR_WB = 5;
+
+ // Opus encoded audio frames in Ogg container
+ // ([OggOpus](https://wiki.xiph.org/OggOpus)).
+ // `sample_rate_hertz` must be 16000.
+ AUDIO_ENCODING_OGG_OPUS = 6;
+
+ // Although the use of lossy encodings is not recommended, if a very low
+ // bitrate encoding is required, `OGG_OPUS` is highly preferred over
+ // Speex encoding. The [Speex](https://speex.org/) encoding supported by
+ // Dialogflow API has a header byte in each block, as in MIME type
+ // `audio/x-speex-with-header-byte`.
+ // It is a variant of the RTP Speex encoding defined in
+ // [RFC 5574](https://tools.ietf.org/html/rfc5574).
+ // The stream is a sequence of blocks, one block per RTP packet. Each block
+ // starts with a byte containing the length of the block, in bytes, followed
+ // by one or more frames of Speex data, padded to an integral number of
+ // bytes (octets) as specified in RFC 5574. In other words, each RTP header
+ // is replaced with a single byte containing the block length. Only Speex
+ // wideband is supported. `sample_rate_hertz` must be 16000.
+ AUDIO_ENCODING_SPEEX_WITH_HEADER_BYTE = 7;
+}