1 // Copyright 2017 Google Inc.
3 // Licensed under the Apache License, Version 2.0 (the "License");
4 // you may not use this file except in compliance with the License.
5 // You may obtain a copy of the License at
7 // http://www.apache.org/licenses/LICENSE-2.0
9 // Unless required by applicable law or agreed to in writing, software
10 // distributed under the License is distributed on an "AS IS" BASIS,
11 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 // See the License for the specific language governing permissions and
13 // limitations under the License.
17 package google.cloud.language.v1beta1;
19 import "google/api/annotations.proto";
21 option go_package = "google.golang.org/genproto/googleapis/cloud/language/v1beta1;language";
22 option java_multiple_files = true;
23 option java_outer_classname = "LanguageServiceProto";
24 option java_package = "com.google.cloud.language.v1beta1";
26 // Provides text analysis operations such as sentiment analysis and entity
// recognition.
28 service LanguageService {
29 // Analyzes the sentiment of the provided text.
30 rpc AnalyzeSentiment(AnalyzeSentimentRequest)
31 returns (AnalyzeSentimentResponse) {
32 option (google.api.http) = {
33 post: "/v1beta1/documents:analyzeSentiment"
// NOTE(review): the `body` line and closing `};`/`}` of this rpc are not
// visible in this chunk — confirm against the full file.
38 // Finds named entities (currently proper names and common nouns) in the text
39 // along with entity types, salience, mentions for each entity, and
// other properties.
41 rpc AnalyzeEntities(AnalyzeEntitiesRequest)
42 returns (AnalyzeEntitiesResponse) {
43 option (google.api.http) = {
44 post: "/v1beta1/documents:analyzeEntities"
49 // Analyzes the syntax of the text and provides sentence boundaries and
50 // tokenization along with part of speech tags, dependency trees, and other
// properties.
52 rpc AnalyzeSyntax(AnalyzeSyntaxRequest) returns (AnalyzeSyntaxResponse) {
53 option (google.api.http) = {
54 post: "/v1beta1/documents:analyzeSyntax"
59 // A convenience method that provides all the features that analyzeSentiment,
60 // analyzeEntities, and analyzeSyntax provide in one call.
61 rpc AnnotateText(AnnotateTextRequest) returns (AnnotateTextResponse) {
62 option (google.api.http) = {
63 post: "/v1beta1/documents:annotateText"
// NOTE(review): the closing braces of the remaining rpcs and of the service
// itself are not visible in this chunk.
69 // ################################################################ #
71 // Represents the input to API methods.
// NOTE(review): the enclosing `message Document {` line, the `Type` enum
// declaration and its values, and the `type`/`content`/`language` field
// declarations (and the oneof wrapping the source) are not visible in this
// chunk — only their doc comments and the `gcs_content_uri` field survive.
73 // The document types enum.
75 // The content type is not specified.
85 // Required. If the type is not set or is `TYPE_UNSPECIFIED`,
86 // returns an `INVALID_ARGUMENT` error.
89 // The source of the document: a string containing the content or a
90 // Google Cloud Storage URI.
92 // The content of the input in string format.
95 // The Google Cloud Storage URI where the file content is located.
96 // This URI must be of the form: gs://bucket_name/object_name. For more
97 // details, see https://cloud.google.com/storage/docs/reference-uris.
98 // NOTE: Cloud Storage object versioning is not supported.
99 string gcs_content_uri = 3;
102 // The language of the document (if not specified, the language is
103 // automatically detected). Both ISO and BCP-47 language codes are
// accepted. [Language
106 // Support](https://cloud.google.com/natural-language/docs/languages) lists
107 // currently supported languages for each API method. If the language (either
108 // specified by the caller or automatically detected) is not supported by the
109 // called API method, an `INVALID_ARGUMENT` error is returned.
113 // Represents a sentence in the input document.
// NOTE(review): the `message Sentence {` header and the sentence `text` field
// declaration are not visible in this chunk.
115 // The sentence text.
118 // For calls to [AnalyzeSentiment][] or if
119 // [AnnotateTextRequest.Features.extract_document_sentiment][google.cloud.language.v1beta1.AnnotateTextRequest.Features.extract_document_sentiment]
120 // is set to true, this field will contain the sentiment for the sentence.
121 Sentiment sentiment = 2;
124 // Represents a phrase in the text that is a known entity, such as
125 // a person, an organization, or location. The API associates information, such
126 // as salience and mentions, with entities.
// NOTE(review): the `message Entity {` header, the `Type` enum declaration and
// values, and the `type`/`name`/`salience` field declarations are not visible
// in this chunk — only their doc comments survive.
128 // The type of the entity.
155 // The representative name for the entity.
161 // Metadata associated with the entity.
163 // Currently, Wikipedia URLs and Knowledge Graph MIDs are provided, if
164 // available. The associated keys are "wikipedia_url" and "mid", respectively.
165 map<string, string> metadata = 3;
167 // The salience score associated with the entity in the [0, 1.0] range.
169 // The salience score for an entity provides information about the
170 // importance or centrality of that entity to the entire document text.
171 // Scores closer to 0 are less salient, while scores closer to 1.0 are highly
// salient.
175 // The mentions of this entity in the input document. The API currently
176 // supports proper noun mentions.
177 repeated EntityMention mentions = 5;
180 // Represents the smallest syntactic building block of the text.
// NOTE(review): the `message Token {` header, the token `text` field, the
// `lemma` field declaration, and the closing brace are not visible in this
// chunk.
185 // Parts of speech tag for this token.
186 PartOfSpeech part_of_speech = 2;
188 // Dependency tree parse for this token.
189 DependencyEdge dependency_edge = 3;
191 // [Lemma](https://en.wikipedia.org/wiki/Lemma_%28morphology%29) of the token.
195 // Represents the feeling associated with the entire text or entities in
// the text.
// NOTE(review): the `message Sentiment {` header and all three field
// declarations (the deprecated field, `magnitude`, and `score`) are not
// visible in this chunk — only their doc comments survive.
198 // DEPRECATED FIELD - This field is being deprecated in
199 // favor of score. Please refer to our documentation at
200 // https://cloud.google.com/natural-language/docs for more information.
203 // A non-negative number in the [0, +inf) range, which represents
204 // the absolute magnitude of sentiment regardless of score (positive or
// negative).
208 // Sentiment score between -1.0 (negative sentiment) and 1.0
209 // (positive sentiment).
213 // Represents part of speech information for a token.
214 message PartOfSpeech {
// NOTE(review): most of this message is not visible in this chunk — the nested
// enum declarations (Tag, Aspect, Case, Form, Gender, Mood, Number, Person,
// Proper, Reciprocity, Tense, Voice), the bulk of their values, most field
// declarations, and the closing braces are missing; only scattered comments
// and a few surviving lines appear below.
215 // The part of speech tags enum.
223 // Adposition (preposition and postposition)
235 // Noun (common and proper)
244 // Particle or other function word
250 // Verb (all tenses and modes)
253 // Other: foreign words, typos, abbreviations
260 // The characteristic of a verb that expresses time flow during an event.
262 // Aspect is not applicable in the analyzed language or is not predicted.
275 // The grammatical function performed by a noun or pronoun in a phrase,
276 // clause, or sentence. In some languages, other parts of speech, such as
277 // adjective and determiner, take case inflection in agreement with the noun.
279 // Case is not applicable in the analyzed language or is not predicted.
325 // Depending on the language, Form can be categorizing different forms of
326 // verbs, adjectives, adverbs, etc. For example, categorizing inflected
327 // endings of verbs and adjectives or distinguishing between short and long
328 // forms of adjectives and participles
330 // Form is not applicable in the analyzed language or is not predicted.
367 // Gender classes of nouns reflected in the behaviour of associated words.
369 // Gender is not applicable in the analyzed language or is not predicted.
382 // The grammatical feature of verbs, used for showing modality and attitude.
384 // Mood is not applicable in the analyzed language or is not predicted.
388 CONDITIONAL_MOOD = 1;
406 // Count distinctions.
408 // Number is not applicable in the analyzed language or is not predicted.
421 // The distinction between the speaker, second person, third person, etc.
423 // Person is not applicable in the analyzed language or is not predicted.
436 REFLEXIVE_PERSON = 4;
439 // This category shows if the token is part of a proper name.
441 // Proper is not applicable in the analyzed language or is not predicted.
451 // Reciprocal features of a pronoun.
453 // Reciprocity is not applicable in the analyzed language or is not
// predicted.
455 RECIPROCITY_UNKNOWN = 0;
466 // Tense is not applicable in the analyzed language or is not predicted.
470 CONDITIONAL_TENSE = 1;
488 // The relationship between the action that a verb expresses and the
489 // participants identified by its arguments.
491 // Voice is not applicable in the analyzed language or is not predicted.
504 // The part of speech tag.
507 // The grammatical aspect.
510 // The grammatical case.
513 // The grammatical form.
516 // The grammatical gender.
519 // The grammatical mood.
522 // The grammatical number.
525 // The grammatical person.
528 // The grammatical properness.
531 // The grammatical reciprocity.
532 Reciprocity reciprocity = 10;
534 // The grammatical tense.
537 // The grammatical voice.
541 // Represents dependency parse tree information for a token.
542 message DependencyEdge {
// NOTE(review): the `Label` enum declaration and all of its value lines are
// not visible in this chunk — only the per-value comments survive below — and
// the `label` field declaration and closing braces are also missing.
543 // The parse label enum for the token.
548 // Abbreviation modifier
551 // Adjectival complement
554 // Adverbial clause modifier
557 // Adverbial modifier
560 // Adjectival modifier of an NP
563 // Appositional modifier of an NP
566 // Attribute dependent of a copular verb
569 // Auxiliary (non-main) verb
575 // Coordinating conjunction
578 // Clausal complement of a verb or adjective
587 // Clausal passive subject
590 // Dependency (unable to determine)
605 // Goes with (part of a word in a text not well edited)
611 // Marker (word introducing a subordinate clause)
614 // Multi-word expression
617 // Multi-word verbal expression
623 // Noun compound modifier
626 // Noun phrase used as an adverbial modifier
632 // Passive nominal subject
635 // Numeric modifier of a noun
638 // Element of compound number
644 // Parataxis relation
647 // Participial modifier
650 // The complement of a preposition is a clause
653 // Object of a preposition
656 // Possession modifier
659 // Postverbal negative particle
662 // Predicate complement
674 // Prepositional modifier
677 // The relationship between a verb and verbal morpheme
683 // Associative or possessive marker
686 // Quantifier phrase modifier
689 // Relative clause modifier
692 // Complementizer in relative clause
695 // Ellipsis without a preceding predicate
710 // Suffix specifying a unit of number
722 // Clause headed by an infinite form of the verb that modifies a noun
728 // Open clausal complement
737 // Adverbial phrase modifier
740 // Causative auxiliary
746 // Rentaishi (Prenominal modifier)
755 // List for chains of comparable items
758 // Nominalized clause
761 // Nominalized clausal subject
764 // Nominalized clausal passive
767 // Compound of numeric modifier
773 // Dislocated relation (for fronted/topicalized elements)
777 // Represents the head of this token in the dependency tree.
778 // This is the index of the token which has an arc going to this token.
779 // The index is the position of the token in the array of tokens returned
780 // by the API method. If this token is a root token, then the
781 // `head_token_index` is its own index.
782 int32 head_token_index = 1;
784 // The parse label for the token.
788 // Represents a mention for an entity in the text. Currently, proper noun
789 // mentions are supported.
790 message EntityMention {
// NOTE(review): the `Type` enum declaration and its values, the mention `text`
// field, the `type` field declaration, and the closing braces are not visible
// in this chunk.
791 // The supported types of mentions.
799 // Common noun (or noun compound)
806 // The type of the entity mention.
810 // Represents an output piece of text.
// NOTE(review): the `message TextSpan {` header, the `content` field
// declaration, and the closing brace are not visible in this chunk.
812 // The content of the output text.
815 // The API calculates the beginning offset of the content in the original
816 // document according to the
817 // [EncodingType][google.cloud.language.v1beta1.EncodingType] specified in the
// API request.
819 int32 begin_offset = 2;
822 // The sentiment analysis request message.
823 message AnalyzeSentimentRequest {
// Input document.
825 Document document = 1;
827 // The encoding type used by the API to calculate sentence offsets for the
828 // sentence sentiment.
829 EncodingType encoding_type = 2;
// NOTE(review): the closing brace of this message is not visible in this chunk.
832 // The sentiment analysis response message.
833 message AnalyzeSentimentResponse {
834 // The overall sentiment of the input document.
835 Sentiment document_sentiment = 1;
837 // The language of the text, which will be the same as the language specified
838 // in the request or, if not specified, the automatically-detected language.
839 // See [Document.language][google.cloud.language.v1beta1.Document.language]
840 // field for more details.
// NOTE(review): the `language` field declaration (between fields 1 and 3, so
// presumably tag 2) and the message's closing brace are not visible in this
// chunk.
843 // The sentiment for all the sentences in the document.
844 repeated Sentence sentences = 3;
847 // The entity analysis request message.
848 message AnalyzeEntitiesRequest {
// Input document.
850 Document document = 1;
852 // The encoding type used by the API to calculate offsets.
853 EncodingType encoding_type = 2;
// NOTE(review): the closing brace of this message is not visible in this chunk.
856 // The entity analysis response message.
857 message AnalyzeEntitiesResponse {
858 // The recognized entities in the input document.
859 repeated Entity entities = 1;
861 // The language of the text, which will be the same as the language specified
862 // in the request or, if not specified, the automatically-detected language.
863 // See [Document.language][google.cloud.language.v1beta1.Document.language]
864 // field for more details.
// NOTE(review): the `language` field declaration and the closing brace are not
// visible in this chunk.
868 // The syntax analysis request message.
869 message AnalyzeSyntaxRequest {
// Input document.
871 Document document = 1;
873 // The encoding type used by the API to calculate offsets.
874 EncodingType encoding_type = 2;
// NOTE(review): the closing brace of this message is not visible in this chunk.
877 // The syntax analysis response message.
878 message AnalyzeSyntaxResponse {
879 // Sentences in the input document.
880 repeated Sentence sentences = 1;
882 // Tokens, along with their syntactic information, in the input document.
883 repeated Token tokens = 2;
885 // The language of the text, which will be the same as the language specified
886 // in the request or, if not specified, the automatically-detected language.
887 // See [Document.language][google.cloud.language.v1beta1.Document.language]
888 // field for more details.
// NOTE(review): the `language` field declaration and the closing brace are not
// visible in this chunk.
892 // The request message for the text annotation API, which can perform multiple
893 // analysis types (sentiment, entities, and syntax) in one call.
894 message AnnotateTextRequest {
895 // All available features for sentiment, syntax, and semantic analysis.
896 // Setting each one to true will enable that specific analysis for the input.
// NOTE(review): the nested `message Features {` header and the closing braces
// of Features and of this message are not visible in this chunk.
898 // Extract syntax information.
899 bool extract_syntax = 1;
// Extract entities.
902 bool extract_entities = 2;
904 // Extract document-level sentiment.
905 bool extract_document_sentiment = 3;
// Input document.
909 Document document = 1;
911 // The enabled features.
912 Features features = 2;
914 // The encoding type used by the API to calculate offsets.
915 EncodingType encoding_type = 3;
918 // The text annotations response message.
919 message AnnotateTextResponse {
920 // Sentences in the input document. Populated if the user enables
921 // [AnnotateTextRequest.Features.extract_syntax][google.cloud.language.v1beta1.AnnotateTextRequest.Features.extract_syntax].
922 repeated Sentence sentences = 1;
924 // Tokens, along with their syntactic information, in the input document.
925 // Populated if the user enables
926 // [AnnotateTextRequest.Features.extract_syntax][google.cloud.language.v1beta1.AnnotateTextRequest.Features.extract_syntax].
927 repeated Token tokens = 2;
929 // Entities, along with their semantic information, in the input document.
930 // Populated if the user enables
931 // [AnnotateTextRequest.Features.extract_entities][google.cloud.language.v1beta1.AnnotateTextRequest.Features.extract_entities].
932 repeated Entity entities = 3;
934 // The overall sentiment for the document. Populated if the user enables
935 // [AnnotateTextRequest.Features.extract_document_sentiment][google.cloud.language.v1beta1.AnnotateTextRequest.Features.extract_document_sentiment].
936 Sentiment document_sentiment = 4;
938 // The language of the text, which will be the same as the language specified
939 // in the request or, if not specified, the automatically-detected language.
940 // See [Document.language][google.cloud.language.v1beta1.Document.language]
941 // field for more details.
// NOTE(review): the `language` field declaration and the closing brace are not
// visible in this chunk.
945 // Represents the text encoding that the caller uses to process the output.
946 // Providing an `EncodingType` is recommended because the API provides the
947 // beginning offsets for various outputs, such as tokens and mentions, and
948 // languages that natively use different text encodings may access offsets
951 // If `EncodingType` is not specified, encoding-dependent information (such as
952 // `begin_offset`) will be set at `-1`.
955 // Encoding-dependent information (such as `begin_offset`) is calculated based
956 // on the UTF-8 encoding of the input. C++ and Go are examples of languages
957 // that use this encoding natively.
960 // Encoding-dependent information (such as `begin_offset`) is calculated based
961 // on the UTF-16 encoding of the input. Java and Javascript are examples of
962 // languages that use this encoding natively.
965 // Encoding-dependent information (such as `begin_offset`) is calculated based
966 // on the UTF-32 encoding of the input. Python is an example of a language
967 // that uses this encoding natively.