// Copyright 2019 Google LLC.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
syntax = "proto3";

package google.cloud.language.v1;

import "google/api/annotations.proto";

option go_package = "google.golang.org/genproto/googleapis/cloud/language/v1;language";
option java_multiple_files = true;
option java_outer_classname = "LanguageServiceProto";
option java_package = "com.google.cloud.language.v1";
// Provides text analysis operations such as sentiment analysis and entity
// recognition.
service LanguageService {
  // Analyzes the sentiment of the provided text.
  rpc AnalyzeSentiment(AnalyzeSentimentRequest)
      returns (AnalyzeSentimentResponse) {
    option (google.api.http) = {
      post: "/v1/documents:analyzeSentiment"
      body: "*"
    };
  }

  // Finds named entities (currently proper names and common nouns) in the text
  // along with entity types, salience, mentions for each entity, and
  // other properties.
  rpc AnalyzeEntities(AnalyzeEntitiesRequest)
      returns (AnalyzeEntitiesResponse) {
    option (google.api.http) = {
      post: "/v1/documents:analyzeEntities"
      body: "*"
    };
  }

  // Finds entities, similar to
  // [AnalyzeEntities][google.cloud.language.v1.LanguageService.AnalyzeEntities]
  // in the text and analyzes sentiment associated with each entity and its
  // mentions.
  rpc AnalyzeEntitySentiment(AnalyzeEntitySentimentRequest)
      returns (AnalyzeEntitySentimentResponse) {
    option (google.api.http) = {
      post: "/v1/documents:analyzeEntitySentiment"
      body: "*"
    };
  }

  // Analyzes the syntax of the text and provides sentence boundaries and
  // tokenization along with part of speech tags, dependency trees, and other
  // properties.
  rpc AnalyzeSyntax(AnalyzeSyntaxRequest) returns (AnalyzeSyntaxResponse) {
    option (google.api.http) = {
      post: "/v1/documents:analyzeSyntax"
      body: "*"
    };
  }

  // Classifies a document into categories.
  rpc ClassifyText(ClassifyTextRequest) returns (ClassifyTextResponse) {
    option (google.api.http) = {
      post: "/v1/documents:classifyText"
      body: "*"
    };
  }

  // A convenience method that provides all the features that analyzeSentiment,
  // analyzeEntities, and analyzeSyntax provide in one call.
  rpc AnnotateText(AnnotateTextRequest) returns (AnnotateTextResponse) {
    option (google.api.http) = {
      post: "/v1/documents:annotateText"
      body: "*"
    };
  }
}
90 // ################################################################ #
// Represents the input to API methods.
message Document {
  // The document types enum.
  enum Type {
    // The content type is not specified.
    TYPE_UNSPECIFIED = 0;

    // Plain text
    PLAIN_TEXT = 1;

    // HTML
    HTML = 2;
  }

  // Required. If the type is not set or is `TYPE_UNSPECIFIED`,
  // returns an `INVALID_ARGUMENT` error.
  Type type = 1;

  // The source of the document: a string containing the content or a
  // Google Cloud Storage URI.
  oneof source {
    // The content of the input in string format.
    string content = 2;

    // The Google Cloud Storage URI where the file content is located.
    // This URI must be of the form: gs://bucket_name/object_name. For more
    // details, see https://cloud.google.com/storage/docs/reference-uris.
    // NOTE: Cloud Storage object versioning is not supported.
    string gcs_content_uri = 3;
  }

  // The language of the document (if not specified, the language is
  // automatically detected). Both ISO and BCP-47 language codes are
  // accepted.<br>
  // [Language Support](/natural-language/docs/languages)
  // lists currently supported languages for each API method.
  // If the language (either specified by the caller or automatically detected)
  // is not supported by the called API method, an `INVALID_ARGUMENT` error
  // is returned.
  string language = 4;
}
// Represents a sentence in the input document.
message Sentence {
  // The sentence text.
  TextSpan text = 1;

  // For calls to [AnalyzeSentiment][] or if
  // [AnnotateTextRequest.Features.extract_document_sentiment][google.cloud.language.v1.AnnotateTextRequest.Features.extract_document_sentiment]
  // is set to true, this field will contain the sentiment for the sentence.
  Sentiment sentiment = 2;
}
145 // Represents a phrase in the text that is a known entity, such as
146 // a person, an organization, or location. The API associates information, such
147 // as salience and mentions, with entities.
149 // The type of the entity.
191 // The representative name for the entity.
197 // Metadata associated with the entity.
199 // Currently, Wikipedia URLs and Knowledge Graph MIDs are provided, if
200 // available. The associated keys are "wikipedia_url" and "mid", respectively.
201 map<string, string> metadata = 3;
203 // The salience score associated with the entity in the [0, 1.0] range.
205 // The salience score for an entity provides information about the
206 // importance or centrality of that entity to the entire document text.
207 // Scores closer to 0 are less salient, while scores closer to 1.0 are highly
211 // The mentions of this entity in the input document. The API currently
212 // supports proper noun mentions.
213 repeated EntityMention mentions = 5;
215 // For calls to [AnalyzeEntitySentiment][] or if
216 // [AnnotateTextRequest.Features.extract_entity_sentiment][google.cloud.language.v1.AnnotateTextRequest.Features.extract_entity_sentiment]
217 // is set to true, this field will contain the aggregate sentiment expressed
218 // for this entity in the provided document.
219 Sentiment sentiment = 6;
// Represents the text encoding that the caller uses to process the output.
// Providing an `EncodingType` is recommended because the API provides the
// beginning offsets for various outputs, such as tokens and mentions, and
// languages that natively use different text encodings may access offsets
// differently.
enum EncodingType {
  // If `EncodingType` is not specified, encoding-dependent information (such as
  // `begin_offset`) will be set at `-1`.
  NONE = 0;

  // Encoding-dependent information (such as `begin_offset`) is calculated based
  // on the UTF-8 encoding of the input. C++ and Go are examples of languages
  // that use this encoding natively.
  UTF8 = 1;

  // Encoding-dependent information (such as `begin_offset`) is calculated based
  // on the UTF-16 encoding of the input. Java and JavaScript are examples of
  // languages that use this encoding natively.
  UTF16 = 2;

  // Encoding-dependent information (such as `begin_offset`) is calculated based
  // on the UTF-32 encoding of the input. Python is an example of a language
  // that uses this encoding natively.
  UTF32 = 3;
}
// Represents the smallest syntactic building block of the text.
message Token {
  // The token text.
  TextSpan text = 1;

  // Parts of speech tag for this token.
  PartOfSpeech part_of_speech = 2;

  // Dependency tree parse for this token.
  DependencyEdge dependency_edge = 3;

  // [Lemma](https://en.wikipedia.org/wiki/Lemma_%28morphology%29) of the token.
  string lemma = 4;
}
// Represents the feeling associated with the entire text or entities in
// the text.
message Sentiment {
  // A non-negative number in the [0, +inf) range, which represents
  // the absolute magnitude of sentiment regardless of score (positive or
  // negative).
  float magnitude = 2;

  // Sentiment score between -1.0 (negative sentiment) and 1.0
  // (positive sentiment).
  float score = 3;
}
276 // Represents part of speech information for a token. Parts of speech
278 // http://www.lrec-conf.org/proceedings/lrec2012/pdf/274_Paper.pdf
279 message PartOfSpeech {
280 // The part of speech tags enum.
288 // Adposition (preposition and postposition)
300 // Noun (common and proper)
309 // Particle or other function word
315 // Verb (all tenses and modes)
318 // Other: foreign words, typos, abbreviations
325 // The characteristic of a verb that expresses time flow during an event.
327 // Aspect is not applicable in the analyzed language or is not predicted.
340 // The grammatical function performed by a noun or pronoun in a phrase,
341 // clause, or sentence. In some languages, other parts of speech, such as
342 // adjective and determiner, take case inflection in agreement with the noun.
344 // Case is not applicable in the analyzed language or is not predicted.
390 // Depending on the language, Form can be categorizing different forms of
391 // verbs, adjectives, adverbs, etc. For example, categorizing inflected
392 // endings of verbs and adjectives or distinguishing between short and long
393 // forms of adjectives and participles
395 // Form is not applicable in the analyzed language or is not predicted.
432 // Gender classes of nouns reflected in the behaviour of associated words.
434 // Gender is not applicable in the analyzed language or is not predicted.
447 // The grammatical feature of verbs, used for showing modality and attitude.
449 // Mood is not applicable in the analyzed language or is not predicted.
453 CONDITIONAL_MOOD = 1;
471 // Count distinctions.
473 // Number is not applicable in the analyzed language or is not predicted.
486 // The distinction between the speaker, second person, third person, etc.
488 // Person is not applicable in the analyzed language or is not predicted.
501 REFLEXIVE_PERSON = 4;
504 // This category shows if the token is part of a proper name.
506 // Proper is not applicable in the analyzed language or is not predicted.
516 // Reciprocal features of a pronoun.
518 // Reciprocity is not applicable in the analyzed language or is not
520 RECIPROCITY_UNKNOWN = 0;
531 // Tense is not applicable in the analyzed language or is not predicted.
535 CONDITIONAL_TENSE = 1;
553 // The relationship between the action that a verb expresses and the
554 // participants identified by its arguments.
556 // Voice is not applicable in the analyzed language or is not predicted.
569 // The part of speech tag.
572 // The grammatical aspect.
575 // The grammatical case.
578 // The grammatical form.
581 // The grammatical gender.
584 // The grammatical mood.
587 // The grammatical number.
590 // The grammatical person.
593 // The grammatical properness.
596 // The grammatical reciprocity.
597 Reciprocity reciprocity = 10;
599 // The grammatical tense.
602 // The grammatical voice.
606 // Represents dependency parse tree information for a token. (For more
607 // information on dependency labels, see
608 // http://www.aclweb.org/anthology/P13-2017
609 message DependencyEdge {
610 // The parse label enum for the token.
615 // Abbreviation modifier
618 // Adjectival complement
621 // Adverbial clause modifier
624 // Adverbial modifier
627 // Adjectival modifier of an NP
630 // Appositional modifier of an NP
633 // Attribute dependent of a copular verb
636 // Auxiliary (non-main) verb
642 // Coordinating conjunction
645 // Clausal complement of a verb or adjective
654 // Clausal passive subject
657 // Dependency (unable to determine)
672 // Goes with (part of a word in a text not well edited)
678 // Marker (word introducing a subordinate clause)
681 // Multi-word expression
684 // Multi-word verbal expression
690 // Noun compound modifier
693 // Noun phrase used as an adverbial modifier
699 // Passive nominal subject
702 // Numeric modifier of a noun
705 // Element of compound number
711 // Parataxis relation
714 // Participial modifier
717 // The complement of a preposition is a clause
720 // Object of a preposition
723 // Possession modifier
726 // Postverbal negative particle
729 // Predicate complement
741 // Prepositional modifier
744 // The relationship between a verb and verbal morpheme
750 // Associative or possessive marker
753 // Quantifier phrase modifier
756 // Relative clause modifier
759 // Complementizer in relative clause
762 // Ellipsis without a preceding predicate
777 // Suffix specifying a unit of number
789 // Clause headed by an infinite form of the verb that modifies a noun
795 // Open clausal complement
804 // Adverbial phrase modifier
807 // Causative auxiliary
813 // Rentaishi (Prenominal modifier)
822 // List for chains of comparable items
825 // Nominalized clause
828 // Nominalized clausal subject
831 // Nominalized clausal passive
834 // Compound of numeric modifier
840 // Dislocated relation (for fronted/topicalized elements)
852 // Infinitival modifier
858 // Nominal complement of a noun
862 // Represents the head of this token in the dependency tree.
863 // This is the index of the token which has an arc going to this token.
864 // The index is the position of the token in the array of tokens returned
865 // by the API method. If this token is a root token, then the
866 // `head_token_index` is its own index.
867 int32 head_token_index = 1;
869 // The parse label for the token.
// Represents a mention for an entity in the text. Currently, proper noun
// mentions are supported.
message EntityMention {
  // The supported types of mentions.
  enum Type {
    // Unknown
    TYPE_UNKNOWN = 0;

    // Proper name
    PROPER = 1;

    // Common noun (or noun compound)
    COMMON = 2;
  }

  // The mention text.
  TextSpan text = 1;

  // The type of the entity mention.
  Type type = 2;

  // For calls to [AnalyzeEntitySentiment][] or if
  // [AnnotateTextRequest.Features.extract_entity_sentiment][google.cloud.language.v1.AnnotateTextRequest.Features.extract_entity_sentiment]
  // is set to true, this field will contain the sentiment expressed for this
  // mention of the entity in the provided document.
  Sentiment sentiment = 3;
}
// Represents an output piece of text.
message TextSpan {
  // The content of the output text.
  string content = 1;

  // The API calculates the beginning offset of the content in the original
  // document according to the
  // [EncodingType][google.cloud.language.v1.EncodingType] specified in the API
  // request.
  int32 begin_offset = 2;
}
// Represents a category returned from the text classifier.
message ClassificationCategory {
  // The name of the category representing the document, from the [predefined
  // taxonomy](/natural-language/docs/categories).
  string name = 1;

  // The classifier's confidence of the category. Number represents how certain
  // the classifier is that this category represents the given text.
  float confidence = 2;
}
// The sentiment analysis request message.
message AnalyzeSentimentRequest {
  // Input document.
  Document document = 1;

  // The encoding type used by the API to calculate sentence offsets.
  EncodingType encoding_type = 2;
}
// The sentiment analysis response message.
message AnalyzeSentimentResponse {
  // The overall sentiment of the input document.
  Sentiment document_sentiment = 1;

  // The language of the text, which will be the same as the language specified
  // in the request or, if not specified, the automatically-detected language.
  // See [Document.language][google.cloud.language.v1.Document.language] field
  // for more details.
  string language = 2;

  // The sentiment for all the sentences in the document.
  repeated Sentence sentences = 3;
}
// The entity-level sentiment analysis request message.
message AnalyzeEntitySentimentRequest {
  // Input document.
  Document document = 1;

  // The encoding type used by the API to calculate offsets.
  EncodingType encoding_type = 2;
}
// The entity-level sentiment analysis response message.
message AnalyzeEntitySentimentResponse {
  // The recognized entities in the input document with associated sentiments.
  repeated Entity entities = 1;

  // The language of the text, which will be the same as the language specified
  // in the request or, if not specified, the automatically-detected language.
  // See [Document.language][google.cloud.language.v1.Document.language] field
  // for more details.
  string language = 2;
}
// The entity analysis request message.
message AnalyzeEntitiesRequest {
  // Input document.
  Document document = 1;

  // The encoding type used by the API to calculate offsets.
  EncodingType encoding_type = 2;
}
// The entity analysis response message.
message AnalyzeEntitiesResponse {
  // The recognized entities in the input document.
  repeated Entity entities = 1;

  // The language of the text, which will be the same as the language specified
  // in the request or, if not specified, the automatically-detected language.
  // See [Document.language][google.cloud.language.v1.Document.language] field
  // for more details.
  string language = 2;
}
// The syntax analysis request message.
message AnalyzeSyntaxRequest {
  // Input document.
  Document document = 1;

  // The encoding type used by the API to calculate offsets.
  EncodingType encoding_type = 2;
}
// The syntax analysis response message.
message AnalyzeSyntaxResponse {
  // Sentences in the input document.
  repeated Sentence sentences = 1;

  // Tokens, along with their syntactic information, in the input document.
  repeated Token tokens = 2;

  // The language of the text, which will be the same as the language specified
  // in the request or, if not specified, the automatically-detected language.
  // See [Document.language][google.cloud.language.v1.Document.language] field
  // for more details.
  string language = 3;
}
// The document classification request message.
message ClassifyTextRequest {
  // Input document.
  Document document = 1;
}
// The document classification response message.
message ClassifyTextResponse {
  // Categories representing the input document.
  repeated ClassificationCategory categories = 1;
}
// The request message for the text annotation API, which can perform multiple
// analysis types (sentiment, entities, and syntax) in one call.
message AnnotateTextRequest {
  // All available features for sentiment, syntax, and semantic analysis.
  // Setting each one to true will enable that specific analysis for the input.
  message Features {
    // Extract syntax information.
    bool extract_syntax = 1;

    // Extract entities.
    bool extract_entities = 2;

    // Extract document-level sentiment.
    bool extract_document_sentiment = 3;

    // Extract entities and their associated sentiment.
    bool extract_entity_sentiment = 4;

    // Classify the full document into categories.
    bool classify_text = 6;
  }

  // Input document.
  Document document = 1;

  // The enabled features.
  Features features = 2;

  // The encoding type used by the API to calculate offsets.
  EncodingType encoding_type = 3;
}
// The text annotations response message.
message AnnotateTextResponse {
  // Sentences in the input document. Populated if the user enables
  // [AnnotateTextRequest.Features.extract_syntax][google.cloud.language.v1.AnnotateTextRequest.Features.extract_syntax].
  repeated Sentence sentences = 1;

  // Tokens, along with their syntactic information, in the input document.
  // Populated if the user enables
  // [AnnotateTextRequest.Features.extract_syntax][google.cloud.language.v1.AnnotateTextRequest.Features.extract_syntax].
  repeated Token tokens = 2;

  // Entities, along with their semantic information, in the input document.
  // Populated if the user enables
  // [AnnotateTextRequest.Features.extract_entities][google.cloud.language.v1.AnnotateTextRequest.Features.extract_entities].
  repeated Entity entities = 3;

  // The overall sentiment for the document. Populated if the user enables
  // [AnnotateTextRequest.Features.extract_document_sentiment][google.cloud.language.v1.AnnotateTextRequest.Features.extract_document_sentiment].
  Sentiment document_sentiment = 4;

  // The language of the text, which will be the same as the language specified
  // in the request or, if not specified, the automatically-detected language.
  // See [Document.language][google.cloud.language.v1.Document.language] field
  // for more details.
  string language = 5;

  // Categories identified in the input document.
  repeated ClassificationCategory categories = 6;
}