legacy-libs/google-proto-files/google/cloud/language/v1beta1/language_service.proto

   1 // Copyright 2017 Google Inc.
   2 //
   3 // Licensed under the Apache License, Version 2.0 (the "License");
   4 // you may not use this file except in compliance with the License.
   5 // You may obtain a copy of the License at
   6 //
   7 //     http://www.apache.org/licenses/LICENSE-2.0
   8 //
   9 // Unless required by applicable law or agreed to in writing, software
  10 // distributed under the License is distributed on an "AS IS" BASIS,
  11 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  12 // See the License for the specific language governing permissions and
  13 // limitations under the License.
  14
  15 syntax = "proto3";
  16
  17 package google.cloud.language.v1beta1;
  18
  19 import "google/api/annotations.proto";
  20
  21 option go_package = "google.golang.org/genproto/googleapis/cloud/language/v1beta1;language";
  22 option java_multiple_files = true;
  23 option java_outer_classname = "LanguageServiceProto";
  24 option java_package = "com.google.cloud.language.v1beta1";
  25
  26 // Provides text analysis operations such as sentiment analysis and entity
  27 // recognition.
  28 service LanguageService {
  29   // Analyzes the sentiment of the provided text.
  30   rpc AnalyzeSentiment(AnalyzeSentimentRequest)
  31       returns (AnalyzeSentimentResponse) {
  32     option (google.api.http) = {
  33       post: "/v1beta1/documents:analyzeSentiment"
  34       body: "*"
  35     };
  36   }
  37
  38   // Finds named entities (currently proper names and common nouns) in the text
  39   // along with entity types, salience, mentions for each entity, and
  40   // other properties.
  41   rpc AnalyzeEntities(AnalyzeEntitiesRequest)
  42       returns (AnalyzeEntitiesResponse) {
  43     option (google.api.http) = {
  44       post: "/v1beta1/documents:analyzeEntities"
  45       body: "*"
  46     };
  47   }
  48
  49   // Analyzes the syntax of the text and provides sentence boundaries and
  50   // tokenization along with part of speech tags, dependency trees, and other
  51   // properties.
  52   rpc AnalyzeSyntax(AnalyzeSyntaxRequest) returns (AnalyzeSyntaxResponse) {
  53     option (google.api.http) = {
  54       post: "/v1beta1/documents:analyzeSyntax"
  55       body: "*"
  56     };
  57   }
  58
  59   // A convenience method that provides all the features that analyzeSentiment,
  60   // analyzeEntities, and analyzeSyntax provide in one call.
  61   rpc AnnotateText(AnnotateTextRequest) returns (AnnotateTextResponse) {
  62     option (google.api.http) = {
  63       post: "/v1beta1/documents:annotateText"
  64       body: "*"
  65     };
  66   }
  67 }
  68
  69 // ################################################################ #
  70 //
  71 // Represents the input to API methods.
  72 message Document {
  73   // The document types enum.
  74   enum Type {
  75     // The content type is not specified.
  76     TYPE_UNSPECIFIED = 0;
  77
  78     // Plain text
  79     PLAIN_TEXT = 1;
  80
  81     // HTML
  82     HTML = 2;
  83   }
  84
  85   // Required. If the type is not set or is `TYPE_UNSPECIFIED`,
  86   // returns an `INVALID_ARGUMENT` error.
  87   Type type = 1;
  88
  89   // The source of the document: a string containing the content or a
  90   // Google Cloud Storage URI.
  91   oneof source {
  92     // The content of the input in string format.
  93     string content = 2;
  94
  95     // The Google Cloud Storage URI where the file content is located.
  96     // This URI must be of the form: gs://bucket_name/object_name. For more
  97     // details, see https://cloud.google.com/storage/docs/reference-uris.
  98     // NOTE: Cloud Storage object versioning is not supported.
  99     string gcs_content_uri = 3;
 100   }
 101
 102   // The language of the document (if not specified, the language is
 103   // automatically detected). Both ISO and BCP-47 language codes are
 104   // accepted.<br>
 105   // [Language
 106   // Support](https://cloud.google.com/natural-language/docs/languages) lists
 107   // currently supported languages for each API method. If the language (either
 108   // specified by the caller or automatically detected) is not supported by the
 109   // called API method, an `INVALID_ARGUMENT` error is returned.
 110   string language = 4;
 111 }
 112
 113 // Represents a sentence in the input document.
 114 message Sentence {
 115   // The sentence text.
 116   TextSpan text = 1;
 117
 118   // For calls to [AnalyzeSentiment][] or if
 119   // [AnnotateTextRequest.Features.extract_document_sentiment][google.cloud.language.v1beta1.AnnotateTextRequest.Features.extract_document_sentiment]
 120   // is set to true, this field will contain the sentiment for the sentence.
 121   Sentiment sentiment = 2;
 122 }
 123
 124 // Represents a phrase in the text that is a known entity, such as
 125 // a person, an organization, or location. The API associates information, such
 126 // as salience and mentions, with entities.
 127 message Entity {
 128   // The type of the entity.
 129   enum Type {
 130     // Unknown
 131     UNKNOWN = 0;
 132
 133     // Person
 134     PERSON = 1;
 135
 136     // Location
 137     LOCATION = 2;
 138
 139     // Organization
 140     ORGANIZATION = 3;
 141
 142     // Event
 143     EVENT = 4;
 144
 145     // Work of art
 146     WORK_OF_ART = 5;
 147
 148     // Consumer goods
 149     CONSUMER_GOOD = 6;
 150
 151     // Other types
 152     OTHER = 7;
 153   }
 154
 155   // The representative name for the entity.
 156   string name = 1;
 157
 158   // The entity type.
 159   Type type = 2;
 160
 161   // Metadata associated with the entity.
 162   //
 163   // Currently, Wikipedia URLs and Knowledge Graph MIDs are provided, if
 164   // available. The associated keys are "wikipedia_url" and "mid", respectively.
 165   map<string, string> metadata = 3;
 166
 167   // The salience score associated with the entity in the [0, 1.0] range.
 168   //
 169   // The salience score for an entity provides information about the
 170   // importance or centrality of that entity to the entire document text.
 171   // Scores closer to 0 are less salient, while scores closer to 1.0 are highly
 172   // salient.
 173   float salience = 4;
 174
 175   // The mentions of this entity in the input document. The API currently
 176   // supports proper noun mentions.
 177   repeated EntityMention mentions = 5;
 178 }
 179
 180 // Represents the smallest syntactic building block of the text.
 181 message Token {
 182   // The token text.
 183   TextSpan text = 1;
 184
 185   // Parts of speech tag for this token.
 186   PartOfSpeech part_of_speech = 2;
 187
 188   // Dependency tree parse for this token.
 189   DependencyEdge dependency_edge = 3;
 190
 191   // [Lemma](https://en.wikipedia.org/wiki/Lemma_%28morphology%29) of the token.
 192   string lemma = 4;
 193 }
 194
 195 // Represents the feeling associated with the entire text or entities in
 196 // the text.
 197 message Sentiment {
 198   // DEPRECATED FIELD - This field is being deprecated in
 199   // favor of score. Please refer to our documentation at
 200   // https://cloud.google.com/natural-language/docs for more information.
 201   float polarity = 1;
 202
 203   // A non-negative number in the [0, +inf) range, which represents
 204   // the absolute magnitude of sentiment regardless of score (positive or
 205   // negative).
 206   float magnitude = 2;
 207
 208   // Sentiment score between -1.0 (negative sentiment) and 1.0
 209   // (positive sentiment).
 210   float score = 3;
 211 }
 212
 213 // Represents part of speech information for a token.
 214 message PartOfSpeech {
 215   // The part of speech tags enum.
 216   enum Tag {
 217     // Unknown
 218     UNKNOWN = 0;
 219
 220     // Adjective
 221     ADJ = 1;
 222
 223     // Adposition (preposition and postposition)
 224     ADP = 2;
 225
 226     // Adverb
 227     ADV = 3;
 228
 229     // Conjunction
 230     CONJ = 4;
 231
 232     // Determiner
 233     DET = 5;
 234
 235     // Noun (common and proper)
 236     NOUN = 6;
 237
 238     // Cardinal number
 239     NUM = 7;
 240
 241     // Pronoun
 242     PRON = 8;
 243
 244     // Particle or other function word
 245     PRT = 9;
 246
 247     // Punctuation
 248     PUNCT = 10;
 249
 250     // Verb (all tenses and modes)
 251     VERB = 11;
 252
 253     // Other: foreign words, typos, abbreviations
 254     X = 12;
 255
 256     // Affix
 257     AFFIX = 13;
 258   }
 259
 260   // The characteristic of a verb that expresses time flow during an event.
 261   enum Aspect {
 262     // Aspect is not applicable in the analyzed language or is not predicted.
 263     ASPECT_UNKNOWN = 0;
 264
 265     // Perfective
 266     PERFECTIVE = 1;
 267
 268     // Imperfective
 269     IMPERFECTIVE = 2;
 270
 271     // Progressive
 272     PROGRESSIVE = 3;
 273   }
 274
 275   // The grammatical function performed by a noun or pronoun in a phrase,
 276   // clause, or sentence. In some languages, other parts of speech, such as
 277   // adjective and determiner, take case inflection in agreement with the noun.
 278   enum Case {
 279     // Case is not applicable in the analyzed language or is not predicted.
 280     CASE_UNKNOWN = 0;
 281
 282     // Accusative
 283     ACCUSATIVE = 1;
 284
 285     // Adverbial
 286     ADVERBIAL = 2;
 287
 288     // Complementive
 289     COMPLEMENTIVE = 3;
 290
 291     // Dative
 292     DATIVE = 4;
 293
 294     // Genitive
 295     GENITIVE = 5;
 296
 297     // Instrumental
 298     INSTRUMENTAL = 6;
 299
 300     // Locative
 301     LOCATIVE = 7;
 302
 303     // Nominative
 304     NOMINATIVE = 8;
 305
 306     // Oblique
 307     OBLIQUE = 9;
 308
 309     // Partitive
 310     PARTITIVE = 10;
 311
 312     // Prepositional
 313     PREPOSITIONAL = 11;
 314
 315     // Reflexive
 316     REFLEXIVE_CASE = 12;
 317
 318     // Relative
 319     RELATIVE_CASE = 13;
 320
 321     // Vocative
 322     VOCATIVE = 14;
 323   }
 324
 325   // Depending on the language, Form can be categorizing different forms of
 326   // verbs, adjectives, adverbs, etc. For example, categorizing inflected
 327   // endings of verbs and adjectives or distinguishing between short and long
 328   // forms of adjectives and participles
 329   enum Form {
 330     // Form is not applicable in the analyzed language or is not predicted.
 331     FORM_UNKNOWN = 0;
 332
 333     // Adnomial
 334     ADNOMIAL = 1;
 335
 336     // Auxiliary
 337     AUXILIARY = 2;
 338
 339     // Complementizer
 340     COMPLEMENTIZER = 3;
 341
 342     // Final ending
 343     FINAL_ENDING = 4;
 344
 345     // Gerund
 346     GERUND = 5;
 347
 348     // Realis
 349     REALIS = 6;
 350
 351     // Irrealis
 352     IRREALIS = 7;
 353
 354     // Short form
 355     SHORT = 8;
 356
 357     // Long form
 358     LONG = 9;
 359
 360     // Order form
 361     ORDER = 10;
 362
 363     // Specific form
 364     SPECIFIC = 11;
 365   }
 366
 367   // Gender classes of nouns reflected in the behaviour of associated words.
 368   enum Gender {
 369     // Gender is not applicable in the analyzed language or is not predicted.
 370     GENDER_UNKNOWN = 0;
 371
 372     // Feminine
 373     FEMININE = 1;
 374
 375     // Masculine
 376     MASCULINE = 2;
 377
 378     // Neuter
 379     NEUTER = 3;
 380   }
 381
 382   // The grammatical feature of verbs, used for showing modality and attitude.
 383   enum Mood {
 384     // Mood is not applicable in the analyzed language or is not predicted.
 385     MOOD_UNKNOWN = 0;
 386
 387     // Conditional
 388     CONDITIONAL_MOOD = 1;
 389
 390     // Imperative
 391     IMPERATIVE = 2;
 392
 393     // Indicative
 394     INDICATIVE = 3;
 395
 396     // Interrogative
 397     INTERROGATIVE = 4;
 398
 399     // Jussive
 400     JUSSIVE = 5;
 401
 402     // Subjunctive
 403     SUBJUNCTIVE = 6;
 404   }
 405
 406   // Count distinctions.
 407   enum Number {
 408     // Number is not applicable in the analyzed language or is not predicted.
 409     NUMBER_UNKNOWN = 0;
 410
 411     // Singular
 412     SINGULAR = 1;
 413
 414     // Plural
 415     PLURAL = 2;
 416
 417     // Dual
 418     DUAL = 3;
 419   }
 420
 421   // The distinction between the speaker, second person, third person, etc.
 422   enum Person {
 423     // Person is not applicable in the analyzed language or is not predicted.
 424     PERSON_UNKNOWN = 0;
 425
 426     // First
 427     FIRST = 1;
 428
 429     // Second
 430     SECOND = 2;
 431
 432     // Third
 433     THIRD = 3;
 434
 435     // Reflexive
 436     REFLEXIVE_PERSON = 4;
 437   }
 438
 439   // This category shows if the token is part of a proper name.
 440   enum Proper {
 441     // Proper is not applicable in the analyzed language or is not predicted.
 442     PROPER_UNKNOWN = 0;
 443
 444     // Proper
 445     PROPER = 1;
 446
 447     // Not proper
 448     NOT_PROPER = 2;
 449   }
 450
 451   // Reciprocal features of a pronoun.
 452   enum Reciprocity {
 453     // Reciprocity is not applicable in the analyzed language or is not
 454     // predicted.
 455     RECIPROCITY_UNKNOWN = 0;
 456
 457     // Reciprocal
 458     RECIPROCAL = 1;
 459
 460     // Non-reciprocal
 461     NON_RECIPROCAL = 2;
 462   }
 463
 464   // Time reference.
 465   enum Tense {
 466     // Tense is not applicable in the analyzed language or is not predicted.
 467     TENSE_UNKNOWN = 0;
 468
 469     // Conditional
 470     CONDITIONAL_TENSE = 1;
 471
 472     // Future
 473     FUTURE = 2;
 474
 475     // Past
 476     PAST = 3;
 477
 478     // Present
 479     PRESENT = 4;
 480
 481     // Imperfect
 482     IMPERFECT = 5;
 483
 484     // Pluperfect
 485     PLUPERFECT = 6;
 486   }
 487
 488   // The relationship between the action that a verb expresses and the
 489   // participants identified by its arguments.
 490   enum Voice {
 491     // Voice is not applicable in the analyzed language or is not predicted.
 492     VOICE_UNKNOWN = 0;
 493
 494     // Active
 495     ACTIVE = 1;
 496
 497     // Causative
 498     CAUSATIVE = 2;
 499
 500     // Passive
 501     PASSIVE = 3;
 502   }
 503
 504   // The part of speech tag.
 505   Tag tag = 1;
 506
 507   // The grammatical aspect.
 508   Aspect aspect = 2;
 509
 510   // The grammatical case.
 511   Case case = 3;
 512
 513   // The grammatical form.
 514   Form form = 4;
 515
 516   // The grammatical gender.
 517   Gender gender = 5;
 518
 519   // The grammatical mood.
 520   Mood mood = 6;
 521
 522   // The grammatical number.
 523   Number number = 7;
 524
 525   // The grammatical person.
 526   Person person = 8;
 527
 528   // The grammatical properness.
 529   Proper proper = 9;
 530
 531   // The grammatical reciprocity.
 532   Reciprocity reciprocity = 10;
 533
 534   // The grammatical tense.
 535   Tense tense = 11;
 536
 537   // The grammatical voice.
 538   Voice voice = 12;
 539 }
 540
 541 // Represents dependency parse tree information for a token.
 542 message DependencyEdge {
 543   // The parse label enum for the token.
 544   enum Label {
 545     // Unknown
 546     UNKNOWN = 0;
 547
 548     // Abbreviation modifier
 549     ABBREV = 1;
 550
 551     // Adjectival complement
 552     ACOMP = 2;
 553
 554     // Adverbial clause modifier
 555     ADVCL = 3;
 556
 557     // Adverbial modifier
 558     ADVMOD = 4;
 559
 560     // Adjectival modifier of an NP
 561     AMOD = 5;
 562
 563     // Appositional modifier of an NP
 564     APPOS = 6;
 565
 566     // Attribute dependent of a copular verb
 567     ATTR = 7;
 568
 569     // Auxiliary (non-main) verb
 570     AUX = 8;
 571
 572     // Passive auxiliary
 573     AUXPASS = 9;
 574
 575     // Coordinating conjunction
 576     CC = 10;
 577
 578     // Clausal complement of a verb or adjective
 579     CCOMP = 11;
 580
 581     // Conjunct
 582     CONJ = 12;
 583
 584     // Clausal subject
 585     CSUBJ = 13;
 586
 587     // Clausal passive subject
 588     CSUBJPASS = 14;
 589
 590     // Dependency (unable to determine)
 591     DEP = 15;
 592
 593     // Determiner
 594     DET = 16;
 595
 596     // Discourse
 597     DISCOURSE = 17;
 598
 599     // Direct object
 600     DOBJ = 18;
 601
 602     // Expletive
 603     EXPL = 19;
 604
 605     // Goes with (part of a word in a text not well edited)
 606     GOESWITH = 20;
 607
 608     // Indirect object
 609     IOBJ = 21;
 610
 611     // Marker (word introducing a subordinate clause)
 612     MARK = 22;
 613
 614     // Multi-word expression
 615     MWE = 23;
 616
 617     // Multi-word verbal expression
 618     MWV = 24;
 619
 620     // Negation modifier
 621     NEG = 25;
 622
 623     // Noun compound modifier
 624     NN = 26;
 625
 626     // Noun phrase used as an adverbial modifier
 627     NPADVMOD = 27;
 628
 629     // Nominal subject
 630     NSUBJ = 28;
 631
 632     // Passive nominal subject
 633     NSUBJPASS = 29;
 634
 635     // Numeric modifier of a noun
 636     NUM = 30;
 637
 638     // Element of compound number
 639     NUMBER = 31;
 640
 641     // Punctuation mark
 642     P = 32;
 643
 644     // Parataxis relation
 645     PARATAXIS = 33;
 646
 647     // Participial modifier
 648     PARTMOD = 34;
 649
 650     // The complement of a preposition is a clause
 651     PCOMP = 35;
 652
 653     // Object of a preposition
 654     POBJ = 36;
 655
 656     // Possession modifier
 657     POSS = 37;
 658
 659     // Postverbal negative particle
 660     POSTNEG = 38;
 661
 662     // Predicate complement
 663     PRECOMP = 39;
 664
 665     // Preconjunt
 666     PRECONJ = 40;
 667
 668     // Predeterminer
 669     PREDET = 41;
 670
 671     // Prefix
 672     PREF = 42;
 673
 674     // Prepositional modifier
 675     PREP = 43;
 676
 677     // The relationship between a verb and verbal morpheme
 678     PRONL = 44;
 679
 680     // Particle
 681     PRT = 45;
 682
 683     // Associative or possessive marker
 684     PS = 46;
 685
 686     // Quantifier phrase modifier
 687     QUANTMOD = 47;
 688
 689     // Relative clause modifier
 690     RCMOD = 48;
 691
 692     // Complementizer in relative clause
 693     RCMODREL = 49;
 694
 695     // Ellipsis without a preceding predicate
 696     RDROP = 50;
 697
 698     // Referent
 699     REF = 51;
 700
 701     // Remnant
 702     REMNANT = 52;
 703
 704     // Reparandum
 705     REPARANDUM = 53;
 706
 707     // Root
 708     ROOT = 54;
 709
 710     // Suffix specifying a unit of number
 711     SNUM = 55;
 712
 713     // Suffix
 714     SUFF = 56;
 715
 716     // Temporal modifier
 717     TMOD = 57;
 718
 719     // Topic marker
 720     TOPIC = 58;
 721
 722     // Clause headed by an infinite form of the verb that modifies a noun
 723     VMOD = 59;
 724
 725     // Vocative
 726     VOCATIVE = 60;
 727
 728     // Open clausal complement
 729     XCOMP = 61;
 730
 731     // Name suffix
 732     SUFFIX = 62;
 733
 734     // Name title
 735     TITLE = 63;
 736
 737     // Adverbial phrase modifier
 738     ADVPHMOD = 64;
 739
 740     // Causative auxiliary
 741     AUXCAUS = 65;
 742
 743     // Helper auxiliary
 744     AUXVV = 66;
 745
 746     // Rentaishi (Prenominal modifier)
 747     DTMOD = 67;
 748
 749     // Foreign words
 750     FOREIGN = 68;
 751
 752     // Keyword
 753     KW = 69;
 754
 755     // List for chains of comparable items
 756     LIST = 70;
 757
 758     // Nominalized clause
 759     NOMC = 71;
 760
 761     // Nominalized clausal subject
 762     NOMCSUBJ = 72;
 763
 764     // Nominalized clausal passive
 765     NOMCSUBJPASS = 73;
 766
 767     // Compound of numeric modifier
 768     NUMC = 74;
 769
 770     // Copula
 771     COP = 75;
 772
 773     // Dislocated relation (for fronted/topicalized elements)
 774     DISLOCATED = 76;
 775   }
 776
 777   // Represents the head of this token in the dependency tree.
 778   // This is the index of the token which has an arc going to this token.
 779   // The index is the position of the token in the array of tokens returned
 780   // by the API method. If this token is a root token, then the
 781   // `head_token_index` is its own index.
 782   int32 head_token_index = 1;
 783
 784   // The parse label for the token.
 785   Label label = 2;
 786 }
 787
 788 // Represents a mention for an entity in the text. Currently, proper noun
 789 // mentions are supported.
 790 message EntityMention {
 791   // The supported types of mentions.
 792   enum Type {
 793     // Unknown
 794     TYPE_UNKNOWN = 0;
 795
 796     // Proper name
 797     PROPER = 1;
 798
 799     // Common noun (or noun compound)
 800     COMMON = 2;
 801   }
 802
 803   // The mention text.
 804   TextSpan text = 1;
 805
 806   // The type of the entity mention.
 807   Type type = 2;
 808 }
 809
 810 // Represents an output piece of text.
 811 message TextSpan {
 812   // The content of the output text.
 813   string content = 1;
 814
 815   // The API calculates the beginning offset of the content in the original
 816   // document according to the
 817   // [EncodingType][google.cloud.language.v1beta1.EncodingType] specified in the
 818   // API request.
 819   int32 begin_offset = 2;
 820 }
 821
 822 // The sentiment analysis request message.
 823 message AnalyzeSentimentRequest {
 824   // Input document.
 825   Document document = 1;
 826
 827   // The encoding type used by the API to calculate sentence offsets for the
 828   // sentence sentiment.
 829   EncodingType encoding_type = 2;
 830 }
 831
 832 // The sentiment analysis response message.
 833 message AnalyzeSentimentResponse {
 834   // The overall sentiment of the input document.
 835   Sentiment document_sentiment = 1;
 836
 837   // The language of the text, which will be the same as the language specified
 838   // in the request or, if not specified, the automatically-detected language.
 839   // See [Document.language][google.cloud.language.v1beta1.Document.language]
 840   // field for more details.
 841   string language = 2;
 842
 843   // The sentiment for all the sentences in the document.
 844   repeated Sentence sentences = 3;
 845 }
 846
 847 // The entity analysis request message.
 848 message AnalyzeEntitiesRequest {
 849   // Input document.
 850   Document document = 1;
 851
 852   // The encoding type used by the API to calculate offsets.
 853   EncodingType encoding_type = 2;
 854 }
 855
 856 // The entity analysis response message.
 857 message AnalyzeEntitiesResponse {
 858   // The recognized entities in the input document.
 859   repeated Entity entities = 1;
 860
 861   // The language of the text, which will be the same as the language specified
 862   // in the request or, if not specified, the automatically-detected language.
 863   // See [Document.language][google.cloud.language.v1beta1.Document.language]
 864   // field for more details.
 865   string language = 2;
 866 }
 867
 868 // The syntax analysis request message.
 869 message AnalyzeSyntaxRequest {
 870   // Input document.
 871   Document document = 1;
 872
 873   // The encoding type used by the API to calculate offsets.
 874   EncodingType encoding_type = 2;
 875 }
 876
 877 // The syntax analysis response message.
 878 message AnalyzeSyntaxResponse {
 879   // Sentences in the input document.
 880   repeated Sentence sentences = 1;
 881
 882   // Tokens, along with their syntactic information, in the input document.
 883   repeated Token tokens = 2;
 884
 885   // The language of the text, which will be the same as the language specified
 886   // in the request or, if not specified, the automatically-detected language.
 887   // See [Document.language][google.cloud.language.v1beta1.Document.language]
 888   // field for more details.
 889   string language = 3;
 890 }
 891
 892 // The request message for the text annotation API, which can perform multiple
 893 // analysis types (sentiment, entities, and syntax) in one call.
 894 message AnnotateTextRequest {
 895   // All available features for sentiment, syntax, and semantic analysis.
 896   // Setting each one to true will enable that specific analysis for the input.
 897   message Features {
 898     // Extract syntax information.
 899     bool extract_syntax = 1;
 900
 901     // Extract entities.
 902     bool extract_entities = 2;
 903
 904     // Extract document-level sentiment.
 905     bool extract_document_sentiment = 3;
 906   }
 907
 908   // Input document.
 909   Document document = 1;
 910
 911   // The enabled features.
 912   Features features = 2;
 913
 914   // The encoding type used by the API to calculate offsets.
 915   EncodingType encoding_type = 3;
 916 }
 917
 918 // The text annotations response message.
 919 message AnnotateTextResponse {
 920   // Sentences in the input document. Populated if the user enables
 921   // [AnnotateTextRequest.Features.extract_syntax][google.cloud.language.v1beta1.AnnotateTextRequest.Features.extract_syntax].
 922   repeated Sentence sentences = 1;
 923
 924   // Tokens, along with their syntactic information, in the input document.
 925   // Populated if the user enables
 926   // [AnnotateTextRequest.Features.extract_syntax][google.cloud.language.v1beta1.AnnotateTextRequest.Features.extract_syntax].
 927   repeated Token tokens = 2;
 928
 929   // Entities, along with their semantic information, in the input document.
 930   // Populated if the user enables
 931   // [AnnotateTextRequest.Features.extract_entities][google.cloud.language.v1beta1.AnnotateTextRequest.Features.extract_entities].
 932   repeated Entity entities = 3;
 933
 934   // The overall sentiment for the document. Populated if the user enables
 935   // [AnnotateTextRequest.Features.extract_document_sentiment][google.cloud.language.v1beta1.AnnotateTextRequest.Features.extract_document_sentiment].
 936   Sentiment document_sentiment = 4;
 937
 938   // The language of the text, which will be the same as the language specified
 939   // in the request or, if not specified, the automatically-detected language.
 940   // See [Document.language][google.cloud.language.v1beta1.Document.language]
 941   // field for more details.
 942   string language = 5;
 943 }
 944
 945 // Represents the text encoding that the caller uses to process the output.
 946 // Providing an `EncodingType` is recommended because the API provides the
 947 // beginning offsets for various outputs, such as tokens and mentions, and
 948 // languages that natively use different text encodings may access offsets
 949 // differently.
 950 enum EncodingType {
 951   // If `EncodingType` is not specified, encoding-dependent information (such as
 952   // `begin_offset`) will be set at `-1`.
 953   NONE = 0;
 954
 955   // Encoding-dependent information (such as `begin_offset`) is calculated based
 956   // on the UTF-8 encoding of the input. C++ and Go are examples of languages
 957   // that use this encoding natively.
 958   UTF8 = 1;
 959
 960   // Encoding-dependent information (such as `begin_offset`) is calculated based
 961   // on the UTF-16 encoding of the input. Java and Javascript are examples of
 962   // languages that use this encoding natively.
 963   UTF16 = 2;
 964
 965   // Encoding-dependent information (such as `begin_offset`) is calculated based
 966   // on the UTF-32 encoding of the input. Python is an example of a language
 967   // that uses this encoding natively.
 968   UTF32 = 3;
 969 }