legacy-libs/google-proto-files/google/cloud/vision/v1p4beta1/image_annotator.proto

   1 // Copyright 2018 Google LLC.
   2 //
   3 // Licensed under the Apache License, Version 2.0 (the "License");
   4 // you may not use this file except in compliance with the License.
   5 // You may obtain a copy of the License at
   6 //
   7 //     http://www.apache.org/licenses/LICENSE-2.0
   8 //
   9 // Unless required by applicable law or agreed to in writing, software
  10 // distributed under the License is distributed on an "AS IS" BASIS,
  11 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  12 // See the License for the specific language governing permissions and
  13 // limitations under the License.
  14 //
  15
  16 syntax = "proto3";
  17
  18 package google.cloud.vision.v1p4beta1;
  19
  20 import "google/api/annotations.proto";
  21 import "google/cloud/vision/v1p4beta1/geometry.proto";
  22 import "google/cloud/vision/v1p4beta1/product_search.proto";
  23 import "google/cloud/vision/v1p4beta1/text_annotation.proto";
  24 import "google/cloud/vision/v1p4beta1/web_detection.proto";
  25 import "google/longrunning/operations.proto";
  26 import "google/protobuf/field_mask.proto";
  27 import "google/protobuf/timestamp.proto";
  28 import "google/rpc/status.proto";
  29 import "google/type/color.proto";
  30 import "google/type/latlng.proto";
  31
  32 option cc_enable_arenas = true;
  33 option go_package = "google.golang.org/genproto/googleapis/cloud/vision/v1p4beta1;vision";
  34 option java_multiple_files = true;
  35 option java_outer_classname = "ImageAnnotatorProto";
  36 option java_package = "com.google.cloud.vision.v1p4beta1";
  37 option objc_class_prefix = "GCVN";
  38
  39 // Service that performs Google Cloud Vision API detection tasks over client
  40 // images, such as face, landmark, logo, label, and text detection. The
  41 // ImageAnnotator service returns detected entities from the images.
  42 service ImageAnnotator {
  43   // Run image detection and annotation for a batch of images.
  44   rpc BatchAnnotateImages(BatchAnnotateImagesRequest)
  45       returns (BatchAnnotateImagesResponse) {
  46     option (google.api.http) = {
  47       post: "/v1p4beta1/images:annotate"
  48       body: "*"
  49     };
  50   }
  51
  52   // Service that performs image detection and annotation for a batch of files.
  53   // Now only "application/pdf", "image/tiff" and "image/gif" are supported.
  54   //
  55   // This service will extract at most the first 10 frames (gif) or pages
  56   // (pdf or tiff) from each file provided and perform detection and annotation
  57   // for each image extracted.
  58   rpc BatchAnnotateFiles(BatchAnnotateFilesRequest)
  59       returns (BatchAnnotateFilesResponse) {
  60     option (google.api.http) = {
  61       post: "/v1p4beta1/files:annotate"
  62       body: "*"
  63     };
  64   }
  65
  66   // Run asynchronous image detection and annotation for a list of images.
  67   //
  68   // Progress and results can be retrieved through the
  69   // `google.longrunning.Operations` interface.
  70   // `Operation.metadata` contains `OperationMetadata` (metadata).
  71   // `Operation.response` contains `AsyncBatchAnnotateImagesResponse` (results).
  72   //
  73   // This service will write image annotation outputs to json files in customer
  74   // GCS bucket, each json file containing BatchAnnotateImagesResponse proto.
  75   rpc AsyncBatchAnnotateImages(AsyncBatchAnnotateImagesRequest)
  76       returns (google.longrunning.Operation) {
  77     option (google.api.http) = {
  78       post: "/v1p4beta1/images:asyncBatchAnnotate"
  79       body: "*"
  80     };
  81   }
  82
  83   // Run asynchronous image detection and annotation for a list of generic
  84   // files, such as PDF files, which may contain multiple pages and multiple
  85   // images per page. Progress and results can be retrieved through the
  86   // `google.longrunning.Operations` interface.
  87   // `Operation.metadata` contains `OperationMetadata` (metadata).
  88   // `Operation.response` contains `AsyncBatchAnnotateFilesResponse` (results).
  89   rpc AsyncBatchAnnotateFiles(AsyncBatchAnnotateFilesRequest)
  90       returns (google.longrunning.Operation) {
  91     option (google.api.http) = {
  92       post: "/v1p4beta1/files:asyncBatchAnnotate"
  93       body: "*"
  94     };
  95   }
  96 }
  97
  98 // The type of Google Cloud Vision API detection to perform, and the maximum
  99 // number of results to return for that type. Multiple `Feature` objects can
 100 // be specified in the `features` list.
 101 message Feature {
 102   // Type of Google Cloud Vision API feature to be extracted.
 103   enum Type {
 104     // Unspecified feature type.
 105     TYPE_UNSPECIFIED = 0;
 106
 107     // Run face detection.
 108     FACE_DETECTION = 1;
 109
 110     // Run landmark detection.
 111     LANDMARK_DETECTION = 2;
 112
 113     // Run logo detection.
 114     LOGO_DETECTION = 3;
 115
 116     // Run label detection.
 117     LABEL_DETECTION = 4;
 118
 119     // Run text detection / optical character recognition (OCR). Text detection
 120     // is optimized for areas of text within a larger image; if the image is
 121     // a document, use `DOCUMENT_TEXT_DETECTION` instead.
 122     TEXT_DETECTION = 5;
 123
 124     // Run dense text document OCR. Takes precedence when both
 125     // `DOCUMENT_TEXT_DETECTION` and `TEXT_DETECTION` are present.
 126     DOCUMENT_TEXT_DETECTION = 11;
 127
 128     // Run Safe Search to detect potentially unsafe
 129     // or undesirable content.
 130     SAFE_SEARCH_DETECTION = 6;
 131
 132     // Compute a set of image properties, such as the
 133     // image's dominant colors.
 134     IMAGE_PROPERTIES = 7;
 135
 136     // Run crop hints.
 137     CROP_HINTS = 9;
 138
 139     // Run web detection.
 140     WEB_DETECTION = 10;
 141
 142     // Run Product Search.
 143     PRODUCT_SEARCH = 12;
 144
 145     // Run localizer for object detection.
 146     OBJECT_LOCALIZATION = 19;
 147   }
 148
 149   // The feature type.
 150   Type type = 1;
 151
 152   // Maximum number of results of this type. Does not apply to
 153   // `TEXT_DETECTION`, `DOCUMENT_TEXT_DETECTION`, or `CROP_HINTS`.
 154   int32 max_results = 2;
 155
 156   // Model to use for the feature.
 157   // Supported values: "builtin/stable" (the default if unset) and
 158   // "builtin/latest".
 159   string model = 3;
 160 }
 161
 162 // External image source (Google Cloud Storage or web URL image location).
 163 message ImageSource {
 164   // **Use `image_uri` instead.**
 165   //
 166   // The Google Cloud Storage  URI of the form
 167   // `gs://bucket_name/object_name`. Object versioning is not supported. See
 168   // [Google Cloud Storage Request
 169   // URIs](https://cloud.google.com/storage/docs/reference-uris) for more info.
 170   string gcs_image_uri = 1;
 171
 172   // The URI of the source image. Can be either:
 173   //
 174   // 1. A Google Cloud Storage URI of the form
 175   //    `gs://bucket_name/object_name`. Object versioning is not supported. See
 176   //    [Google Cloud Storage Request
 177   //    URIs](https://cloud.google.com/storage/docs/reference-uris) for more
 178   //    info.
 179   //
 180   // 2. A publicly-accessible image HTTP/HTTPS URL. When fetching images from
 181   //    HTTP/HTTPS URLs, Google cannot guarantee that the request will be
 182   //    completed. Your request may fail if the specified host denies the
 183   //    request (e.g. due to request throttling or DOS prevention), or if Google
 184   //    throttles requests to the site for abuse prevention. You should not
 185   //    depend on externally-hosted images for production applications.
 186   //
 187   // When both `gcs_image_uri` and `image_uri` are specified, `image_uri` takes
 188   // precedence.
 189   string image_uri = 2;
 190 }
 191
 192 // Client image to perform Google Cloud Vision API tasks over.
 193 message Image {
 194   // Image content, represented as a stream of bytes.
 195   // Note: As with all `bytes` fields, protobuffers use a pure binary
 196   // representation, whereas JSON representations use base64.
 197   bytes content = 1;
 198
 199   // Google Cloud Storage image location, or publicly-accessible image
 200   // URL. If both `content` and `source` are provided for an image, `content`
 201   // takes precedence and is used to perform the image annotation request.
 202   ImageSource source = 2;
 203 }
 204
 205 // A bucketized representation of likelihood, which is intended to give clients
 206 // highly stable results across model upgrades.
 207 enum Likelihood {
 208   // Unknown likelihood.
 209   UNKNOWN = 0;
 210
 211   // It is very unlikely that the image belongs to the specified vertical.
 212   VERY_UNLIKELY = 1;
 213
 214   // It is unlikely that the image belongs to the specified vertical.
 215   UNLIKELY = 2;
 216
 217   // It is possible that the image belongs to the specified vertical.
 218   POSSIBLE = 3;
 219
 220   // It is likely that the image belongs to the specified vertical.
 221   LIKELY = 4;
 222
 223   // It is very likely that the image belongs to the specified vertical.
 224   VERY_LIKELY = 5;
 225 }
 226
 227 // A face annotation object contains the results of face detection.
 228 message FaceAnnotation {
 229   // A face-specific landmark (for example, a face feature).
 230   message Landmark {
 231     // Face landmark (feature) type.
 232     // Left and right are defined from the vantage of the viewer of the image
 233     // without considering mirror projections typical of photos. So, `LEFT_EYE`,
 234     // typically, is the person's right eye.
 235     enum Type {
 236       // Unknown face landmark detected. Should not be filled.
 237       UNKNOWN_LANDMARK = 0;
 238
 239       // Left eye.
 240       LEFT_EYE = 1;
 241
 242       // Right eye.
 243       RIGHT_EYE = 2;
 244
 245       // Left of left eyebrow.
 246       LEFT_OF_LEFT_EYEBROW = 3;
 247
 248       // Right of left eyebrow.
 249       RIGHT_OF_LEFT_EYEBROW = 4;
 250
 251       // Left of right eyebrow.
 252       LEFT_OF_RIGHT_EYEBROW = 5;
 253
 254       // Right of right eyebrow.
 255       RIGHT_OF_RIGHT_EYEBROW = 6;
 256
 257       // Midpoint between eyes.
 258       MIDPOINT_BETWEEN_EYES = 7;
 259
 260       // Nose tip.
 261       NOSE_TIP = 8;
 262
 263       // Upper lip.
 264       UPPER_LIP = 9;
 265
 266       // Lower lip.
 267       LOWER_LIP = 10;
 268
 269       // Mouth left.
 270       MOUTH_LEFT = 11;
 271
 272       // Mouth right.
 273       MOUTH_RIGHT = 12;
 274
 275       // Mouth center.
 276       MOUTH_CENTER = 13;
 277
 278       // Nose, bottom right.
 279       NOSE_BOTTOM_RIGHT = 14;
 280
 281       // Nose, bottom left.
 282       NOSE_BOTTOM_LEFT = 15;
 283
 284       // Nose, bottom center.
 285       NOSE_BOTTOM_CENTER = 16;
 286
 287       // Left eye, top boundary.
 288       LEFT_EYE_TOP_BOUNDARY = 17;
 289
 290       // Left eye, right corner.
 291       LEFT_EYE_RIGHT_CORNER = 18;
 292
 293       // Left eye, bottom boundary.
 294       LEFT_EYE_BOTTOM_BOUNDARY = 19;
 295
 296       // Left eye, left corner.
 297       LEFT_EYE_LEFT_CORNER = 20;
 298
 299       // Right eye, top boundary.
 300       RIGHT_EYE_TOP_BOUNDARY = 21;
 301
 302       // Right eye, right corner.
 303       RIGHT_EYE_RIGHT_CORNER = 22;
 304
 305       // Right eye, bottom boundary.
 306       RIGHT_EYE_BOTTOM_BOUNDARY = 23;
 307
 308       // Right eye, left corner.
 309       RIGHT_EYE_LEFT_CORNER = 24;
 310
 311       // Left eyebrow, upper midpoint.
 312       LEFT_EYEBROW_UPPER_MIDPOINT = 25;
 313
 314       // Right eyebrow, upper midpoint.
 315       RIGHT_EYEBROW_UPPER_MIDPOINT = 26;
 316
 317       // Left ear tragion.
 318       LEFT_EAR_TRAGION = 27;
 319
 320       // Right ear tragion.
 321       RIGHT_EAR_TRAGION = 28;
 322
 323       // Left eye pupil.
 324       LEFT_EYE_PUPIL = 29;
 325
 326       // Right eye pupil.
 327       RIGHT_EYE_PUPIL = 30;
 328
 329       // Forehead glabella.
 330       FOREHEAD_GLABELLA = 31;
 331
 332       // Chin gnathion.
 333       CHIN_GNATHION = 32;
 334
 335       // Chin left gonion.
 336       CHIN_LEFT_GONION = 33;
 337
 338       // Chin right gonion.
 339       CHIN_RIGHT_GONION = 34;
 340     }
 341
 342     // Face landmark type.
 343     Type type = 3;
 344
 345     // Face landmark position.
 346     Position position = 4;
 347   }
 348
 349   // The bounding polygon around the face. The coordinates of the bounding box
 350   // are in the original image's scale.
 351   // The bounding box is computed to "frame" the face in accordance with human
 352   // expectations. It is based on the landmarker results.
 353   // Note that one or more x and/or y coordinates may not be generated in the
 354   // `BoundingPoly` (the polygon will be unbounded) if only a partial face
 355   // appears in the image to be annotated.
 356   BoundingPoly bounding_poly = 1;
 357
 358   // The `fd_bounding_poly` bounding polygon is tighter than the
 359   // `boundingPoly`, and encloses only the skin part of the face. Typically, it
 360   // is used to eliminate the face from any image analysis that detects the
 361   // "amount of skin" visible in an image. It is not based on the
 362   // landmarker results, only on the initial face detection, hence
 363   // the <code>fd</code> (face detection) prefix.
 364   BoundingPoly fd_bounding_poly = 2;
 365
 366   // Detected face landmarks.
 367   repeated Landmark landmarks = 3;
 368
 369   // Roll angle, which indicates the amount of clockwise/anti-clockwise rotation
 370   // of the face relative to the image vertical about the axis perpendicular to
 371   // the face. Range [-180,180].
 372   float roll_angle = 4;
 373
 374   // Yaw angle, which indicates the leftward/rightward angle that the face is
 375   // pointing relative to the vertical plane perpendicular to the image. Range
 376   // [-180,180].
 377   float pan_angle = 5;
 378
 379   // Pitch angle, which indicates the upwards/downwards angle that the face is
 380   // pointing relative to the image's horizontal plane. Range [-180,180].
 381   float tilt_angle = 6;
 382
 383   // Detection confidence. Range [0, 1].
 384   float detection_confidence = 7;
 385
 386   // Face landmarking confidence. Range [0, 1].
 387   float landmarking_confidence = 8;
 388
 389   // Joy likelihood.
 390   Likelihood joy_likelihood = 9;
 391
 392   // Sorrow likelihood.
 393   Likelihood sorrow_likelihood = 10;
 394
 395   // Anger likelihood.
 396   Likelihood anger_likelihood = 11;
 397
 398   // Surprise likelihood.
 399   Likelihood surprise_likelihood = 12;
 400
 401   // Under-exposed likelihood.
 402   Likelihood under_exposed_likelihood = 13;
 403
 404   // Blurred likelihood.
 405   Likelihood blurred_likelihood = 14;
 406
 407   // Headwear likelihood.
 408   Likelihood headwear_likelihood = 15;
 409 }
 410
 411 // Detected entity location information.
 412 message LocationInfo {
 413   // lat/long location coordinates.
 414   google.type.LatLng lat_lng = 1;
 415 }
 416
 417 // A `Property` consists of a user-supplied name/value pair.
 418 message Property {
 419   // Name of the property.
 420   string name = 1;
 421
 422   // Value of the property.
 423   string value = 2;
 424
 425   // Value of numeric properties.
 426   uint64 uint64_value = 3;
 427 }
 428
 429 // Set of detected entity features.
 430 message EntityAnnotation {
 431   // Opaque entity ID. Some IDs may be available in
 432   // [Google Knowledge Graph Search
 433   // API](https://developers.google.com/knowledge-graph/).
 434   string mid = 1;
 435
 436   // The language code for the locale in which the entity textual
 437   // `description` is expressed.
 438   string locale = 2;
 439
 440   // Entity textual description, expressed in its `locale` language.
 441   string description = 3;
 442
 443   // Overall score of the result. Range [0, 1].
 444   float score = 4;
 445
 446   // **Deprecated. Use `score` instead.**
 447   // The accuracy of the entity detection in an image.
 448   // For example, for an image in which the "Eiffel Tower" entity is detected,
 449   // this field represents the confidence that there is a tower in the query
 450   // image. Range [0, 1].
 451   float confidence = 5 [deprecated = true];
 452
 453   // The relevancy of the ICA (Image Content Annotation) label to the
 454   // image. For example, the relevancy of "tower" is likely higher to an image
 455   // containing the detected "Eiffel Tower" than to an image containing a
 456   // detected distant towering building, even though the confidence that
 457   // there is a tower in each image may be the same. Range [0, 1].
 458   float topicality = 6;
 459
 460   // Image region to which this entity belongs. Not produced
 461   // for `LABEL_DETECTION` features.
 462   BoundingPoly bounding_poly = 7;
 463
 464   // The location information for the detected entity. Multiple
 465   // `LocationInfo` elements can be present because one location may
 466   // indicate the location of the scene in the image, and another location
 467   // may indicate the location of the place where the image was taken.
 468   // Location information is usually present for landmarks.
 469   repeated LocationInfo locations = 8;
 470
 471   // Some entities may have optional user-supplied `Property` (name/value)
 472   // fields, such a score or string that qualifies the entity.
 473   repeated Property properties = 9;
 474 }
 475
 476 // Set of detected objects with bounding boxes.
 477 message LocalizedObjectAnnotation {
 478   // Object ID that should align with EntityAnnotation mid.
 479   string mid = 1;
 480
 481   // The BCP-47 language code, such as "en-US" or "sr-Latn". For more
 482   // information, see
 483   // http://www.unicode.org/reports/tr35/#Unicode_locale_identifier.
 484   string language_code = 2;
 485
 486   // Object name, expressed in its `language_code` language.
 487   string name = 3;
 488
 489   // Score of the result. Range [0, 1].
 490   float score = 4;
 491
 492   // Image region to which this object belongs. This must be populated.
 493   BoundingPoly bounding_poly = 5;
 494 }
 495
 496 // Set of features pertaining to the image, computed by computer vision
 497 // methods over safe-search verticals (for example, adult, spoof, medical,
 498 // violence).
 499 message SafeSearchAnnotation {
 500   // Represents the adult content likelihood for the image. Adult content may
 501   // contain elements such as nudity, pornographic images or cartoons, or
 502   // sexual activities.
 503   Likelihood adult = 1;
 504
 505   // Spoof likelihood. The likelihood that an modification
 506   // was made to the image's canonical version to make it appear
 507   // funny or offensive.
 508   Likelihood spoof = 2;
 509
 510   // Likelihood that this is a medical image.
 511   Likelihood medical = 3;
 512
 513   // Likelihood that this image contains violent content.
 514   Likelihood violence = 4;
 515
 516   // Likelihood that the request image contains racy content. Racy content may
 517   // include (but is not limited to) skimpy or sheer clothing, strategically
 518   // covered nudity, lewd or provocative poses, or close-ups of sensitive
 519   // body areas.
 520   Likelihood racy = 9;
 521 }
 522
 523 // Rectangle determined by min and max `LatLng` pairs.
 524 message LatLongRect {
 525   // Min lat/long pair.
 526   google.type.LatLng min_lat_lng = 1;
 527
 528   // Max lat/long pair.
 529   google.type.LatLng max_lat_lng = 2;
 530 }
 531
 532 // Color information consists of RGB channels, score, and the fraction of
 533 // the image that the color occupies in the image.
 534 message ColorInfo {
 535   // RGB components of the color.
 536   google.type.Color color = 1;
 537
 538   // Image-specific score for this color. Value in range [0, 1].
 539   float score = 2;
 540
 541   // The fraction of pixels the color occupies in the image.
 542   // Value in range [0, 1].
 543   float pixel_fraction = 3;
 544 }
 545
 546 // Set of dominant colors and their corresponding scores.
 547 message DominantColorsAnnotation {
 548   // RGB color values with their score and pixel fraction.
 549   repeated ColorInfo colors = 1;
 550 }
 551
 552 // Stores image properties, such as dominant colors.
 553 message ImageProperties {
 554   // If present, dominant colors completed successfully.
 555   DominantColorsAnnotation dominant_colors = 1;
 556 }
 557
 558 // Single crop hint that is used to generate a new crop when serving an image.
 559 message CropHint {
 560   // The bounding polygon for the crop region. The coordinates of the bounding
 561   // box are in the original image's scale.
 562   BoundingPoly bounding_poly = 1;
 563
 564   // Confidence of this being a salient region.  Range [0, 1].
 565   float confidence = 2;
 566
 567   // Fraction of importance of this salient region with respect to the original
 568   // image.
 569   float importance_fraction = 3;
 570 }
 571
 572 // Set of crop hints that are used to generate new crops when serving images.
 573 message CropHintsAnnotation {
 574   // Crop hint results.
 575   repeated CropHint crop_hints = 1;
 576 }
 577
 578 // Parameters for crop hints annotation request.
 579 message CropHintsParams {
 580   // Aspect ratios in floats, representing the ratio of the width to the height
 581   // of the image. For example, if the desired aspect ratio is 4/3, the
 582   // corresponding float value should be 1.33333.  If not specified, the
 583   // best possible crop is returned. The number of provided aspect ratios is
 584   // limited to a maximum of 16; any aspect ratios provided after the 16th are
 585   // ignored.
 586   repeated float aspect_ratios = 1;
 587 }
 588
 589 // Parameters for web detection request.
 590 message WebDetectionParams {
 591   // Whether to include results derived from the geo information in the image.
 592   bool include_geo_results = 2;
 593 }
 594
 595 // Image context and/or feature-specific parameters.
 596 message ImageContext {
 597   // Not used.
 598   LatLongRect lat_long_rect = 1;
 599
 600   // List of languages to use for TEXT_DETECTION. In most cases, an empty value
 601   // yields the best results since it enables automatic language detection. For
 602   // languages based on the Latin alphabet, setting `language_hints` is not
 603   // needed. In rare cases, when the language of the text in the image is known,
 604   // setting a hint will help get better results (although it will be a
 605   // significant hindrance if the hint is wrong). Text detection returns an
 606   // error if one or more of the specified languages is not one of the
 607   // [supported languages](/vision/docs/languages).
 608   repeated string language_hints = 2;
 609
 610   // Parameters for crop hints annotation request.
 611   CropHintsParams crop_hints_params = 4;
 612
 613   // Parameters for product search.
 614   ProductSearchParams product_search_params = 5;
 615
 616   // Parameters for web detection.
 617   WebDetectionParams web_detection_params = 6;
 618 }
 619
 620 // Request for performing Google Cloud Vision API tasks over a user-provided
 621 // image, with user-requested features, and with context information.
 622 message AnnotateImageRequest {
 623   // The image to be processed.
 624   Image image = 1;
 625
 626   // Requested features.
 627   repeated Feature features = 2;
 628
 629   // Additional context that may accompany the image.
 630   ImageContext image_context = 3;
 631 }
 632
 633 // If an image was produced from a file (e.g. a PDF), this message gives
 634 // information about the source of that image.
 635 message ImageAnnotationContext {
 636   // The URI of the file used to produce the image.
 637   string uri = 1;
 638
 639   // If the file was a PDF or TIFF, this field gives the page number within
 640   // the file used to produce the image.
 641   int32 page_number = 2;
 642 }
 643
 644 // Response to an image annotation request.
 645 message AnnotateImageResponse {
 646   // If present, face detection has completed successfully.
 647   repeated FaceAnnotation face_annotations = 1;
 648
 649   // If present, landmark detection has completed successfully.
 650   repeated EntityAnnotation landmark_annotations = 2;
 651
 652   // If present, logo detection has completed successfully.
 653   repeated EntityAnnotation logo_annotations = 3;
 654
 655   // If present, label detection has completed successfully.
 656   repeated EntityAnnotation label_annotations = 4;
 657
 658   // If present, localized object detection has completed successfully.
 659   // This will be sorted descending by confidence score.
 660   repeated LocalizedObjectAnnotation localized_object_annotations = 22;
 661
 662   // If present, text (OCR) detection has completed successfully.
 663   repeated EntityAnnotation text_annotations = 5;
 664
 665   // If present, text (OCR) detection or document (OCR) text detection has
 666   // completed successfully.
 667   // This annotation provides the structural hierarchy for the OCR detected
 668   // text.
 669   TextAnnotation full_text_annotation = 12;
 670
 671   // If present, safe-search annotation has completed successfully.
 672   SafeSearchAnnotation safe_search_annotation = 6;
 673
 674   // If present, image properties were extracted successfully.
 675   ImageProperties image_properties_annotation = 8;
 676
 677   // If present, crop hints have completed successfully.
 678   CropHintsAnnotation crop_hints_annotation = 11;
 679
 680   // If present, web detection has completed successfully.
 681   WebDetection web_detection = 13;
 682
 683   // If present, product search has completed successfully.
 684   ProductSearchResults product_search_results = 14;
 685
 686   // If set, represents the error message for the operation.
 687   // Note that filled-in image annotations are guaranteed to be
 688   // correct, even when `error` is set.
 689   google.rpc.Status error = 9;
 690
 691   // If present, contextual information is needed to understand where this image
 692   // comes from.
 693   ImageAnnotationContext context = 21;
 694 }
 695
 696 // Response to a single file annotation request. A file may contain one or more
 697 // images, which individually have their own responses.
 698 message AnnotateFileResponse {
 699   // Information about the file for which this response is generated.
 700   InputConfig input_config = 1;
 701
 702   // Individual responses to images found within the file.
 703   repeated AnnotateImageResponse responses = 2;
 704
 705   // This field gives the total number of pages in the file.
 706   int32 total_pages = 3;
 707 }
 708
 709 // Multiple image annotation requests are batched into a single service call.
 710 message BatchAnnotateImagesRequest {
 711   // Individual image annotation requests for this batch.
 712   repeated AnnotateImageRequest requests = 1;
 713 }
 714
 715 // Response to a batch image annotation request.
 716 message BatchAnnotateImagesResponse {
 717   // Individual responses to image annotation requests within the batch.
 718   repeated AnnotateImageResponse responses = 1;
 719 }
 720
 721 // A request to annotate one single file, e.g. a PDF, TIFF or GIF file.
 722 message AnnotateFileRequest {
 723   // Required. Information about the input file.
 724   InputConfig input_config = 1;
 725
 726   // Required. Requested features.
 727   repeated Feature features = 2;
 728
 729   // Additional context that may accompany the image(s) in the file.
 730   ImageContext image_context = 3;
 731
 732   // Pages of the file to perform image annotation.
 733   //
 734   // Pages starts from 1, we assume the first page of the file is page 1.
 735   // At most 5 pages are supported per request. Pages can be negative.
 736   //
 737   // Page 1 means the first page.
 738   // Page 2 means the second page.
 739   // Page -1 means the last page.
 740   // Page -2 means the second to the last page.
 741   //
 742   // If the file is GIF instead of PDF or TIFF, page refers to GIF frames.
 743   //
 744   // If this field is empty, by default the service performs image annotation
 745   // for the first 5 pages of the file.
 746   repeated int32 pages = 4;
 747 }
 748
 749 // A list of requests to annotate files using the BatchAnnotateFiles API.
 750 message BatchAnnotateFilesRequest {
 751   // The list of file annotation requests. Right now we support only one
 752   // AnnotateFileRequest in BatchAnnotateFilesRequest.
 753   repeated AnnotateFileRequest requests = 1;
 754 }
 755
 756 // A list of file annotation responses.
 757 message BatchAnnotateFilesResponse {
 758   // The list of file annotation responses, each response corresponding to each
 759   // AnnotateFileRequest in BatchAnnotateFilesRequest.
 760   repeated AnnotateFileResponse responses = 1;
 761 }
 762
 763 // An offline file annotation request.
 764 message AsyncAnnotateFileRequest {
 765   // Required. Information about the input file.
 766   InputConfig input_config = 1;
 767
 768   // Required. Requested features.
 769   repeated Feature features = 2;
 770
 771   // Additional context that may accompany the image(s) in the file.
 772   ImageContext image_context = 3;
 773
 774   // Required. The desired output location and metadata (e.g. format).
 775   OutputConfig output_config = 4;
 776 }
 777
 778 // The response for a single offline file annotation request.
 779 message AsyncAnnotateFileResponse {
 780   // The output location and metadata from AsyncAnnotateFileRequest.
 781   OutputConfig output_config = 1;
 782 }
 783
 784 // Request for async image annotation for a list of images.
 785 message AsyncBatchAnnotateImagesRequest {
 786   // Individual image annotation requests for this batch.
 787   repeated AnnotateImageRequest requests = 1;
 788
 789   // Required. The desired output location and metadata (e.g. format).
 790   OutputConfig output_config = 2;
 791 }
 792
 793 // Response to an async batch image annotation request.
 794 message AsyncBatchAnnotateImagesResponse {
 795   // The output location and metadata from AsyncBatchAnnotateImagesRequest.
 796   OutputConfig output_config = 1;
 797 }
 798
 799 // Multiple async file annotation requests are batched into a single service
 800 // call.
 801 message AsyncBatchAnnotateFilesRequest {
 802   // Individual async file annotation requests for this batch.
 803   repeated AsyncAnnotateFileRequest requests = 1;
 804 }
 805
 806 // Response to an async batch file annotation request.
 807 message AsyncBatchAnnotateFilesResponse {
 808   // The list of file annotation responses, one for each request in
 809   // AsyncBatchAnnotateFilesRequest.
 810   repeated AsyncAnnotateFileResponse responses = 1;
 811 }
 812
 813 // The desired input location and metadata.
 814 message InputConfig {
 815   // The Google Cloud Storage location to read the input from.
 816   GcsSource gcs_source = 1;
 817
 818   // File content, represented as a stream of bytes.
 819   // Note: As with all `bytes` fields, protobuffers use a pure binary
 820   // representation, whereas JSON representations use base64.
 821   //
 822   // Currently, this field only works for BatchAnnotateFiles requests. It does
 823   // not work for AsyncBatchAnnotateFiles requests.
 824   bytes content = 3;
 825
 826   // The type of the file. Currently only "application/pdf" and "image/tiff"
 827   // are supported. Wildcards are not supported.
 828   string mime_type = 2;
 829 }
 830
 831 // The desired output location and metadata.
 832 message OutputConfig {
 833   // The Google Cloud Storage location to write the output(s) to.
 834   GcsDestination gcs_destination = 1;
 835
 836   // The max number of response protos to put into each output JSON file on
 837   // Google Cloud Storage.
 838   // The valid range is [1, 100]. If not specified, the default value is 20.
 839   //
 840   // For example, for one pdf file with 100 pages, 100 response protos will
 841   // be generated. If `batch_size` = 20, then 5 json files each
 842   // containing 20 response protos will be written under the prefix
 843   // `gcs_destination`.`uri`.
 844   //
 845   // Currently, batch_size only applies to GcsDestination, with potential future
 846   // support for other output configurations.
 847   int32 batch_size = 2;
 848 }
 849
 850 // The Google Cloud Storage location where the input will be read from.
 851 message GcsSource {
 852   // Google Cloud Storage URI for the input file. This must only be a
 853   // Google Cloud Storage object. Wildcards are not currently supported.
 854   string uri = 1;
 855 }
 856
 857 // The Google Cloud Storage location where the output will be written to.
 858 message GcsDestination {
 859   // Google Cloud Storage URI where the results will be stored. Results will
 860   // be in JSON format and preceded by its corresponding input URI. This field
 861   // can either represent a single file, or a prefix for multiple outputs.
 862   // Prefixes must end in a `/`.
 863   //
 864   // Examples:
 865   //
 866   // *    File: gs://bucket-name/filename.json
 867   // *    Prefix: gs://bucket-name/prefix/here/
 868   // *    File: gs://bucket-name/prefix/here
 869   //
 870   // If multiple outputs, each response is still AnnotateFileResponse, each of
 871   // which contains some subset of the full list of AnnotateImageResponse.
 872   // Multiple outputs can happen if, for example, the output JSON is too large
 873   // and overflows into multiple sharded files.
 874   string uri = 1;
 875 }
 876
 877 // Contains metadata for the BatchAnnotateImages operation.
 878 message OperationMetadata {
 879   // Batch operation states.
 880   enum State {
 881     // Invalid.
 882     STATE_UNSPECIFIED = 0;
 883
 884     // Request is received.
 885     CREATED = 1;
 886
 887     // Request is actively being processed.
 888     RUNNING = 2;
 889
 890     // The batch processing is done.
 891     DONE = 3;
 892
 893     // The batch processing was cancelled.
 894     CANCELLED = 4;
 895   }
 896
 897   // Current state of the batch operation.
 898   State state = 1;
 899
 900   // The time when the batch request was received.
 901   google.protobuf.Timestamp create_time = 5;
 902
 903   // The time when the operation result was last updated.
 904   google.protobuf.Timestamp update_time = 6;
 905 }