legacy-libs/google-proto-files/google/cloud/vision/v1/image_annotator.proto

   1 // Copyright 2018 Google LLC.
   2 //
   3 // Licensed under the Apache License, Version 2.0 (the "License");
   4 // you may not use this file except in compliance with the License.
   5 // You may obtain a copy of the License at
   6 //
   7 //     http://www.apache.org/licenses/LICENSE-2.0
   8 //
   9 // Unless required by applicable law or agreed to in writing, software
  10 // distributed under the License is distributed on an "AS IS" BASIS,
  11 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  12 // See the License for the specific language governing permissions and
  13 // limitations under the License.
  14 //
  15
  16 syntax = "proto3";
  17
  18 package google.cloud.vision.v1;
  19
  20 import "google/api/annotations.proto";
  21 import "google/cloud/vision/v1/geometry.proto";
  22 import "google/cloud/vision/v1/product_search.proto";
  23 import "google/cloud/vision/v1/text_annotation.proto";
  24 import "google/cloud/vision/v1/web_detection.proto";
  25 import "google/longrunning/operations.proto";
  26 import "google/protobuf/field_mask.proto";
  27 import "google/protobuf/timestamp.proto";
  28 import "google/rpc/status.proto";
  29 import "google/type/color.proto";
  30 import "google/type/latlng.proto";
  31
  32 option cc_enable_arenas = true;
  33 option go_package = "google.golang.org/genproto/googleapis/cloud/vision/v1;vision";
  34 option java_multiple_files = true;
  35 option java_outer_classname = "ImageAnnotatorProto";
  36 option java_package = "com.google.cloud.vision.v1";
  37 option objc_class_prefix = "GCVN";
  38
  39 // Service that performs Google Cloud Vision API detection tasks over client
  40 // images, such as face, landmark, logo, label, and text detection. The
  41 // ImageAnnotator service returns detected entities from the images.
  42 service ImageAnnotator {
  43   // Run image detection and annotation for a batch of images.
       // The annotations are returned directly in `BatchAnnotateImagesResponse`
       // rather than through a long-running operation.
  44   rpc BatchAnnotateImages(BatchAnnotateImagesRequest)
  45       returns (BatchAnnotateImagesResponse) {
  46     option (google.api.http) = {
  47       post: "/v1/images:annotate"
  48       body: "*"
  49     };
  50   }
  51
  52   // Run asynchronous image detection and annotation for a list of generic
  53   // files, such as PDF files, which may contain multiple pages and multiple
  54   // images per page. Progress and results can be retrieved through the
  55   // `google.longrunning.Operations` interface.
  56   // `Operation.metadata` contains `OperationMetadata` (metadata).
  57   // `Operation.response` contains `AsyncBatchAnnotateFilesResponse` (results).
       // Each `AsyncAnnotateFileRequest` in the batch carries its own
       // `output_config` naming where its results are written.
  58   rpc AsyncBatchAnnotateFiles(AsyncBatchAnnotateFilesRequest)
  59       returns (google.longrunning.Operation) {
  60     option (google.api.http) = {
  61       post: "/v1/files:asyncBatchAnnotate"
  62       body: "*"
  63     };
  64   }
  65 }
  66
  67 // The type of Google Cloud Vision API detection to perform, and the maximum
  68 // number of results to return for that type. Multiple `Feature` objects can
  69 // be specified in the `features` list.
     // Used by `AnnotateImageRequest.features` and
     // `AsyncAnnotateFileRequest.features`.
  70 message Feature {
  71   // Type of Google Cloud Vision API feature to be extracted.
       // Values are declared grouped by feature rather than in numeric order, and
       // the numbers are not contiguous (8 and 13-18 do not appear in this enum).
  72   enum Type {
  73     // Unspecified feature type.
  74     TYPE_UNSPECIFIED = 0;
  75
  76     // Run face detection.
  77     FACE_DETECTION = 1;
  78
  79     // Run landmark detection.
  80     LANDMARK_DETECTION = 2;
  81
  82     // Run logo detection.
  83     LOGO_DETECTION = 3;
  84
  85     // Run label detection.
  86     LABEL_DETECTION = 4;
  87
  88     // Run text detection / optical character recognition (OCR). Text detection
  89     // is optimized for areas of text within a larger image; if the image is
  90     // a document, use `DOCUMENT_TEXT_DETECTION` instead.
  91     TEXT_DETECTION = 5;
  92
  93     // Run dense text document OCR. Takes precedence when both
  94     // `DOCUMENT_TEXT_DETECTION` and `TEXT_DETECTION` are present.
  95     DOCUMENT_TEXT_DETECTION = 11;
  96
  97     // Run Safe Search to detect potentially unsafe
  98     // or undesirable content.
  99     SAFE_SEARCH_DETECTION = 6;
 100
 101     // Compute a set of image properties, such as the
 102     // image's dominant colors.
 103     IMAGE_PROPERTIES = 7;
 104
 105     // Run crop hints.
 106     CROP_HINTS = 9;
 107
 108     // Run web detection.
 109     WEB_DETECTION = 10;
 110
 111     // Run Product Search.
 112     PRODUCT_SEARCH = 12;
 113
 114     // Run localizer for object detection.
 115     OBJECT_LOCALIZATION = 19;
 116   }
 117
 118   // The feature type.
 119   Type type = 1;
 120
 121   // Maximum number of results of this type. Does not apply to
 122   // `TEXT_DETECTION`, `DOCUMENT_TEXT_DETECTION`, or `CROP_HINTS`.
 123   int32 max_results = 2;
 124
 125   // Model to use for the feature.
 126   // Supported values: "builtin/stable" (the default if unset) and
 127   // "builtin/latest".
 128   string model = 3;
 129 }
 130
 131 // External image source (Google Cloud Storage or web URL image location).
     // Used by `Image.source`.
 132 message ImageSource {
 133   // **Use `image_uri` instead.**
 134   //
 135   // The Google Cloud Storage URI of the form
 136   // `gs://bucket_name/object_name`. Object versioning is not supported. See
 137   // [Google Cloud Storage Request
 138   // URIs](https://cloud.google.com/storage/docs/reference-uris) for more info.
 139   string gcs_image_uri = 1;
 140
 141   // The URI of the source image. Can be either:
 142   //
 143   // 1. A Google Cloud Storage URI of the form
 144   //    `gs://bucket_name/object_name`. Object versioning is not supported. See
 145   //    [Google Cloud Storage Request
 146   //    URIs](https://cloud.google.com/storage/docs/reference-uris) for more
 147   //    info.
 148   //
 149   // 2. A publicly-accessible image HTTP/HTTPS URL. When fetching images from
 150   //    HTTP/HTTPS URLs, Google cannot guarantee that the request will be
 151   //    completed. Your request may fail if the specified host denies the
 152   //    request (e.g. due to request throttling or DOS prevention), or if Google
 153   //    throttles requests to the site for abuse prevention. You should not
 154   //    depend on externally-hosted images for production applications.
 155   //
 156   // When both `gcs_image_uri` and `image_uri` are specified, `image_uri` takes
 157   // precedence.
 158   string image_uri = 2;
 159 }
 160
 161 // Client image to perform Google Cloud Vision API tasks over.
     // Used by `AnnotateImageRequest.image`.
 162 message Image {
 163   // Image content, represented as a stream of bytes.
 164   // Note: As with all `bytes` fields, protobuffers use a pure binary
 165   // representation, whereas JSON representations use base64.
 166   bytes content = 1;
 167
 168   // Google Cloud Storage image location, or publicly-accessible image
 169   // URL. If both `content` and `source` are provided for an image, `content`
 170   // takes precedence and is used to perform the image annotation request.
 171   ImageSource source = 2;
 172 }
 173
 174 // A face annotation object contains the results of face detection.
     // Returned in `AnnotateImageResponse.face_annotations`.
 175 message FaceAnnotation {
 176   // A face-specific landmark (for example, a face feature).
 177   message Landmark {
 178     // Face landmark (feature) type.
 179     // Left and right are defined from the vantage of the viewer of the image
 180     // without considering mirror projections typical of photos. So, `LEFT_EYE`,
 181     // typically, is the person's right eye.
 182     enum Type {
 183       // Unknown face landmark detected. Should not be filled.
 184       UNKNOWN_LANDMARK = 0;
 185
 186       // Left eye.
 187       LEFT_EYE = 1;
 188
 189       // Right eye.
 190       RIGHT_EYE = 2;
 191
 192       // Left of left eyebrow.
 193       LEFT_OF_LEFT_EYEBROW = 3;
 194
 195       // Right of left eyebrow.
 196       RIGHT_OF_LEFT_EYEBROW = 4;
 197
 198       // Left of right eyebrow.
 199       LEFT_OF_RIGHT_EYEBROW = 5;
 200
 201       // Right of right eyebrow.
 202       RIGHT_OF_RIGHT_EYEBROW = 6;
 203
 204       // Midpoint between eyes.
 205       MIDPOINT_BETWEEN_EYES = 7;
 206
 207       // Nose tip.
 208       NOSE_TIP = 8;
 209
 210       // Upper lip.
 211       UPPER_LIP = 9;
 212
 213       // Lower lip.
 214       LOWER_LIP = 10;
 215
 216       // Mouth left.
 217       MOUTH_LEFT = 11;
 218
 219       // Mouth right.
 220       MOUTH_RIGHT = 12;
 221
 222       // Mouth center.
 223       MOUTH_CENTER = 13;
 224
 225       // Nose, bottom right.
 226       NOSE_BOTTOM_RIGHT = 14;
 227
 228       // Nose, bottom left.
 229       NOSE_BOTTOM_LEFT = 15;
 230
 231       // Nose, bottom center.
 232       NOSE_BOTTOM_CENTER = 16;
 233
 234       // Left eye, top boundary.
 235       LEFT_EYE_TOP_BOUNDARY = 17;
 236
 237       // Left eye, right corner.
 238       LEFT_EYE_RIGHT_CORNER = 18;
 239
 240       // Left eye, bottom boundary.
 241       LEFT_EYE_BOTTOM_BOUNDARY = 19;
 242
 243       // Left eye, left corner.
 244       LEFT_EYE_LEFT_CORNER = 20;
 245
 246       // Right eye, top boundary.
 247       RIGHT_EYE_TOP_BOUNDARY = 21;
 248
 249       // Right eye, right corner.
 250       RIGHT_EYE_RIGHT_CORNER = 22;
 251
 252       // Right eye, bottom boundary.
 253       RIGHT_EYE_BOTTOM_BOUNDARY = 23;
 254
 255       // Right eye, left corner.
 256       RIGHT_EYE_LEFT_CORNER = 24;
 257
 258       // Left eyebrow, upper midpoint.
 259       LEFT_EYEBROW_UPPER_MIDPOINT = 25;
 260
 261       // Right eyebrow, upper midpoint.
 262       RIGHT_EYEBROW_UPPER_MIDPOINT = 26;
 263
 264       // Left ear tragion.
 265       LEFT_EAR_TRAGION = 27;
 266
 267       // Right ear tragion.
 268       RIGHT_EAR_TRAGION = 28;
 269
 270       // Left eye pupil.
 271       LEFT_EYE_PUPIL = 29;
 272
 273       // Right eye pupil.
 274       RIGHT_EYE_PUPIL = 30;
 275
 276       // Forehead glabella.
 277       FOREHEAD_GLABELLA = 31;
 278
 279       // Chin gnathion.
 280       CHIN_GNATHION = 32;
 281
 282       // Chin left gonion.
 283       CHIN_LEFT_GONION = 33;
 284
 285       // Chin right gonion.
 286       CHIN_RIGHT_GONION = 34;
 287     }
 288
 289     // Face landmark type.
         // (Field numbers in `Landmark` start at 3; 1 and 2 are not declared.)
 290     Type type = 3;
 291
 292     // Face landmark position.
 293     Position position = 4;
 294   }
 295
 296   // The bounding polygon around the face. The coordinates of the bounding box
 297   // are in the original image's scale, as returned in `ImageParams`.
 298   // The bounding box is computed to "frame" the face in accordance with human
 299   // expectations. It is based on the landmarker results.
 300   // Note that one or more x and/or y coordinates may not be generated in the
 301   // `BoundingPoly` (the polygon will be unbounded) if only a partial face
 302   // appears in the image to be annotated.
 303   BoundingPoly bounding_poly = 1;
 304
 305   // The `fd_bounding_poly` bounding polygon is tighter than the
 306   // `boundingPoly`, and encloses only the skin part of the face. Typically, it
 307   // is used to eliminate the face from any image analysis that detects the
 308   // "amount of skin" visible in an image. It is not based on the
 309   // landmarker results, only on the initial face detection, hence
 310   // the <code>fd</code> (face detection) prefix.
 311   BoundingPoly fd_bounding_poly = 2;
 312
 313   // Detected face landmarks.
 314   repeated Landmark landmarks = 3;
 315
 316   // Roll angle, which indicates the amount of clockwise/anti-clockwise rotation
 317   // of the face relative to the image vertical about the axis perpendicular to
 318   // the face. Range [-180,180].
 319   float roll_angle = 4;
 320
 321   // Yaw angle, which indicates the leftward/rightward angle that the face is
 322   // pointing relative to the vertical plane perpendicular to the image. Range
 323   // [-180,180].
 324   float pan_angle = 5;
 325
 326   // Pitch angle, which indicates the upwards/downwards angle that the face is
 327   // pointing relative to the image's horizontal plane. Range [-180,180].
 328   float tilt_angle = 6;
 329
 330   // Detection confidence. Range [0, 1].
 331   float detection_confidence = 7;
 332
 333   // Face landmarking confidence. Range [0, 1].
 334   float landmarking_confidence = 8;
 335
 336   // Joy likelihood.
 337   Likelihood joy_likelihood = 9;
 338
 339   // Sorrow likelihood.
 340   Likelihood sorrow_likelihood = 10;
 341
 342   // Anger likelihood.
 343   Likelihood anger_likelihood = 11;
 344
 345   // Surprise likelihood.
 346   Likelihood surprise_likelihood = 12;
 347
 348   // Under-exposed likelihood.
 349   Likelihood under_exposed_likelihood = 13;
 350
 351   // Blurred likelihood.
 352   Likelihood blurred_likelihood = 14;
 353
 354   // Headwear likelihood.
 355   Likelihood headwear_likelihood = 15;
 356 }
 357
 358 // Detected entity location information.
     // Used by `EntityAnnotation.locations`.
 359 message LocationInfo {
 360   // lat/long location coordinates.
 361   google.type.LatLng lat_lng = 1;
 362 }
 363
 364 // A `Property` consists of a user-supplied name/value pair.
     // Used by `EntityAnnotation.properties`.
 365 message Property {
 366   // Name of the property.
 367   string name = 1;
 368
 369   // Value of the property.
 370   string value = 2;
 371
 372   // Value of numeric properties.
 373   uint64 uint64_value = 3;
 374 }
 375
 376 // Set of detected entity features.
     // Used for the landmark, logo, label and text annotations of
     // `AnnotateImageResponse`.
 377 message EntityAnnotation {
 378   // Opaque entity ID. Some IDs may be available in
 379   // [Google Knowledge Graph Search
 380   // API](https://developers.google.com/knowledge-graph/).
 381   string mid = 1;
 382
 383   // The language code for the locale in which the entity textual
 384   // `description` is expressed.
 385   string locale = 2;
 386
 387   // Entity textual description, expressed in its `locale` language.
 388   string description = 3;
 389
 390   // Overall score of the result. Range [0, 1].
 391   float score = 4;
 392
 393   // **Deprecated. Use `score` instead.**
 394   // The accuracy of the entity detection in an image.
 395   // For example, for an image in which the "Eiffel Tower" entity is detected,
 396   // this field represents the confidence that there is a tower in the query
 397   // image. Range [0, 1].
 398   float confidence = 5 [deprecated = true];
 399
 400   // The relevancy of the ICA (Image Content Annotation) label to the
 401   // image. For example, the relevancy of "tower" is likely higher to an image
 402   // containing the detected "Eiffel Tower" than to an image containing a
 403   // detected distant towering building, even though the confidence that
 404   // there is a tower in each image may be the same. Range [0, 1].
 405   float topicality = 6;
 406
 407   // Image region to which this entity belongs. Not produced
 408   // for `LABEL_DETECTION` features.
 409   BoundingPoly bounding_poly = 7;
 410
 411   // The location information for the detected entity. Multiple
 412   // `LocationInfo` elements can be present because one location may
 413   // indicate the location of the scene in the image, and another location
 414   // may indicate the location of the place where the image was taken.
 415   // Location information is usually present for landmarks.
 416   repeated LocationInfo locations = 8;
 417
 418   // Some entities may have optional user-supplied `Property` (name/value)
 419   // fields, such as a score or string that qualifies the entity.
 420   repeated Property properties = 9;
 421 }
 422
 423 // Set of detected objects with bounding boxes.
     // Returned in `AnnotateImageResponse.localized_object_annotations`.
 424 message LocalizedObjectAnnotation {
 425   // Object ID that should align with EntityAnnotation mid.
 426   string mid = 1;
 427
 428   // The BCP-47 language code, such as "en-US" or "sr-Latn". For more
 429   // information, see
 430   // http://www.unicode.org/reports/tr35/#Unicode_locale_identifier.
 431   string language_code = 2;
 432
 433   // Object name, expressed in its `language_code` language.
 434   string name = 3;
 435
 436   // Score of the result. Range [0, 1].
 437   float score = 4;
 438
 439   // Image region to which this object belongs. This must be populated.
 440   BoundingPoly bounding_poly = 5;
 441 }
 442
 443 // Set of features pertaining to the image, computed by computer vision
 444 // methods over safe-search verticals (for example, adult, spoof, medical,
 445 // violence, racy).
     // Returned in `AnnotateImageResponse.safe_search_annotation`.
 446 message SafeSearchAnnotation {
 447   // Represents the adult content likelihood for the image. Adult content may
 448   // contain elements such as nudity, pornographic images or cartoons, or
 449   // sexual activities.
 450   Likelihood adult = 1;
 451
 452   // Spoof likelihood. The likelihood that a modification
 453   // was made to the image's canonical version to make it appear
 454   // funny or offensive.
 455   Likelihood spoof = 2;
 456
 457   // Likelihood that this is a medical image.
 458   Likelihood medical = 3;
 459
 460   // Likelihood that this image contains violent content.
 461   Likelihood violence = 4;
 462
 463   // Likelihood that the request image contains racy content. Racy content may
 464   // include (but is not limited to) skimpy or sheer clothing, strategically
 465   // covered nudity, lewd or provocative poses, or close-ups of sensitive
 466   // body areas.
 467   Likelihood racy = 9;
 468 }
 469
 470 // Rectangle determined by min and max `LatLng` pairs.
     // Referenced only by `ImageContext.lat_long_rect`, which is documented there
     // as not used.
 471 message LatLongRect {
 472   // Min lat/long pair.
 473   google.type.LatLng min_lat_lng = 1;
 474
 475   // Max lat/long pair.
 476   google.type.LatLng max_lat_lng = 2;
 477 }
 478
 479 // Color information consists of RGB channels, score, and the fraction of
 480 // the image that the color occupies in the image.
     // Element type of `DominantColorsAnnotation.colors`.
 481 message ColorInfo {
 482   // RGB components of the color.
 483   google.type.Color color = 1;
 484
 485   // Image-specific score for this color. Value in range [0, 1].
 486   float score = 2;
 487
 488   // The fraction of pixels the color occupies in the image.
 489   // Value in range [0, 1].
 490   float pixel_fraction = 3;
 491 }
 492
 493 // Set of dominant colors and their corresponding scores.
     // Used by `ImageProperties.dominant_colors`.
 494 message DominantColorsAnnotation {
 495   // RGB color values with their score and pixel fraction.
 496   repeated ColorInfo colors = 1;
 497 }
 498
 499 // Stores image properties, such as dominant colors.
     // Returned in `AnnotateImageResponse.image_properties_annotation`.
 500 message ImageProperties {
 501   // If present, dominant colors completed successfully.
 502   DominantColorsAnnotation dominant_colors = 1;
 503 }
 504
 505 // Single crop hint that is used to generate a new crop when serving an image.
     // Element type of `CropHintsAnnotation.crop_hints`.
 506 message CropHint {
 507   // The bounding polygon for the crop region. The coordinates of the bounding
 508   // box are in the original image's scale, as returned in `ImageParams`.
 509   BoundingPoly bounding_poly = 1;
 510
 511   // Confidence of this being a salient region. Range [0, 1].
 512   float confidence = 2;
 513
 514   // Fraction of importance of this salient region with respect to the original
 515   // image.
 516   float importance_fraction = 3;
 517 }
 518
 519 // Set of crop hints that are used to generate new crops when serving images.
     // Returned in `AnnotateImageResponse.crop_hints_annotation`.
 520 message CropHintsAnnotation {
 521   // Crop hint results.
 522   repeated CropHint crop_hints = 1;
 523 }
 524
 525 // Parameters for crop hints annotation request.
     // Supplied through `ImageContext.crop_hints_params`.
 526 message CropHintsParams {
 527   // Aspect ratios in floats, representing the ratio of the width to the height
 528   // of the image. For example, if the desired aspect ratio is 4/3, the
 529   // corresponding float value should be 1.33333. If not specified, the
 530   // best possible crop is returned. The number of provided aspect ratios is
 531   // limited to a maximum of 16; any aspect ratios provided after the 16th are
 532   // ignored.
 533   repeated float aspect_ratios = 1;
 534 }
 535
 536 // Parameters for web detection request.
     // Supplied through `ImageContext.web_detection_params`.
 537 message WebDetectionParams {
 538   // Whether to include results derived from the geo information in the image.
       // (This is the only field declared here, and it uses field number 2.)
 539   bool include_geo_results = 2;
 540 }
 541
 542 // Image context and/or feature-specific parameters.
     // Used by `AnnotateImageRequest.image_context` and
     // `AsyncAnnotateFileRequest.image_context`.
 543 message ImageContext {
 544   // Not used.
 545   LatLongRect lat_long_rect = 1;
 546
 547   // List of languages to use for TEXT_DETECTION. In most cases, an empty value
 548   // yields the best results since it enables automatic language detection. For
 549   // languages based on the Latin alphabet, setting `language_hints` is not
 550   // needed. In rare cases, when the language of the text in the image is known,
 551   // setting a hint will help get better results (although it will be a
 552   // significant hindrance if the hint is wrong). Text detection returns an
 553   // error if one or more of the specified languages is not one of the
 554   // [supported languages](/vision/docs/languages).
 555   repeated string language_hints = 2;
 556
 557   // Parameters for crop hints annotation request.
       // (Field number 3 is not declared in this message.)
 558   CropHintsParams crop_hints_params = 4;
 559
 560   // Parameters for product search.
 561   ProductSearchParams product_search_params = 5;
 562
 563   // Parameters for web detection.
 564   WebDetectionParams web_detection_params = 6;
 565 }
 566
 567 // Request for performing Google Cloud Vision API tasks over a user-provided
 568 // image, with user-requested features.
     // Element type of `BatchAnnotateImagesRequest.requests`.
 569 message AnnotateImageRequest {
 570   // The image to be processed.
 571   Image image = 1;
 572
 573   // Requested features.
 574   repeated Feature features = 2;
 575
 576   // Additional context that may accompany the image.
 577   ImageContext image_context = 3;
 578 }
 579
 580 // If an image was produced from a file (e.g. a PDF), this message gives
 581 // information about the source of that image.
     // Returned in `AnnotateImageResponse.context`.
 582 message ImageAnnotationContext {
 583   // The URI of the file used to produce the image.
 584   string uri = 1;
 585
 586   // If the file was a PDF or TIFF, this field gives the page number within
 587   // the file used to produce the image.
 588   int32 page_number = 2;
 589 }
 590
 591 // Response to an image annotation request.
     // Element type of `BatchAnnotateImagesResponse.responses` and
     // `AnnotateFileResponse.responses`. Fields are declared grouped by feature
     // rather than in field-number order.
 592 message AnnotateImageResponse {
 593   // If present, face detection has completed successfully.
 594   repeated FaceAnnotation face_annotations = 1;
 595
 596   // If present, landmark detection has completed successfully.
 597   repeated EntityAnnotation landmark_annotations = 2;
 598
 599   // If present, logo detection has completed successfully.
 600   repeated EntityAnnotation logo_annotations = 3;
 601
 602   // If present, label detection has completed successfully.
 603   repeated EntityAnnotation label_annotations = 4;
 604
 605   // If present, localized object detection has completed successfully.
 606   // This will be sorted descending by confidence score.
 607   repeated LocalizedObjectAnnotation localized_object_annotations = 22;
 608
 609   // If present, text (OCR) detection has completed successfully.
 610   repeated EntityAnnotation text_annotations = 5;
 611
 612   // If present, text (OCR) detection or document (OCR) text detection has
 613   // completed successfully.
 614   // This annotation provides the structural hierarchy for the OCR detected
 615   // text.
 616   TextAnnotation full_text_annotation = 12;
 617
 618   // If present, safe-search annotation has completed successfully.
 619   SafeSearchAnnotation safe_search_annotation = 6;
 620
 621   // If present, image properties were extracted successfully.
 622   ImageProperties image_properties_annotation = 8;
 623
 624   // If present, crop hints have completed successfully.
 625   CropHintsAnnotation crop_hints_annotation = 11;
 626
 627   // If present, web detection has completed successfully.
 628   WebDetection web_detection = 13;
 629
 630   // If present, product search has completed successfully.
 631   ProductSearchResults product_search_results = 14;
 632
 633   // If set, represents the error message for the operation.
 634   // Note that filled-in image annotations are guaranteed to be
 635   // correct, even when `error` is set.
 636   google.rpc.Status error = 9;
 637
 638   // If present, contextual information is needed to understand where this image
 639   // comes from.
 640   ImageAnnotationContext context = 21;
 641 }
 642
 643 // Response to a single file annotation request. A file may contain one or more
 644 // images, which individually have their own responses.
     // Per the `GcsDestination.uri` documentation, this is also the message
     // written to each output file of an async file annotation.
 645 message AnnotateFileResponse {
 646   // Information about the file for which this response is generated.
 647   InputConfig input_config = 1;
 648
 649   // Individual responses to images found within the file.
 650   repeated AnnotateImageResponse responses = 2;
 651 }
 652
 653 // Multiple image annotation requests are batched into a single service call.
     // Request message for the `ImageAnnotator.BatchAnnotateImages` RPC.
 654 message BatchAnnotateImagesRequest {
 655   // Individual image annotation requests for this batch.
 656   repeated AnnotateImageRequest requests = 1;
 657 }
 658
 659 // Response to a batch image annotation request.
     // Response message for the `ImageAnnotator.BatchAnnotateImages` RPC.
 660 message BatchAnnotateImagesResponse {
 661   // Individual responses to image annotation requests within the batch.
 662   repeated AnnotateImageResponse responses = 1;
 663 }
 664
 665 // An offline file annotation request.
     // Element type of `AsyncBatchAnnotateFilesRequest.requests`.
 666 message AsyncAnnotateFileRequest {
 667   // Required. Information about the input file.
 668   InputConfig input_config = 1;
 669
 670   // Required. Requested features.
 671   repeated Feature features = 2;
 672
 673   // Additional context that may accompany the image(s) in the file.
 674   ImageContext image_context = 3;
 675
 676   // Required. The desired output location and metadata (e.g. format).
 677   OutputConfig output_config = 4;
 678 }
 679
 680 // The response for a single offline file annotation request.
     // Element type of `AsyncBatchAnnotateFilesResponse.responses`.
 681 message AsyncAnnotateFileResponse {
 682   // The output location and metadata from AsyncAnnotateFileRequest.
 683   OutputConfig output_config = 1;
 684 }
 685
 686 // Multiple async file annotation requests are batched into a single service
 687 // call.
     // Request message for the `ImageAnnotator.AsyncBatchAnnotateFiles` RPC.
 688 message AsyncBatchAnnotateFilesRequest {
 689   // Individual async file annotation requests for this batch.
 690   repeated AsyncAnnotateFileRequest requests = 1;
 691 }
 692
 693 // Response to an async batch file annotation request.
     // Delivered as `Operation.response` of the long-running operation returned by
     // `ImageAnnotator.AsyncBatchAnnotateFiles`.
 694 message AsyncBatchAnnotateFilesResponse {
 695   // The list of file annotation responses, one for each request in
 696   // AsyncBatchAnnotateFilesRequest.
 697   repeated AsyncAnnotateFileResponse responses = 1;
 698 }
 699
 700 // The desired input location and metadata.
     // Used by `AsyncAnnotateFileRequest.input_config` and echoed back in
     // `AnnotateFileResponse.input_config`.
 701 message InputConfig {
 702   // The Google Cloud Storage location to read the input from.
 703   GcsSource gcs_source = 1;
 704
 705   // The type of the file. Currently only "application/pdf" and "image/tiff"
 706   // are supported. Wildcards are not supported.
 707   string mime_type = 2;
 708 }
 709
 710 // The desired output location and metadata.
     // Used by `AsyncAnnotateFileRequest.output_config` and
     // `AsyncAnnotateFileResponse.output_config`.
 711 message OutputConfig {
 712   // The Google Cloud Storage location to write the output(s) to.
 713   GcsDestination gcs_destination = 1;
 714
 715   // The max number of response protos to put into each output JSON file on
 716   // Google Cloud Storage.
 717   // The valid range is [1, 100]. If not specified, the default value is 20.
 718   //
 719   // For example, for one PDF file with 100 pages, 100 response protos will
 720   // be generated. If `batch_size` = 20, then 5 JSON files each
 721   // containing 20 response protos will be written under the prefix
 722   // `gcs_destination`.`uri`.
 723   //
 724   // Currently, batch_size only applies to GcsDestination, with potential future
 725   // support for other output configurations.
 726   int32 batch_size = 2;
 727 }
 728
 729 // The Google Cloud Storage location where the input will be read from.
     // Used by `InputConfig.gcs_source`.
 730 message GcsSource {
 731   // Google Cloud Storage URI for the input file. This must only be a
 732   // Google Cloud Storage object. Wildcards are not currently supported.
 733   string uri = 1;
 734 }
 735
 736 // The Google Cloud Storage location where the output will be written to.
     // Used by `OutputConfig.gcs_destination`.
 737 message GcsDestination {
 738   // Google Cloud Storage URI where the results will be stored. Results will
 739   // be in JSON format, each preceded by its corresponding input URI. This field
 740   // can either represent a single file, or a prefix for multiple outputs.
 741   // Prefixes must end in a `/`.
 742   //
 743   // Examples:
 744   //
 745   // *    File: gs://bucket-name/filename.json
 746   // *    Prefix: gs://bucket-name/prefix/here/
 747   // *    File: gs://bucket-name/prefix/here
 748   //
 749   // If multiple outputs, each response is still AnnotateFileResponse, each of
 750   // which contains some subset of the full list of AnnotateImageResponse.
 751   // Multiple outputs can happen if, for example, the output JSON is too large
 752   // and overflows into multiple sharded files.
 753   string uri = 1;
 754 }
 755
 756 // Contains metadata for the long-running `AsyncBatchAnnotateFiles` operation.
     // It is carried in `Operation.metadata` of the `google.longrunning.Operation`
     // that RPC returns; `BatchAnnotateImages` returns its response directly and
     // does not produce this message.
 757 message OperationMetadata {
 758   // Batch operation states.
 759   enum State {
 760     // Invalid.
 761     STATE_UNSPECIFIED = 0;
 762
 763     // Request is received.
 764     CREATED = 1;
 765
 766     // Request is actively being processed.
 767     RUNNING = 2;
 768
 769     // The batch processing is done.
 770     DONE = 3;
 771
 772     // The batch processing was cancelled.
 773     CANCELLED = 4;
 774   }
 775
 776   // Current state of the batch operation.
 777   State state = 1;
 778
 779   // The time when the batch request was received.
       // (Field numbers 2-4 are not declared in this message.)
 780   google.protobuf.Timestamp create_time = 5;
 781
 782   // The time when the operation result was last updated.
 783   google.protobuf.Timestamp update_time = 6;
 784 }
 785
 786 // A bucketized representation of likelihood, which is intended to give clients
 787 // highly stable results across model upgrades.
     // Used by the `*_likelihood` fields of `FaceAnnotation` and by every field of
     // `SafeSearchAnnotation`.
 788 enum Likelihood {
 789   // Unknown likelihood.
 790   UNKNOWN = 0;
 791
 792   // It is very unlikely that the image belongs to the specified vertical.
 793   VERY_UNLIKELY = 1;
 794
 795   // It is unlikely that the image belongs to the specified vertical.
 796   UNLIKELY = 2;
 797
 798   // It is possible that the image belongs to the specified vertical.
 799   POSSIBLE = 3;
 800
 801   // It is likely that the image belongs to the specified vertical.
 802   LIKELY = 4;
 803
 804   // It is very likely that the image belongs to the specified vertical.
 805   VERY_LIKELY = 5;
 806 }