// Copyright 2018 Google Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
syntax = "proto3";

package google.cloud.vision.v1p3beta1;

import "google/api/annotations.proto";
import "google/cloud/vision/v1p3beta1/geometry.proto";
import "google/cloud/vision/v1p3beta1/product_search.proto";
import "google/cloud/vision/v1p3beta1/text_annotation.proto";
import "google/cloud/vision/v1p3beta1/web_detection.proto";
import "google/longrunning/operations.proto";
import "google/protobuf/timestamp.proto";
import "google/rpc/status.proto";
import "google/type/color.proto";
import "google/type/latlng.proto";

option cc_enable_arenas = true;
option go_package = "google.golang.org/genproto/googleapis/cloud/vision/v1p3beta1;vision";
option java_multiple_files = true;
option java_outer_classname = "ImageAnnotatorProto";
option java_package = "com.google.cloud.vision.v1p3beta1";
// Service that performs Google Cloud Vision API detection tasks over client
// images, such as face, landmark, logo, label, and text detection. The
// ImageAnnotator service returns detected entities from the images.
service ImageAnnotator {
  // Run image detection and annotation for a batch of images.
  rpc BatchAnnotateImages(BatchAnnotateImagesRequest)
      returns (BatchAnnotateImagesResponse) {
    option (google.api.http) = {
      post: "/v1p3beta1/images:annotate"
      body: "*"
    };
  }

  // Run asynchronous image detection and annotation for a list of generic
  // files, such as PDF files, which may contain multiple pages and multiple
  // images per page. Progress and results can be retrieved through the
  // `google.longrunning.Operations` interface.
  // `Operation.metadata` contains `OperationMetadata` (metadata).
  // `Operation.response` contains `AsyncBatchAnnotateFilesResponse` (results).
  rpc AsyncBatchAnnotateFiles(AsyncBatchAnnotateFilesRequest)
      returns (google.longrunning.Operation) {
    option (google.api.http) = {
      post: "/v1p3beta1/files:asyncBatchAnnotate"
      body: "*"
    };
  }
}
// The type of Google Cloud Vision API detection to perform, and the maximum
// number of results to return for that type. Multiple `Feature` objects can
// be specified in the `features` list.
message Feature {
  // Type of Google Cloud Vision API feature to be extracted.
  enum Type {
    // Unspecified feature type.
    TYPE_UNSPECIFIED = 0;

    // Run face detection.
    FACE_DETECTION = 1;

    // Run landmark detection.
    LANDMARK_DETECTION = 2;

    // Run logo detection.
    LOGO_DETECTION = 3;

    // Run label detection.
    LABEL_DETECTION = 4;

    // Run text detection / optical character recognition (OCR). Text detection
    // is optimized for areas of text within a larger image; if the image is
    // a document, use `DOCUMENT_TEXT_DETECTION` instead.
    TEXT_DETECTION = 5;

    // Run dense text document OCR. Takes precedence when both
    // `DOCUMENT_TEXT_DETECTION` and `TEXT_DETECTION` are present.
    DOCUMENT_TEXT_DETECTION = 11;

    // Run Safe Search to detect potentially unsafe
    // or undesirable content.
    SAFE_SEARCH_DETECTION = 6;

    // Compute a set of image properties, such as the
    // image's dominant colors.
    IMAGE_PROPERTIES = 7;

    // Run crop hints.
    CROP_HINTS = 9;

    // Run web detection.
    WEB_DETECTION = 10;

    // Run Product Search.
    PRODUCT_SEARCH = 12;

    // Run localizer for object detection.
    OBJECT_LOCALIZATION = 19;
  }

  // The feature type.
  Type type = 1;

  // Maximum number of results of this type. Does not apply to
  // `TEXT_DETECTION`, `DOCUMENT_TEXT_DETECTION`, or `CROP_HINTS`.
  int32 max_results = 2;

  // Model to use for the feature.
  // Supported values: "builtin/stable" (the default if unset) and
  // "builtin/latest".
  string model = 3;
}
// External image source (Google Cloud Storage or web URL image location).
message ImageSource {
  // **Use `image_uri` instead.**
  //
  // The Google Cloud Storage URI of the form
  // `gs://bucket_name/object_name`. Object versioning is not supported. See
  // [Google Cloud Storage Request
  // URIs](https://cloud.google.com/storage/docs/reference-uris) for more info.
  string gcs_image_uri = 1;

  // The URI of the source image. Can be either:
  //
  // 1. A Google Cloud Storage URI of the form
  //    `gs://bucket_name/object_name`. Object versioning is not supported. See
  //    [Google Cloud Storage Request
  //    URIs](https://cloud.google.com/storage/docs/reference-uris) for more
  //    info.
  //
  // 2. A publicly-accessible image HTTP/HTTPS URL. When fetching images from
  //    HTTP/HTTPS URLs, Google cannot guarantee that the request will be
  //    completed. Your request may fail if the specified host denies the
  //    request (e.g. due to request throttling or DOS prevention), or if
  //    Google throttles requests to the site for abuse prevention. You should
  //    not depend on externally-hosted images for production applications.
  //
  // When both `gcs_image_uri` and `image_uri` are specified, `image_uri` takes
  // precedence.
  string image_uri = 2;
}
// Client image to perform Google Cloud Vision API tasks over.
message Image {
  // Image content, represented as a stream of bytes.
  // Note: As with all `bytes` fields, protobuffers use a pure binary
  // representation, whereas JSON representations use base64.
  bytes content = 1;

  // Google Cloud Storage image location, or publicly-accessible image
  // URL. If both `content` and `source` are provided for an image, `content`
  // takes precedence and is used to perform the image annotation request.
  ImageSource source = 2;
}
// A face annotation object contains the results of face detection.
message FaceAnnotation {
  // A face-specific landmark (for example, a face feature).
  message Landmark {
    // Face landmark (feature) type.
    // Left and right are defined from the vantage of the viewer of the image
    // without considering mirror projections typical of photos. So, `LEFT_EYE`,
    // typically, is the person's right eye.
    enum Type {
      // Unknown face landmark detected. Should not be filled.
      UNKNOWN_LANDMARK = 0;

      // Left eye.
      LEFT_EYE = 1;

      // Right eye.
      RIGHT_EYE = 2;

      // Left of left eyebrow.
      LEFT_OF_LEFT_EYEBROW = 3;

      // Right of left eyebrow.
      RIGHT_OF_LEFT_EYEBROW = 4;

      // Left of right eyebrow.
      LEFT_OF_RIGHT_EYEBROW = 5;

      // Right of right eyebrow.
      RIGHT_OF_RIGHT_EYEBROW = 6;

      // Midpoint between eyes.
      MIDPOINT_BETWEEN_EYES = 7;

      // Nose tip.
      NOSE_TIP = 8;

      // Upper lip.
      UPPER_LIP = 9;

      // Lower lip.
      LOWER_LIP = 10;

      // Mouth left.
      MOUTH_LEFT = 11;

      // Mouth right.
      MOUTH_RIGHT = 12;

      // Mouth center.
      MOUTH_CENTER = 13;

      // Nose, bottom right.
      NOSE_BOTTOM_RIGHT = 14;

      // Nose, bottom left.
      NOSE_BOTTOM_LEFT = 15;

      // Nose, bottom center.
      NOSE_BOTTOM_CENTER = 16;

      // Left eye, top boundary.
      LEFT_EYE_TOP_BOUNDARY = 17;

      // Left eye, right corner.
      LEFT_EYE_RIGHT_CORNER = 18;

      // Left eye, bottom boundary.
      LEFT_EYE_BOTTOM_BOUNDARY = 19;

      // Left eye, left corner.
      LEFT_EYE_LEFT_CORNER = 20;

      // Right eye, top boundary.
      RIGHT_EYE_TOP_BOUNDARY = 21;

      // Right eye, right corner.
      RIGHT_EYE_RIGHT_CORNER = 22;

      // Right eye, bottom boundary.
      RIGHT_EYE_BOTTOM_BOUNDARY = 23;

      // Right eye, left corner.
      RIGHT_EYE_LEFT_CORNER = 24;

      // Left eyebrow, upper midpoint.
      LEFT_EYEBROW_UPPER_MIDPOINT = 25;

      // Right eyebrow, upper midpoint.
      RIGHT_EYEBROW_UPPER_MIDPOINT = 26;

      // Left ear tragion.
      LEFT_EAR_TRAGION = 27;

      // Right ear tragion.
      RIGHT_EAR_TRAGION = 28;

      // Left eye pupil.
      LEFT_EYE_PUPIL = 29;

      // Right eye pupil.
      RIGHT_EYE_PUPIL = 30;

      // Forehead glabella.
      FOREHEAD_GLABELLA = 31;

      // Chin gnathion.
      CHIN_GNATHION = 32;

      // Chin left gonion.
      CHIN_LEFT_GONION = 33;

      // Chin right gonion.
      CHIN_RIGHT_GONION = 34;
    }

    // Face landmark type.
    Type type = 3;

    // Face landmark position.
    Position position = 4;
  }

  // The bounding polygon around the face. The coordinates of the bounding box
  // are in the original image's scale, as returned in `ImageParams`.
  // The bounding box is computed to "frame" the face in accordance with human
  // expectations. It is based on the landmarker results.
  // Note that one or more x and/or y coordinates may not be generated in the
  // `BoundingPoly` (the polygon will be unbounded) if only a partial face
  // appears in the image to be annotated.
  BoundingPoly bounding_poly = 1;

  // The `fd_bounding_poly` bounding polygon is tighter than the
  // `boundingPoly`, and encloses only the skin part of the face. Typically, it
  // is used to eliminate the face from any image analysis that detects the
  // "amount of skin" visible in an image. It is not based on the
  // landmarker results, only on the initial face detection, hence
  // the <code>fd</code> (face detection) prefix.
  BoundingPoly fd_bounding_poly = 2;

  // Detected face landmarks.
  repeated Landmark landmarks = 3;

  // Roll angle, which indicates the amount of clockwise/anti-clockwise rotation
  // of the face relative to the image vertical about the axis perpendicular to
  // the face. Range [-180,180].
  float roll_angle = 4;

  // Yaw angle, which indicates the leftward/rightward angle that the face is
  // pointing relative to the vertical plane perpendicular to the image. Range
  // [-180,180].
  float pan_angle = 5;

  // Pitch angle, which indicates the upwards/downwards angle that the face is
  // pointing relative to the image's horizontal plane. Range [-180,180].
  float tilt_angle = 6;

  // Detection confidence. Range [0, 1].
  float detection_confidence = 7;

  // Face landmarking confidence. Range [0, 1].
  float landmarking_confidence = 8;

  // Joy likelihood.
  Likelihood joy_likelihood = 9;

  // Sorrow likelihood.
  Likelihood sorrow_likelihood = 10;

  // Anger likelihood.
  Likelihood anger_likelihood = 11;

  // Surprise likelihood.
  Likelihood surprise_likelihood = 12;

  // Under-exposed likelihood.
  Likelihood under_exposed_likelihood = 13;

  // Blurred likelihood.
  Likelihood blurred_likelihood = 14;

  // Headwear likelihood.
  Likelihood headwear_likelihood = 15;
}
// Detected entity location information.
message LocationInfo {
  // lat/long location coordinates.
  google.type.LatLng lat_lng = 1;
}
// A `Property` consists of a user-supplied name/value pair.
message Property {
  // Name of the property.
  string name = 1;

  // Value of the property.
  string value = 2;

  // Value of numeric properties.
  uint64 uint64_value = 3;
}
// Set of detected entity features.
message EntityAnnotation {
  // Opaque entity ID. Some IDs may be available in
  // [Google Knowledge Graph Search
  // API](https://developers.google.com/knowledge-graph/).
  string mid = 1;

  // The language code for the locale in which the entity textual
  // `description` is expressed.
  string locale = 2;

  // Entity textual description, expressed in its `locale` language.
  string description = 3;

  // Overall score of the result. Range [0, 1].
  float score = 4;

  // **Deprecated. Use `score` instead.**
  // The accuracy of the entity detection in an image.
  // For example, for an image in which the "Eiffel Tower" entity is detected,
  // this field represents the confidence that there is a tower in the query
  // image. Range [0, 1].
  float confidence = 5;

  // The relevancy of the ICA (Image Content Annotation) label to the
  // image. For example, the relevancy of "tower" is likely higher to an image
  // containing the detected "Eiffel Tower" than to an image containing a
  // detected distant towering building, even though the confidence that
  // there is a tower in each image may be the same. Range [0, 1].
  float topicality = 6;

  // Image region to which this entity belongs. Not produced
  // for `LABEL_DETECTION` features.
  BoundingPoly bounding_poly = 7;

  // The location information for the detected entity. Multiple
  // `LocationInfo` elements can be present because one location may
  // indicate the location of the scene in the image, and another location
  // may indicate the location of the place where the image was taken.
  // Location information is usually present for landmarks.
  repeated LocationInfo locations = 8;

  // Some entities may have optional user-supplied `Property` (name/value)
  // fields, such a score or string that qualifies the entity.
  repeated Property properties = 9;
}
// Set of detected objects with bounding boxes.
message LocalizedObjectAnnotation {
  // Object ID that should align with EntityAnnotation mid.
  string mid = 1;

  // The BCP-47 language code, such as "en-US" or "sr-Latn". For more
  // information, see
  // http://www.unicode.org/reports/tr35/#Unicode_locale_identifier.
  string language_code = 2;

  // Object name, expressed in its `language_code` language.
  string name = 3;

  // Score of the result. Range [0, 1].
  float score = 4;

  // Image region to which this object belongs. This must be populated.
  BoundingPoly bounding_poly = 5;
}
// Set of features pertaining to the image, computed by computer vision
// methods over safe-search verticals (for example, adult, spoof, medical,
// violence).
message SafeSearchAnnotation {
  // Represents the adult content likelihood for the image. Adult content may
  // contain elements such as nudity, pornographic images or cartoons, or
  // sexual activities.
  Likelihood adult = 1;

  // Spoof likelihood. The likelihood that an modification
  // was made to the image's canonical version to make it appear
  // funny or offensive.
  Likelihood spoof = 2;

  // Likelihood that this is a medical image.
  Likelihood medical = 3;

  // Likelihood that this image contains violent content.
  Likelihood violence = 4;

  // Likelihood that the request image contains racy content. Racy content may
  // include (but is not limited to) skimpy or sheer clothing, strategically
  // covered nudity, lewd or provocative poses, or close-ups of sensitive
  // body areas.
  Likelihood racy = 9;
}
// Rectangle determined by min and max `LatLng` pairs.
message LatLongRect {
  // Min lat/long pair.
  google.type.LatLng min_lat_lng = 1;

  // Max lat/long pair.
  google.type.LatLng max_lat_lng = 2;
}
// Color information consists of RGB channels, score, and the fraction of
// the image that the color occupies in the image.
message ColorInfo {
  // RGB components of the color.
  google.type.Color color = 1;

  // Image-specific score for this color. Value in range [0, 1].
  float score = 2;

  // The fraction of pixels the color occupies in the image.
  // Value in range [0, 1].
  float pixel_fraction = 3;
}
// Set of dominant colors and their corresponding scores.
message DominantColorsAnnotation {
  // RGB color values with their score and pixel fraction.
  repeated ColorInfo colors = 1;
}
// Stores image properties, such as dominant colors.
message ImageProperties {
  // If present, dominant colors completed successfully.
  DominantColorsAnnotation dominant_colors = 1;
}
// Single crop hint that is used to generate a new crop when serving an image.
message CropHint {
  // The bounding polygon for the crop region. The coordinates of the bounding
  // box are in the original image's scale, as returned in `ImageParams`.
  BoundingPoly bounding_poly = 1;

  // Confidence of this being a salient region. Range [0, 1].
  float confidence = 2;

  // Fraction of importance of this salient region with respect to the original
  // image.
  float importance_fraction = 3;
}
// Set of crop hints that are used to generate new crops when serving images.
message CropHintsAnnotation {
  // Crop hint results.
  repeated CropHint crop_hints = 1;
}
// Parameters for crop hints annotation request.
message CropHintsParams {
  // Aspect ratios in floats, representing the ratio of the width to the height
  // of the image. For example, if the desired aspect ratio is 4/3, the
  // corresponding float value should be 1.33333. If not specified, the
  // best possible crop is returned. The number of provided aspect ratios is
  // limited to a maximum of 16; any aspect ratios provided after the 16th are
  // ignored.
  repeated float aspect_ratios = 1;
}
// Parameters for web detection request.
message WebDetectionParams {
  // Whether to include results derived from the geo information in the image.
  bool include_geo_results = 2;
}
// Image context and/or feature-specific parameters.
message ImageContext {
  // Not used.
  LatLongRect lat_long_rect = 1;

  // List of languages to use for TEXT_DETECTION. In most cases, an empty value
  // yields the best results since it enables automatic language detection. For
  // languages based on the Latin alphabet, setting `language_hints` is not
  // needed. In rare cases, when the language of the text in the image is known,
  // setting a hint will help get better results (although it will be a
  // significant hindrance if the hint is wrong). Text detection returns an
  // error if one or more of the specified languages is not one of the
  // [supported languages](/vision/docs/languages).
  repeated string language_hints = 2;

  // Parameters for crop hints annotation request.
  CropHintsParams crop_hints_params = 4;

  // Parameters for product search.
  google.cloud.vision.v1p3beta1.ProductSearchParams product_search_params = 5;

  // Parameters for web detection.
  WebDetectionParams web_detection_params = 6;
}
// Request for performing Google Cloud Vision API tasks over a user-provided
// image, with user-requested features.
message AnnotateImageRequest {
  // The image to be processed.
  Image image = 1;

  // Requested features.
  repeated Feature features = 2;

  // Additional context that may accompany the image.
  ImageContext image_context = 3;
}
// If an image was produced from a file (e.g. a PDF), this message gives
// information about the source of that image.
message ImageAnnotationContext {
  // The URI of the file used to produce the image.
  string uri = 1;

  // If the file was a PDF or TIFF, this field gives the page number within
  // the file used to produce the image.
  int32 page_number = 2;
}
// Response to an image annotation request.
message AnnotateImageResponse {
  // If present, face detection has completed successfully.
  repeated FaceAnnotation face_annotations = 1;

  // If present, landmark detection has completed successfully.
  repeated EntityAnnotation landmark_annotations = 2;

  // If present, logo detection has completed successfully.
  repeated EntityAnnotation logo_annotations = 3;

  // If present, label detection has completed successfully.
  repeated EntityAnnotation label_annotations = 4;

  // If present, localized object detection has completed successfully.
  // This will be sorted descending by confidence score.
  repeated LocalizedObjectAnnotation localized_object_annotations = 22;

  // If present, text (OCR) detection has completed successfully.
  repeated EntityAnnotation text_annotations = 5;

  // If present, text (OCR) detection or document (OCR) text detection has
  // completed successfully.
  // This annotation provides the structural hierarchy for the OCR detected
  // text.
  TextAnnotation full_text_annotation = 12;

  // If present, safe-search annotation has completed successfully.
  SafeSearchAnnotation safe_search_annotation = 6;

  // If present, image properties were extracted successfully.
  ImageProperties image_properties_annotation = 8;

  // If present, crop hints have completed successfully.
  CropHintsAnnotation crop_hints_annotation = 11;

  // If present, web detection has completed successfully.
  WebDetection web_detection = 13;

  // If present, product search has completed successfully.
  google.cloud.vision.v1p3beta1.ProductSearchResults product_search_results =
      14;

  // If set, represents the error message for the operation.
  // Note that filled-in image annotations are guaranteed to be
  // correct, even when `error` is set.
  google.rpc.Status error = 9;

  // If present, contextual information is needed to understand where this image
  // comes from.
  ImageAnnotationContext context = 21;
}
// Response to a single file annotation request. A file may contain one or more
// images, which individually have their own responses.
message AnnotateFileResponse {
  // Information about the file for which this response is generated.
  InputConfig input_config = 1;

  // Individual responses to images found within the file.
  repeated AnnotateImageResponse responses = 2;
}
// Multiple image annotation requests are batched into a single service call.
message BatchAnnotateImagesRequest {
  // Individual image annotation requests for this batch.
  repeated AnnotateImageRequest requests = 1;
}
// Response to a batch image annotation request.
message BatchAnnotateImagesResponse {
  // Individual responses to image annotation requests within the batch.
  repeated AnnotateImageResponse responses = 1;
}
// An offline file annotation request.
message AsyncAnnotateFileRequest {
  // Required. Information about the input file.
  InputConfig input_config = 1;

  // Required. Requested features.
  repeated Feature features = 2;

  // Additional context that may accompany the image(s) in the file.
  ImageContext image_context = 3;

  // Required. The desired output location and metadata (e.g. format).
  OutputConfig output_config = 4;
}
// The response for a single offline file annotation request.
message AsyncAnnotateFileResponse {
  // The output location and metadata from AsyncAnnotateFileRequest.
  OutputConfig output_config = 1;
}
// Multiple async file annotation requests are batched into a single service
// call.
message AsyncBatchAnnotateFilesRequest {
  // Individual async file annotation requests for this batch.
  repeated AsyncAnnotateFileRequest requests = 1;
}
// Response to an async batch file annotation request.
message AsyncBatchAnnotateFilesResponse {
  // The list of file annotation responses, one for each request in
  // AsyncBatchAnnotateFilesRequest.
  repeated AsyncAnnotateFileResponse responses = 1;
}
// The desired input location and metadata.
message InputConfig {
  // The Google Cloud Storage location to read the input from.
  GcsSource gcs_source = 1;

  // The type of the file. Currently only "application/pdf" and "image/tiff"
  // are supported. Wildcards are not supported.
  string mime_type = 2;
}
// The desired output location and metadata.
message OutputConfig {
  // The Google Cloud Storage location to write the output(s) to.
  GcsDestination gcs_destination = 1;

  // The max number of response protos to put into each output JSON file on
  // Google Cloud Storage.
  // The valid range is [1, 100]. If not specified, the default value is 20.
  //
  // For example, for one pdf file with 100 pages, 100 response protos will
  // be generated. If `batch_size` = 20, then 5 json files each
  // containing 20 response protos will be written under the prefix
  // `gcs_destination`.`uri`.
  //
  // Currently, batch_size only applies to GcsDestination, with potential future
  // support for other output configurations.
  int32 batch_size = 2;
}
// The Google Cloud Storage location where the input will be read from.
message GcsSource {
  // Google Cloud Storage URI for the input file. This must only be a
  // Google Cloud Storage object. Wildcards are not currently supported.
  string uri = 1;
}
// The Google Cloud Storage location where the output will be written to.
message GcsDestination {
  // Google Cloud Storage URI where the results will be stored. Results will
  // be in JSON format and preceded by its corresponding input URI. This field
  // can either represent a single file, or a prefix for multiple outputs.
  // Prefixes must end in a `/`.
  //
  // Examples:
  //
  // *    File: gs://bucket-name/filename.json
  // *    Prefix: gs://bucket-name/prefix/here/
  // *    File: gs://bucket-name/prefix/here
  //
  // If multiple outputs, each response is still AnnotateFileResponse, each of
  // which contains some subset of the full list of AnnotateImageResponse.
  // Multiple outputs can happen if, for example, the output JSON is too large
  // and overflows into multiple sharded files.
  string uri = 1;
}
// A bucketized representation of likelihood, which is intended to give clients
// highly stable results across model upgrades.
enum Likelihood {
  // Unknown likelihood.
  UNKNOWN = 0;

  // It is very unlikely that the image belongs to the specified vertical.
  VERY_UNLIKELY = 1;

  // It is unlikely that the image belongs to the specified vertical.
  UNLIKELY = 2;

  // It is possible that the image belongs to the specified vertical.
  POSSIBLE = 3;

  // It is likely that the image belongs to the specified vertical.
  LIKELY = 4;

  // It is very likely that the image belongs to the specified vertical.
  VERY_LIKELY = 5;
}
// Contains metadata for the BatchAnnotateImages operation.
message OperationMetadata {
  // Batch operation states.
  enum State {
    // Invalid.
    STATE_UNSPECIFIED = 0;

    // Request is received.
    CREATED = 1;

    // Request is actively being processed.
    RUNNING = 2;

    // The batch processing is done.
    DONE = 3;

    // The batch processing was cancelled.
    CANCELLED = 4;
  }

  // Current state of the batch operation.
  State state = 1;

  // The time when the batch request was received.
  google.protobuf.Timestamp create_time = 5;

  // The time when the operation result was last updated.
  google.protobuf.Timestamp update_time = 6;
}