// Copyright 2017 Google Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
syntax = "proto3";

package google.cloud.vision.v1p1beta1;

import "google/api/annotations.proto";
import "google/cloud/vision/v1p1beta1/geometry.proto";
import "google/cloud/vision/v1p1beta1/text_annotation.proto";
import "google/cloud/vision/v1p1beta1/web_detection.proto";
import "google/rpc/status.proto";
import "google/type/color.proto";
import "google/type/latlng.proto";

option cc_enable_arenas = true;
option go_package = "google.golang.org/genproto/googleapis/cloud/vision/v1p1beta1;vision";
option java_multiple_files = true;
option java_outer_classname = "ImageAnnotatorProto";
option java_package = "com.google.cloud.vision.v1p1beta1";
// Service that performs Google Cloud Vision API detection tasks over client
// images, such as face, landmark, logo, label, and text detection. The
// ImageAnnotator service returns detected entities from the images.
service ImageAnnotator {
  // Run image detection and annotation for a batch of images.
  rpc BatchAnnotateImages(BatchAnnotateImagesRequest)
      returns (BatchAnnotateImagesResponse) {
    // NOTE(review): `body: "*"` restored per the standard googleapis
    // custom-method HTTP binding — confirm against the published API.
    option (google.api.http) = {
      post: "/v1p1beta1/images:annotate"
      body: "*"
    };
  }
}
47 // Users describe the type of Google Cloud Vision API tasks to perform over
48 // images by using *Feature*s. Each Feature indicates a type of image
49 // detection task to perform. Features encode the Cloud Vision API
50 // vertical to operate on and the number of top-scoring results to return.
52 // Type of image feature.
54 // Unspecified feature type.
57 // Run face detection.
60 // Run landmark detection.
61 LANDMARK_DETECTION = 2;
63 // Run logo detection.
66 // Run label detection.
72 // Run dense text document OCR. Takes precedence when both
73 // DOCUMENT_TEXT_DETECTION and TEXT_DETECTION are present.
74 DOCUMENT_TEXT_DETECTION = 11;
76 // Run computer vision models to compute image safe-search properties.
77 SAFE_SEARCH_DETECTION = 6;
79 // Compute a set of image properties, such as the image's dominant colors.
92 // Maximum number of results of this type.
93 int32 max_results = 2;
95 // Model to use for the feature.
96 // Supported values: "builtin/stable" (the default if unset) and
// External image source (Google Cloud Storage image location).
message ImageSource {
  // NOTE: For new code `image_uri` below is preferred.
  // Google Cloud Storage image URI, which must be in the following form:
  // `gs://bucket_name/object_name` (for details, see
  // [Google Cloud Storage Request
  // URIs](https://cloud.google.com/storage/docs/reference-uris)).
  // NOTE: Cloud Storage object versioning is not supported.
  string gcs_image_uri = 1;

  // Image URI which supports:
  // 1) Google Cloud Storage image URI, which must be in the following form:
  // `gs://bucket_name/object_name` (for details, see
  // [Google Cloud Storage Request
  // URIs](https://cloud.google.com/storage/docs/reference-uris)).
  // NOTE: Cloud Storage object versioning is not supported.
  // 2) Publicly accessible image HTTP/HTTPS URL.
  // This is preferred over the legacy `gcs_image_uri` above. When both
  // `gcs_image_uri` and `image_uri` are specified, `image_uri` takes
  // precedence.
  string image_uri = 2;
}
// Client image to perform Google Cloud Vision API tasks over.
message Image {
  // Image content, represented as a stream of bytes.
  // Note: as with all `bytes` fields, protobuffers use a pure binary
  // representation, whereas JSON representations use base64.
  // NOTE(review): field reconstructed from its comment and the `content`
  // reference below — confirm name/number against the published API.
  bytes content = 1;

  // Google Cloud Storage image location. If both `content` and `source`
  // are provided for an image, `content` takes precedence and is
  // used to perform the image annotation request.
  ImageSource source = 2;
}
137 // A face annotation object contains the results of face detection.
138 message FaceAnnotation {
139 // A face-specific landmark (for example, a face feature).
141 // Face landmark (feature) type.
142 // Left and right are defined from the vantage of the viewer of the image
143 // without considering mirror projections typical of photos. So, `LEFT_EYE`,
144 // typically, is the person's right eye.
146 // Unknown face landmark detected. Should not be filled.
147 UNKNOWN_LANDMARK = 0;
155 // Left of left eyebrow.
156 LEFT_OF_LEFT_EYEBROW = 3;
158 // Right of left eyebrow.
159 RIGHT_OF_LEFT_EYEBROW = 4;
161 // Left of right eyebrow.
162 LEFT_OF_RIGHT_EYEBROW = 5;
164 // Right of right eyebrow.
165 RIGHT_OF_RIGHT_EYEBROW = 6;
167 // Midpoint between eyes.
168 MIDPOINT_BETWEEN_EYES = 7;
188 // Nose, bottom right.
189 NOSE_BOTTOM_RIGHT = 14;
191 // Nose, bottom left.
192 NOSE_BOTTOM_LEFT = 15;
194 // Nose, bottom center.
195 NOSE_BOTTOM_CENTER = 16;
197 // Left eye, top boundary.
198 LEFT_EYE_TOP_BOUNDARY = 17;
200 // Left eye, right corner.
201 LEFT_EYE_RIGHT_CORNER = 18;
203 // Left eye, bottom boundary.
204 LEFT_EYE_BOTTOM_BOUNDARY = 19;
206 // Left eye, left corner.
207 LEFT_EYE_LEFT_CORNER = 20;
209 // Right eye, top boundary.
210 RIGHT_EYE_TOP_BOUNDARY = 21;
212 // Right eye, right corner.
213 RIGHT_EYE_RIGHT_CORNER = 22;
215 // Right eye, bottom boundary.
216 RIGHT_EYE_BOTTOM_BOUNDARY = 23;
218 // Right eye, left corner.
219 RIGHT_EYE_LEFT_CORNER = 24;
221 // Left eyebrow, upper midpoint.
222 LEFT_EYEBROW_UPPER_MIDPOINT = 25;
224 // Right eyebrow, upper midpoint.
225 RIGHT_EYEBROW_UPPER_MIDPOINT = 26;
228 LEFT_EAR_TRAGION = 27;
230 // Right ear tragion.
231 RIGHT_EAR_TRAGION = 28;
237 RIGHT_EYE_PUPIL = 30;
239 // Forehead glabella.
240 FOREHEAD_GLABELLA = 31;
246 CHIN_LEFT_GONION = 33;
248 // Chin right gonion.
249 CHIN_RIGHT_GONION = 34;
252 // Face landmark type.
255 // Face landmark position.
256 Position position = 4;
259 // The bounding polygon around the face. The coordinates of the bounding box
260 // are in the original image's scale, as returned in `ImageParams`.
261 // The bounding box is computed to "frame" the face in accordance with human
262 // expectations. It is based on the landmarker results.
263 // Note that one or more x and/or y coordinates may not be generated in the
264 // `BoundingPoly` (the polygon will be unbounded) if only a partial face
265 // appears in the image to be annotated.
266 BoundingPoly bounding_poly = 1;
268 // The `fd_bounding_poly` bounding polygon is tighter than the
269 // `boundingPoly`, and encloses only the skin part of the face. Typically, it
270 // is used to eliminate the face from any image analysis that detects the
271 // "amount of skin" visible in an image. It is not based on the
272 // landmarker results, only on the initial face detection, hence
273 // the <code>fd</code> (face detection) prefix.
274 BoundingPoly fd_bounding_poly = 2;
276 // Detected face landmarks.
277 repeated Landmark landmarks = 3;
279 // Roll angle, which indicates the amount of clockwise/anti-clockwise rotation
280 // of the face relative to the image vertical about the axis perpendicular to
281 // the face. Range [-180,180].
282 float roll_angle = 4;
284 // Yaw angle, which indicates the leftward/rightward angle that the face is
285 // pointing relative to the vertical plane perpendicular to the image. Range
289 // Pitch angle, which indicates the upwards/downwards angle that the face is
290 // pointing relative to the image's horizontal plane. Range [-180,180].
291 float tilt_angle = 6;
293 // Detection confidence. Range [0, 1].
294 float detection_confidence = 7;
296 // Face landmarking confidence. Range [0, 1].
297 float landmarking_confidence = 8;
300 Likelihood joy_likelihood = 9;
302 // Sorrow likelihood.
303 Likelihood sorrow_likelihood = 10;
306 Likelihood anger_likelihood = 11;
308 // Surprise likelihood.
309 Likelihood surprise_likelihood = 12;
311 // Under-exposed likelihood.
312 Likelihood under_exposed_likelihood = 13;
314 // Blurred likelihood.
315 Likelihood blurred_likelihood = 14;
317 // Headwear likelihood.
318 Likelihood headwear_likelihood = 15;
// Detected entity location information.
message LocationInfo {
  // lat/long location coordinates.
  google.type.LatLng lat_lng = 1;
}
// A `Property` consists of a user-supplied name/value pair.
message Property {
  // Name of the property.
  // NOTE(review): field reconstructed from its comment — confirm
  // name/number against the published API.
  string name = 1;

  // Value of the property.
  // NOTE(review): field reconstructed from its comment — confirm
  // name/number against the published API.
  string value = 2;

  // Value of numeric properties.
  uint64 uint64_value = 3;
}
// Set of detected entity features.
message EntityAnnotation {
  // Opaque entity ID. Some IDs may be available in
  // [Google Knowledge Graph Search
  // API](https://developers.google.com/knowledge-graph/).
  // NOTE(review): field reconstructed from its comment — confirm
  // name/number against the published API.
  string mid = 1;

  // The language code for the locale in which the entity textual
  // `description` is expressed.
  // NOTE(review): field reconstructed from its comment — confirm
  // name/number against the published API.
  string locale = 2;

  // Entity textual description, expressed in its `locale` language.
  string description = 3;

  // Overall score of the result. Range [0, 1].
  // NOTE(review): field reconstructed from its comment — confirm
  // name/number against the published API.
  float score = 4;

  // The accuracy of the entity detection in an image.
  // For example, for an image in which the "Eiffel Tower" entity is detected,
  // this field represents the confidence that there is a tower in the query
  // image. Range [0, 1].
  float confidence = 5;

  // The relevancy of the ICA (Image Content Annotation) label to the
  // image. For example, the relevancy of "tower" is likely higher to an image
  // containing the detected "Eiffel Tower" than to an image containing a
  // detected distant towering building, even though the confidence that
  // there is a tower in each image may be the same. Range [0, 1].
  float topicality = 6;

  // Image region to which this entity belongs. Not produced
  // for `LABEL_DETECTION` features.
  BoundingPoly bounding_poly = 7;

  // The location information for the detected entity. Multiple
  // `LocationInfo` elements can be present because one location may
  // indicate the location of the scene in the image, and another location
  // may indicate the location of the place where the image was taken.
  // Location information is usually present for landmarks.
  repeated LocationInfo locations = 8;

  // Some entities may have optional user-supplied `Property` (name/value)
  // fields, such as a score or string that qualifies the entity.
  repeated Property properties = 9;
}
// Set of features pertaining to the image, computed by computer vision
// methods over safe-search verticals (for example, adult, spoof, medical,
// violence).
message SafeSearchAnnotation {
  // Represents the adult content likelihood for the image. Adult content may
  // contain elements such as nudity, pornographic images or cartoons, or
  // sexual activities.
  Likelihood adult = 1;

  // Spoof likelihood. The likelihood that a modification
  // was made to the image's canonical version to make it appear
  // funny or offensive.
  Likelihood spoof = 2;

  // Likelihood that this is a medical image.
  Likelihood medical = 3;

  // Likelihood that this image contains violent content.
  Likelihood violence = 4;

  // Likelihood that the request image contains racy content. Racy content may
  // include (but is not limited to) skimpy or sheer clothing, strategically
  // covered nudity, lewd or provocative poses, or close-ups of sensitive
  // body areas.
  // NOTE(review): field reconstructed from its comment — confirm
  // name/number against the published API.
  Likelihood racy = 9;
}
// Rectangle determined by min and max `LatLng` pairs.
message LatLongRect {
  // Min lat/long pair.
  google.type.LatLng min_lat_lng = 1;

  // Max lat/long pair.
  google.type.LatLng max_lat_lng = 2;
}
// Color information consists of RGB channels, score, and the fraction of
// the image that the color occupies in the image.
message ColorInfo {
  // RGB components of the color.
  google.type.Color color = 1;

  // Image-specific score for this color. Value in range [0, 1].
  // NOTE(review): field reconstructed from its comment — confirm
  // name/number against the published API.
  float score = 2;

  // The fraction of pixels the color occupies in the image.
  // Value in range [0, 1].
  float pixel_fraction = 3;
}
// Set of dominant colors and their corresponding scores.
message DominantColorsAnnotation {
  // RGB color values with their score and pixel fraction.
  repeated ColorInfo colors = 1;
}
// Stores image properties, such as dominant colors.
message ImageProperties {
  // If present, dominant colors completed successfully.
  DominantColorsAnnotation dominant_colors = 1;
}
// Single crop hint that is used to generate a new crop when serving an image.
message CropHint {
  // The bounding polygon for the crop region. The coordinates of the bounding
  // box are in the original image's scale, as returned in `ImageParams`.
  BoundingPoly bounding_poly = 1;

  // Confidence of this being a salient region. Range [0, 1].
  float confidence = 2;

  // Fraction of importance of this salient region with respect to the original
  // image.
  float importance_fraction = 3;
}
// Set of crop hints that are used to generate new crops when serving images.
message CropHintsAnnotation {
  // Crop hint results.
  repeated CropHint crop_hints = 1;
}
// Parameters for crop hints annotation request.
message CropHintsParams {
  // Aspect ratios in floats, representing the ratio of the width to the height
  // of the image. For example, if the desired aspect ratio is 4/3, the
  // corresponding float value should be 1.33333. If not specified, the
  // best possible crop is returned. The number of provided aspect ratios is
  // limited to a maximum of 16; any aspect ratios provided after the 16th are
  // ignored.
  repeated float aspect_ratios = 1;
}
// Parameters for web detection request.
message WebDetectionParams {
  // Whether to include results derived from the geo information in the image.
  bool include_geo_results = 2;
}
// Image context and/or feature-specific parameters.
message ImageContext {
  // lat/long rectangle that specifies the location of the image.
  LatLongRect lat_long_rect = 1;

  // List of languages to use for TEXT_DETECTION. In most cases, an empty value
  // yields the best results since it enables automatic language detection. For
  // languages based on the Latin alphabet, setting `language_hints` is not
  // needed. In rare cases, when the language of the text in the image is known,
  // setting a hint will help get better results (although it will be a
  // significant hindrance if the hint is wrong). Text detection returns an
  // error if one or more of the specified languages is not one of the
  // [supported languages](/vision/docs/languages).
  repeated string language_hints = 2;

  // Parameters for crop hints annotation request.
  CropHintsParams crop_hints_params = 4;

  // Parameters for web detection.
  WebDetectionParams web_detection_params = 6;
}
// Request for performing Google Cloud Vision API tasks over a user-provided
// image, with user-requested features.
message AnnotateImageRequest {
  // The image to be processed.
  // NOTE(review): field reconstructed from its comment — confirm
  // name/number against the published API.
  Image image = 1;

  // Requested features.
  repeated Feature features = 2;

  // Additional context that may accompany the image.
  ImageContext image_context = 3;
}
// Response to an image annotation request.
message AnnotateImageResponse {
  // If present, face detection has completed successfully.
  repeated FaceAnnotation face_annotations = 1;

  // If present, landmark detection has completed successfully.
  repeated EntityAnnotation landmark_annotations = 2;

  // If present, logo detection has completed successfully.
  repeated EntityAnnotation logo_annotations = 3;

  // If present, label detection has completed successfully.
  repeated EntityAnnotation label_annotations = 4;

  // If present, text (OCR) detection has completed successfully.
  repeated EntityAnnotation text_annotations = 5;

  // If present, text (OCR) detection or document (OCR) text detection has
  // completed successfully.
  // This annotation provides the structural hierarchy for the OCR detected
  // text.
  TextAnnotation full_text_annotation = 12;

  // If present, safe-search annotation has completed successfully.
  SafeSearchAnnotation safe_search_annotation = 6;

  // If present, image properties were extracted successfully.
  ImageProperties image_properties_annotation = 8;

  // If present, crop hints have completed successfully.
  CropHintsAnnotation crop_hints_annotation = 11;

  // If present, web detection has completed successfully.
  WebDetection web_detection = 13;

  // If set, represents the error message for the operation.
  // Note that filled-in image annotations are guaranteed to be
  // correct, even when `error` is set.
  google.rpc.Status error = 9;
}
// Multiple image annotation requests are batched into a single service call.
message BatchAnnotateImagesRequest {
  // Individual image annotation requests for this batch.
  repeated AnnotateImageRequest requests = 1;
}
// Response to a batch image annotation request.
message BatchAnnotateImagesResponse {
  // Individual responses to image annotation requests within the batch.
  repeated AnnotateImageResponse responses = 1;
}
572 // A bucketized representation of likelihood, which is intended to give clients
573 // highly stable results across model upgrades.
575 // Unknown likelihood.
578 // It is very unlikely that the image belongs to the specified vertical.
581 // It is unlikely that the image belongs to the specified vertical.
584 // It is possible that the image belongs to the specified vertical.
587 // It is likely that the image belongs to the specified vertical.
590 // It is very likely that the image belongs to the specified vertical.