1 // Copyright 2018 Google LLC.
3 // Licensed under the Apache License, Version 2.0 (the "License");
4 // you may not use this file except in compliance with the License.
5 // You may obtain a copy of the License at
7 // http://www.apache.org/licenses/LICENSE-2.0
9 // Unless required by applicable law or agreed to in writing, software
10 // distributed under the License is distributed on an "AS IS" BASIS,
11 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 // See the License for the specific language governing permissions and
13 // limitations under the License.
18 package google.cloud.videointelligence.v1p3beta1;
20 import "google/api/annotations.proto";
21 import "google/longrunning/operations.proto";
22 import "google/protobuf/duration.proto";
23 import "google/protobuf/timestamp.proto";
24 import "google/rpc/status.proto";
26 option csharp_namespace = "Google.Cloud.VideoIntelligence.V1P3Beta1";
27 option go_package = "google.golang.org/genproto/googleapis/cloud/videointelligence/v1p3beta1;videointelligence";
28 option java_multiple_files = true;
29 option java_outer_classname = "VideoIntelligenceServiceProto";
30 option java_package = "com.google.cloud.videointelligence.v1p3beta1";
31 option php_namespace = "Google\\Cloud\\VideoIntelligence\\V1p3beta1";
33 // Service that implements Google Cloud Video Intelligence API.
34 service VideoIntelligenceService {
35 // Performs asynchronous video annotation. Progress and results can be
36 // retrieved through the `google.longrunning.Operations` interface.
37 // `Operation.metadata` contains `AnnotateVideoProgress` (progress).
38 // `Operation.response` contains `AnnotateVideoResponse` (results).
39 rpc AnnotateVideo(AnnotateVideoRequest)
40 returns (google.longrunning.Operation) {
41 option (google.api.http) = {
42 post: "/v1p3beta1/videos:annotate"
// NOTE(review): the remainder of the google.api.http rule (request body
// binding) and the closing braces of the rpc and service are not visible
// in this excerpt — confirm against the upstream file.
48 // Service that implements Google Cloud Video Intelligence Streaming API.
49 service StreamingVideoIntelligenceService {
50 // Performs video annotation with bidirectional streaming: emitting results
51 // while sending video/audio bytes.
52 // This method is only available via the gRPC API (not REST).
53 rpc StreamingAnnotateVideo(stream StreamingAnnotateVideoRequest)
54 returns (stream StreamingAnnotateVideoResponse);
// NOTE(review): the service's closing "}" is not visible in this excerpt.
57 // Video annotation request.
58 message AnnotateVideoRequest {
59 // Input video location. Currently, only
60 // [Google Cloud Storage](https://cloud.google.com/storage/) URIs are
61 // supported, which must be specified in the following format:
62 // `gs://bucket-id/object-id` (other URI formats return
63 // [google.rpc.Code.INVALID_ARGUMENT][google.rpc.Code.INVALID_ARGUMENT]). For
64 // more information, see [Request URIs](/storage/docs/reference-uris). A video
65 // URI may include wildcards in `object-id`, and thus identify multiple
66 // videos. Supported wildcards: '*' to match 0 or more characters;
67 // '?' to match 1 character. If unset, the input video should be embedded
68 // in the request as `input_content`. If set, `input_content` should be unset.
// NOTE(review): the `input_uri` field declaration this comment documents is
// not visible in this excerpt — confirm against the upstream file.
71 // The video data bytes.
72 // If unset, the input video(s) should be specified via `input_uri`.
73 // If set, `input_uri` should be unset.
74 bytes input_content = 6;
76 // Requested video annotation features.
77 repeated Feature features = 2;
79 // Additional video context and/or feature-specific parameters.
80 VideoContext video_context = 3;
82 // Optional location where the output (in JSON format) should be stored.
83 // Currently, only [Google Cloud Storage](https://cloud.google.com/storage/)
84 // URIs are supported, which must be specified in the following format:
85 // `gs://bucket-id/object-id` (other URI formats return
86 // [google.rpc.Code.INVALID_ARGUMENT][google.rpc.Code.INVALID_ARGUMENT]). For
87 // more information, see [Request URIs](/storage/docs/reference-uris).
88 string output_uri = 4;
90 // Optional cloud region where annotation should take place. Supported cloud
91 // regions: `us-east1`, `us-west1`, `europe-west1`, `asia-east1`. If no region
92 // is specified, a region will be determined based on video file location.
93 string location_id = 5;
// NOTE(review): the message's closing "}" is not visible in this excerpt.
96 // Video context and/or feature-specific parameters.
97 message VideoContext {
98 // Video segments to annotate. The segments may overlap and are not required
99 // to be contiguous or span the whole video. If unspecified, each video is
100 // treated as a single segment.
101 repeated VideoSegment segments = 1;
103 // Config for LABEL_DETECTION.
104 LabelDetectionConfig label_detection_config = 2;
106 // Config for SHOT_CHANGE_DETECTION.
107 ShotChangeDetectionConfig shot_change_detection_config = 3;
109 // Config for EXPLICIT_CONTENT_DETECTION.
110 ExplicitContentDetectionConfig explicit_content_detection_config = 4;
112 // Config for TEXT_DETECTION.
113 TextDetectionConfig text_detection_config = 8;
// NOTE(review): field numbers 5-7 are skipped above — presumably used by
// other feature configs upstream; do not reuse them. Closing "}" is not
// visible in this excerpt.
116 // Config for LABEL_DETECTION.
117 message LabelDetectionConfig {
118 // What labels should be detected with LABEL_DETECTION, in addition to
119 // video-level labels or segment-level labels.
120 // If unspecified, defaults to `SHOT_MODE`.
121 LabelDetectionMode label_detection_mode = 1;
123 // Whether the video has been shot from a stationary (i.e. non-moving) camera.
124 // When set to true, might improve detection accuracy for moving objects.
125 // Should be used with `SHOT_AND_FRAME_MODE` enabled.
126 bool stationary_camera = 2;
128 // Model to use for label detection.
129 // Supported values: "builtin/stable" (the default if unset) and
// NOTE(review): the rest of this comment, the `model` field declaration it
// documents, and the closing "}" are not visible in this excerpt.
134 // Config for SHOT_CHANGE_DETECTION.
135 message ShotChangeDetectionConfig {
136 // Model to use for shot change detection.
137 // Supported values: "builtin/stable" (the default if unset) and
// NOTE(review): the rest of this comment, the `model` field declaration it
// documents, and the closing "}" are not visible in this excerpt.
142 // Config for EXPLICIT_CONTENT_DETECTION.
143 message ExplicitContentDetectionConfig {
144 // Model to use for explicit content detection.
145 // Supported values: "builtin/stable" (the default if unset) and
// NOTE(review): the rest of this comment, the `model` field declaration it
// documents, and the closing "}" are not visible in this excerpt.
150 // Config for TEXT_DETECTION.
151 message TextDetectionConfig {
152 // Language hint can be specified if the language to be detected is known a
153 // priori. It can increase the accuracy of the detection. Language hint must
154 // be language code in BCP-47 format.
156 // Automatic language detection is performed if no hint is provided.
157 repeated string language_hints = 1;
// NOTE(review): the message's closing "}" is not visible in this excerpt.
// Video segment, expressed as a [start, end] time range within the video.
161 message VideoSegment {
162 // Time-offset, relative to the beginning of the video,
163 // corresponding to the start of the segment (inclusive).
164 google.protobuf.Duration start_time_offset = 1;
166 // Time-offset, relative to the beginning of the video,
167 // corresponding to the end of the segment (inclusive).
168 google.protobuf.Duration end_time_offset = 2;
// NOTE(review): the message's closing "}" is not visible in this excerpt.
171 // Video segment level annotation results for label detection.
172 message LabelSegment {
173 // Video segment where a label was detected.
174 VideoSegment segment = 1;
176 // Confidence that the label is accurate. Range: [0, 1].
177 float confidence = 2;
// NOTE(review): the message's closing "}" is not visible in this excerpt.
180 // Video frame level annotation results for label detection.
// NOTE(review): the `message LabelFrame {` declaration line is not visible
// in this excerpt; the fields below presumably belong to it — confirm
// against the upstream file.
182 // Time-offset, relative to the beginning of the video, corresponding to the
183 // video frame for this location.
184 google.protobuf.Duration time_offset = 1;
186 // Confidence that the label is accurate. Range: [0, 1].
187 float confidence = 2;
190 // Detected entity from video analysis.
// NOTE(review): the `message Entity {` declaration line is not visible in
// this excerpt; the fields below presumably belong to it — confirm against
// the upstream file.
192 // Opaque entity ID. Some IDs may be available in
193 // [Google Knowledge Graph Search
194 // API](https://developers.google.com/knowledge-graph/).
195 string entity_id = 1;
197 // Textual description, e.g. `Fixed-gear bicycle`.
198 string description = 2;
200 // Language code for `description` in BCP-47 format.
201 string language_code = 3;
// Label annotation: one detected label with its entity, categories, and the
// segments/frames where it appears.
205 message LabelAnnotation {
// NOTE(review): the `Entity entity = 1;` field (and its comment) is not
// visible in this excerpt — confirm against the upstream file.
209 // Common categories for the detected entity.
210 // E.g. when the label is `Terrier` the category is likely `dog`. And in some
211 // cases there might be more than one category, e.g. `Terrier` could also be
213 repeated Entity category_entities = 2;
215 // All video segments where a label was detected.
216 repeated LabelSegment segments = 3;
218 // All video frames where a label was detected.
219 repeated LabelFrame frames = 4;
222 // Video frame level annotation results for explicit content.
223 message ExplicitContentFrame {
224 // Time-offset, relative to the beginning of the video, corresponding to the
225 // video frame for this location.
226 google.protobuf.Duration time_offset = 1;
228 // Likelihood of the pornography content.
229 Likelihood pornography_likelihood = 2;
// NOTE(review): the message's closing "}" is not visible in this excerpt.
232 // Explicit content annotation (based on per-frame visual signals only).
233 // If no explicit content has been detected in a frame, no annotations are
234 // present for that frame.
235 message ExplicitContentAnnotation {
236 // All video frames where explicit content was detected.
237 repeated ExplicitContentFrame frames = 1;
// NOTE(review): the message's closing "}" is not visible in this excerpt.
240 // Normalized bounding box.
241 // The normalized vertex coordinates are relative to the original image.
243 message NormalizedBoundingBox {
244 // Left X coordinate.
// NOTE(review): the field declarations of this message (presumably float
// left/top/right/bottom coordinates) and the "Top Y coordinate" comment are
// not visible in this excerpt — confirm against the upstream file.
250 // Right X coordinate.
253 // Bottom Y coordinate.
257 // Annotation results for a single video.
258 message VideoAnnotationResults {
259 // Video file location in
260 // [Google Cloud Storage](https://cloud.google.com/storage/).
261 string input_uri = 1;
263 // Label annotations on video level or user specified segment level.
264 // There is exactly one element for each unique label.
265 repeated LabelAnnotation segment_label_annotations = 2;
267 // Label annotations on shot level.
268 // There is exactly one element for each unique label.
269 repeated LabelAnnotation shot_label_annotations = 3;
271 // Label annotations on frame level.
272 // There is exactly one element for each unique label.
273 repeated LabelAnnotation frame_label_annotations = 4;
275 // Shot annotations. Each shot is represented as a video segment.
276 repeated VideoSegment shot_annotations = 6;
278 // Explicit content annotation.
279 ExplicitContentAnnotation explicit_annotation = 7;
281 // OCR text detection and tracking.
282 // Annotations for list of detected text snippets. Each will have list of
283 // frame information associated with it.
284 repeated TextAnnotation text_annotations = 12;
286 // Annotations for list of objects detected and tracked in video.
287 repeated ObjectTrackingAnnotation object_annotations = 14;
289 // If set, indicates an error. Note that for a single `AnnotateVideoRequest`
290 // some videos may succeed and some may fail.
291 google.rpc.Status error = 9;
// NOTE(review): field numbers 5, 8, 10-11, and 13 are skipped above —
// presumably used or reserved upstream; do not reuse them. Closing "}" is
// not visible in this excerpt.
294 // Video annotation response. Included in the `response`
295 // field of the `Operation` returned by the `GetOperation`
296 // call of the `google::longrunning::Operations` service.
297 message AnnotateVideoResponse {
298 // Annotation results for all videos specified in `AnnotateVideoRequest`.
299 repeated VideoAnnotationResults annotation_results = 1;
// NOTE(review): the message's closing "}" is not visible in this excerpt.
302 // Annotation progress for a single video.
303 message VideoAnnotationProgress {
304 // Video file location in
305 // [Google Cloud Storage](https://cloud.google.com/storage/).
306 string input_uri = 1;
308 // Approximate percentage processed thus far. Guaranteed to be
309 // 100 when fully processed.
310 int32 progress_percent = 2;
312 // Time when the request was received.
313 google.protobuf.Timestamp start_time = 3;
315 // Time of the most recent update.
316 google.protobuf.Timestamp update_time = 4;
// NOTE(review): the message's closing "}" is not visible in this excerpt.
319 // Video annotation progress. Included in the `metadata`
320 // field of the `Operation` returned by the `GetOperation`
321 // call of the `google::longrunning::Operations` service.
322 message AnnotateVideoProgress {
323 // Progress metadata for all videos specified in `AnnotateVideoRequest`.
324 repeated VideoAnnotationProgress annotation_progress = 1;
// NOTE(review): the message's closing "}" is not visible in this excerpt.
327 // A vertex represents a 2D point in the image.
328 // NOTE: the normalized vertex coordinates are relative to the original image
329 // and range from 0 to 1.
330 message NormalizedVertex {
// NOTE(review): the field declarations of this message (presumably float x
// and y coordinates) and the closing "}" are not visible in this excerpt —
// confirm against the upstream file.
338 // Normalized bounding polygon for text (that might not be aligned with axis).
339 // Contains list of the corner points in clockwise order starting from
340 // top-left corner. For example, for a rectangular bounding box:
341 // When the text is horizontal it might look like:
// NOTE(review): the ASCII-art diagram lines of this comment are not visible
// in this excerpt — confirm against the upstream file.
346 // When it's clockwise rotated 180 degrees around the top-left corner it
352 // and the vertex order will still be (0, 1, 2, 3). Note that values can be less
353 // than 0, or greater than 1 due to trigonometric calculations for location of
355 message NormalizedBoundingPoly {
356 // Normalized vertices of the bounding polygon.
357 repeated NormalizedVertex vertices = 1;
// NOTE(review): the message's closing "}" is not visible in this excerpt.
360 // Video segment level annotation results for text detection.
361 message TextSegment {
362 // Video segment where a text snippet was detected.
363 VideoSegment segment = 1;
365 // Confidence for the track of detected text. It is calculated as the highest
366 // over all frames where OCR detected text appears.
367 float confidence = 2;
369 // Information related to the frames where OCR detected text appears.
370 repeated TextFrame frames = 3;
// NOTE(review): the message's closing "}" is not visible in this excerpt.
373 // Video frame level annotation results for text annotation (OCR).
374 // Contains information regarding timestamp and bounding box locations for the
375 // frames containing detected OCR text snippets.
// NOTE(review): the `message TextFrame {` declaration line is not visible in
// this excerpt; the fields below presumably belong to it — confirm against
// the upstream file.
377 // Bounding polygon of the detected text for this frame.
378 NormalizedBoundingPoly rotated_bounding_box = 1;
380 // Timestamp of this frame.
381 google.protobuf.Duration time_offset = 2;
384 // Annotations related to one detected OCR text snippet. This will contain the
385 // corresponding text, confidence value, and frame level information for each
387 message TextAnnotation {
388 // The detected text.
// NOTE(review): the field declaration this comment documents (presumably
// `string text = 1;`) is not visible in this excerpt — confirm against the
// upstream file.
391 // All video segments where OCR detected text appears.
392 repeated TextSegment segments = 2;
395 // Video frame level annotations for object detection and tracking. This field
396 // stores per frame location, time offset, and confidence.
397 message ObjectTrackingFrame {
398 // The normalized bounding box location of this object track for the frame.
399 NormalizedBoundingBox normalized_bounding_box = 1;
401 // The timestamp of the frame in microseconds.
402 google.protobuf.Duration time_offset = 2;
// NOTE(review): the message's closing "}" is not visible in this excerpt.
405 // Annotations corresponding to one tracked object.
406 message ObjectTrackingAnnotation {
407 // Entity to specify the object category that this track is labeled as.
// NOTE(review): the `Entity entity` field declaration this comment documents
// is not visible in this excerpt — confirm against the upstream file.
410 // Object category's labeling confidence of this track.
411 float confidence = 4;
413 // Information corresponding to all frames where this object track appears.
414 // Non-streaming batch mode: it may be one or multiple ObjectTrackingFrame
415 // messages in frames.
416 // Streaming mode: it can only be one ObjectTrackingFrame message in frames.
417 repeated ObjectTrackingFrame frames = 2;
419 // Different representation of tracking info in non-streaming batch
420 // and streaming modes.
// NOTE(review): a `oneof` declaration line presumably belongs here (the two
// mutually exclusive members below) but is not visible in this excerpt —
// confirm against the upstream file.
422 // Non-streaming batch mode ONLY.
423 // Each object track corresponds to one video segment where it appears.
424 VideoSegment segment = 3;
425 // Streaming mode ONLY.
426 // In streaming mode, we do not know the end time of a tracked object
427 // before it is completed. Hence, there is no VideoSegment info returned.
428 // Instead, we provide a unique identifiable integer track_id so that
429 // the customers can correlate the results of the ongoing
430 // ObjectTrackAnnotation of the same track_id over time.
// NOTE(review): the `track_id` field declaration this comment documents is
// not visible in this excerpt — confirm against the upstream file.
435 // The top-level message sent by the client for the `StreamingAnnotateVideo`
436 // method. Multiple `StreamingAnnotateVideoRequest` messages are sent.
437 // The first message must only contain a `StreamingVideoConfig` message.
438 // All subsequent messages must only contain `input_content` data.
439 message StreamingAnnotateVideoRequest {
440 // *Required* The streaming request, which is either a streaming config or
442 oneof streaming_request {
443 // Provides information to the annotator, specifying how to process the
444 // request. The first `StreamingAnnotateVideoRequest` message must only
445 // contain a `video_config` message.
446 StreamingVideoConfig video_config = 1;
448 // The video data to be annotated. Chunks of video data are sequentially
449 // sent in `StreamingAnnotateVideoRequest` messages. Except the initial
450 // `StreamingAnnotateVideoRequest` message containing only
451 // `video_config`, all subsequent `StreamingAnnotateVideoRequest`
452 // messages must only contain `input_content` field.
453 bytes input_content = 2;
// NOTE(review): the closing braces of the oneof and the message are not
// visible in this excerpt.
457 // `StreamingAnnotateVideoResponse` is the only message returned to the client
458 // by `StreamingAnnotateVideo`. A series of zero or more
459 // `StreamingAnnotateVideoResponse` messages are streamed back to the client.
460 message StreamingAnnotateVideoResponse {
461 // If set, returns a [google.rpc.Status][] message that
462 // specifies the error for the operation.
463 google.rpc.Status error = 1;
465 // Streaming annotation results.
466 StreamingVideoAnnotationResults annotation_results = 2;
468 // GCS (Google Cloud Storage) URI that stores annotation results of one
469 // streaming session. It is a directory that can hold multiple files in
470 // JSON format. Example uri format:
471 // gs://bucket_id/object_id/cloud_project_name-session_id
472 string annotation_results_uri = 3;
// NOTE(review): the message's closing "}" is not visible in this excerpt.
475 // Config for EXPLICIT_CONTENT_DETECTION in streaming mode.
476 message StreamingExplicitContentDetectionConfig {
477 // No customized config support.
// NOTE(review): intentionally an empty placeholder message (fields may be
// added later without breaking callers); closing "}" is not visible in this
// excerpt.
480 // Config for LABEL_DETECTION in streaming mode.
481 message StreamingLabelDetectionConfig {
482 // Whether the video has been captured from a stationary (i.e. non-moving)
483 // camera. When set to true, might improve detection accuracy for moving
484 // objects. Default: false.
485 bool stationary_camera = 1;
// NOTE(review): the message's closing "}" is not visible in this excerpt.
488 // Config for STREAMING_OBJECT_TRACKING.
489 message StreamingObjectTrackingConfig {
490 // No customized config support.
// NOTE(review): intentionally an empty placeholder message; closing "}" is
// not visible in this excerpt.
493 // Config for SHOT_CHANGE_DETECTION in streaming mode.
494 message StreamingShotChangeDetectionConfig {
495 // No customized config support.
// NOTE(review): intentionally an empty placeholder message; closing "}" is
// not visible in this excerpt.
498 // Config for streaming storage option.
499 message StreamingStorageConfig {
500 // Enable streaming storage. Default: false.
501 bool enable_storage_annotation_result = 1;
503 // GCS URI to store all annotation results for one client. Client should
504 // specify this field as the top-level storage directory. Annotation results
505 // of different sessions will be put into different sub-directories denoted
506 // by project_name and session_id. All sub-directories will be auto generated
507 // by program and will be made accessible to client in response proto.
508 // URIs must be specified in the following format: `gs://bucket-id/object-id`
509 // `bucket-id` should be a valid GCS bucket created by client and bucket
510 // permission shall also be configured properly. `object-id` can be arbitrary
511 // string that makes sense to client. Other URI formats will return error and
512 // cause GCS write failure.
513 string annotation_result_storage_directory = 3;
// NOTE(review): field number 2 is skipped above — presumably used or
// reserved upstream; do not reuse it. Closing "}" is not visible in this
// excerpt.
516 // Streaming annotation results corresponding to a portion of the video
517 // that is currently being processed.
518 message StreamingVideoAnnotationResults {
519 // Shot annotation results. Each shot is represented as a video segment.
520 repeated VideoSegment shot_annotations = 1;
522 // Label annotation results.
523 repeated LabelAnnotation label_annotations = 2;
525 // Explicit content detection results.
526 ExplicitContentAnnotation explicit_annotation = 3;
528 // Object tracking results.
529 repeated ObjectTrackingAnnotation object_annotations = 4;
// NOTE(review): the message's closing "}" is not visible in this excerpt.
532 // Provides information to the annotator that specifies how to process the
534 message StreamingVideoConfig {
535 // Requested annotation feature.
536 StreamingFeature feature = 1;
538 // Config for requested annotation feature.
539 oneof streaming_config {
540 // Config for SHOT_CHANGE_DETECTION.
541 StreamingShotChangeDetectionConfig shot_change_detection_config = 2;
543 // Config for LABEL_DETECTION.
544 StreamingLabelDetectionConfig label_detection_config = 3;
546 // Config for STREAMING_EXPLICIT_CONTENT_DETECTION.
547 StreamingExplicitContentDetectionConfig explicit_content_detection_config =
// NOTE(review): the continuation line carrying this field's number
// (presumably `4;`) and the oneof's closing brace are not visible in this
// excerpt — confirm against the upstream file.
550 // Config for STREAMING_OBJECT_TRACKING.
551 StreamingObjectTrackingConfig object_tracking_config = 5;
554 // Streaming storage option. By default: storage is disabled.
555 StreamingStorageConfig storage_config = 30;
// NOTE(review): the message's closing "}" is not visible in this excerpt.
558 // Video annotation feature.
// NOTE(review): the `enum Feature {` declaration line is not visible in this
// excerpt; the values below presumably belong to it — confirm against the
// upstream file.
561 FEATURE_UNSPECIFIED = 0;
563 // Label detection. Detect objects, such as dog or flower.
// NOTE(review): the value line this comment documents (presumably
// `LABEL_DETECTION = 1;`) is not visible in this excerpt.
566 // Shot change detection.
567 SHOT_CHANGE_DETECTION = 2;
569 // Explicit content detection.
570 EXPLICIT_CONTENT_DETECTION = 3;
572 // OCR text detection and tracking.
575 // Object detection and tracking.
// NOTE(review): the value lines for the two features documented just above,
// and the enum's closing "}", are not visible in this excerpt.
579 // Label detection mode.
580 enum LabelDetectionMode {
// Unspecified; the default zero value, never a real mode.
582 LABEL_DETECTION_MODE_UNSPECIFIED = 0;
584 // Detect shot-level labels.
// NOTE(review): the value line this comment documents (presumably
// `SHOT_MODE = 1;`) is not visible in this excerpt.
587 // Detect frame-level labels.
// NOTE(review): the value line this comment documents (presumably
// `FRAME_MODE = 2;`) is not visible in this excerpt.
590 // Detect both shot-level and frame-level labels.
591 SHOT_AND_FRAME_MODE = 3;
// NOTE(review): the enum's closing "}" is not visible in this excerpt.
594 // Bucketized representation of likelihood.
// NOTE(review): the `enum Likelihood {` declaration line, all non-zero
// values, and the closing "}" are not visible in this excerpt — confirm
// against the upstream file.
596 // Unspecified likelihood.
597 LIKELIHOOD_UNSPECIFIED = 0;
615 // Streaming video annotation feature.
616 enum StreamingFeature {
// Unspecified; the default zero value, never a real feature.
618 STREAMING_FEATURE_UNSPECIFIED = 0;
619 // Label detection. Detect objects, such as dog or flower.
620 STREAMING_LABEL_DETECTION = 1;
621 // Shot change detection.
622 STREAMING_SHOT_CHANGE_DETECTION = 2;
623 // Explicit content detection.
624 STREAMING_EXPLICIT_CONTENT_DETECTION = 3;
625 // Object detection and tracking.
626 STREAMING_OBJECT_TRACKING = 4;
// NOTE(review): the enum's closing "}" falls past the end of this excerpt.