legacy-libs/google-proto-files/google/cloud/videointelligence/v1beta1/video_intelligence.proto

   1 // Copyright 2017 Google Inc.
   2 //
   3 // Licensed under the Apache License, Version 2.0 (the "License");
   4 // you may not use this file except in compliance with the License.
   5 // You may obtain a copy of the License at
   6 //
   7 //     http://www.apache.org/licenses/LICENSE-2.0
   8 //
   9 // Unless required by applicable law or agreed to in writing, software
  10 // distributed under the License is distributed on an "AS IS" BASIS,
  11 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  12 // See the License for the specific language governing permissions and
  13 // limitations under the License.
  14
  15 syntax = "proto3";
  16
  17 package google.cloud.videointelligence.v1beta1;
  18
  19 import "google/api/annotations.proto";
  20 import "google/longrunning/operations.proto";
  21 import "google/protobuf/timestamp.proto";
  22 import "google/rpc/status.proto";
  23
  24 option csharp_namespace = "Google.Cloud.VideoIntelligence.V1Beta1";
  25 option go_package = "google.golang.org/genproto/googleapis/cloud/videointelligence/v1beta1;videointelligence";
  26 option java_multiple_files = true;
  27 option java_outer_classname = "VideoIntelligenceServiceProto";
  28 option java_package = "com.google.cloud.videointelligence.v1beta1";
  29 option php_namespace = "Google\\Cloud\\VideoIntelligence\\V1beta1";
  30
  31 // Service that implements the Google Cloud Video Intelligence API.
  32 service VideoIntelligenceService {
  33   // Performs asynchronous video annotation. Poll the returned operation via
  34   // the `google.longrunning.Operations` interface for progress and results:
  35   // `Operation.metadata` contains `AnnotateVideoProgress` (progress) and
  36   // `Operation.response` contains `AnnotateVideoResponse` (results).
  37   rpc AnnotateVideo(AnnotateVideoRequest)
  38       returns (google.longrunning.Operation) {
  39     option (google.api.http) = {
  40       post: "/v1beta1/videos:annotate"
  41       body: "*"
  42     };
  43   }
  44 }
  45
  46 // Video annotation request: input video(s), requested features, and options.
  47 message AnnotateVideoRequest {
  48   // Input video location. Currently, only
  49   // [Google Cloud Storage](https://cloud.google.com/storage/) URIs are
  50   // supported, which must be specified in the following format:
  51   // `gs://bucket-id/object-id` (other URI formats return
  52   // [google.rpc.Code.INVALID_ARGUMENT][google.rpc.Code.INVALID_ARGUMENT]). For
  53   // more information, see [Request URIs](/storage/docs/reference-uris). A video
  54   // URI may include wildcards in `object-id`, and thus identify multiple
  55   // videos. Supported wildcards: `*` matches 0 or more characters and `?`
  56   // matches 1 character. If unset, the input video should be embedded in
  57   // the request as `input_content`. If set, `input_content` should be unset.
  58   string input_uri = 1;
  59
  60   // Base64-encoded video data (a `string` field). If unset, the input video(s)
  61   // should be specified via `input_uri`. If set, `input_uri` should be unset.
  62   string input_content = 6;
  63
  64   // Requested video annotation features (see `Feature`).
  65   repeated Feature features = 2;
  66
  67   // Additional video context and/or feature-specific parameters.
  68   VideoContext video_context = 3;
  69
  70   // Optional location where the output (in JSON format) should be stored.
  71   // Currently, only [Google Cloud Storage](https://cloud.google.com/storage/)
  72   // URIs are supported, which must be specified in the following format:
  73   // `gs://bucket-id/object-id` (other URI formats return
  74   // [google.rpc.Code.INVALID_ARGUMENT][google.rpc.Code.INVALID_ARGUMENT]). For
  75   // more information, see [Request URIs](/storage/docs/reference-uris).
  76   string output_uri = 4;
  77
  78   // Optional cloud region where annotation should take place. Supported cloud
  79   // regions: `us-east1`, `us-west1`, `europe-west1`, `asia-east1`. If no region
  80   // is specified, a region will be determined based on video file location.
  81   string location_id = 5;
  82 }
  83
  84 // Video context and/or feature-specific parameters.
  85 message VideoContext {
  86   // Video segments to annotate. The segments may overlap and are not required
  87   // to be contiguous or span the whole video. If unspecified, each video
  88   // is treated as a single segment.
  89   repeated VideoSegment segments = 1;
  90
  91   // If label detection has been requested, which labels should be detected
  92   // in addition to video-level or segment-level labels. If unspecified,
  93   // defaults to `SHOT_MODE` (see `LabelDetectionMode`).
  94   LabelDetectionMode label_detection_mode = 2;
  95
  96   // Whether the video has been shot from a stationary (i.e. non-moving) camera.
  97   // When set to true, might improve detection accuracy for moving objects.
  98   bool stationary_camera = 3;
  99
 100   // Model to use for label detection.
 101   // Supported values: "latest" and "stable" (the default).
 102   string label_detection_model = 4;
 103
 104   // Model to use for face detection.
 105   // Supported values: "latest" and "stable" (the default).
 106   string face_detection_model = 5;
 107
 108   // Model to use for shot change detection.
 109   // Supported values: "latest" and "stable" (the default).
 110   string shot_change_detection_model = 6;
 111
 112   // Model to use for safe search detection.
 113   // Supported values: "latest" and "stable" (the default).
 114   string safe_search_detection_model = 7;
 115 }
 116
 117 // Video segment, given as inclusive start and end offsets in microseconds.
 118 message VideoSegment {
 119   // Start offset in microseconds (inclusive). Unset means 0.
 120   int64 start_time_offset = 1;
 121
 122   // End offset in microseconds (inclusive). Unset means 0.
 123   int64 end_time_offset = 2;
 124 }
 125
 126 // Label location: where a label applies, with its confidence and level.
 127 message LabelLocation {
 128   // Video segment. Set to [-1, -1] for video-level labels.
 129   // Set to [timestamp, timestamp] for frame-level labels.
 130   // Otherwise, corresponds to one of `VideoContext.segments`
 131   // (if specified) or to shot boundaries (if requested).
 132   VideoSegment segment = 1;
 133
 134   // Confidence that the label is accurate. Range: [0, 1].
 135   float confidence = 2;
 136
 137   // Label level (scope); see `LabelLevel`.
 138   LabelLevel level = 3;
 139 }
 140
 141 // Label annotation: a detected label and the locations where it was found.
 142 message LabelAnnotation {
 143   // Textual description of the label, e.g. `Fixed-gear bicycle`.
 144   string description = 1;
 145
 146   // Language code for `description` in BCP-47 format.
 147   string language_code = 2;
 148
 149   // Where the label was detected and with what confidence.
 150   repeated LabelLocation locations = 3;
 151 }
 152
 153 // Safe search annotation (based on per-frame visual signals only).
 154 // If no unsafe content has been detected in a frame, no annotations
 155 // are present for that frame. If only some types of unsafe content
 156 // have been detected in a frame, the likelihood is set to `UNKNOWN`
 157 // for all other types of unsafe content.
 158 message SafeSearchAnnotation {
 159   // Likelihood of adult content.
 160   Likelihood adult = 1;
 161
 162   // Likelihood that an obvious modification was made to the original
 163   // version to make it appear funny or offensive.
 164   Likelihood spoof = 2;
 165
 166   // Likelihood of medical content.
 167   Likelihood medical = 3;
 168
 169   // Likelihood of violent content.
 170   Likelihood violent = 4;
 171
 172   // Likelihood of racy content.
 173   Likelihood racy = 5;
 174
 175   // Video time offset of the annotated frame, in microseconds.
 176   int64 time_offset = 6;
 177 }
 178
 179 // Bounding box, given by four edge coordinates (units are not stated here).
 180 message BoundingBox {
 181   // Left X coordinate.
 182   int32 left = 1;
 183
 184   // Right X coordinate.
 185   int32 right = 2;
 186
 187   // Bottom Y coordinate.
 188   int32 bottom = 3;
 189
 190   // Top Y coordinate.
 191   int32 top = 4;
 192 }
 193
 194 // Face location: a bounding box at a given video time offset.
 195 message FaceLocation {
 196   // Bounding box in a frame.
 197   BoundingBox bounding_box = 1;
 198
 199   // Video time offset in microseconds.
 200   int64 time_offset = 2;
 201 }
 202
 203 // Face annotation: one detected face, with its segments and locations.
 204 message FaceAnnotation {
 205   // Thumbnail of a representative face view (in JPEG format). Encoding: base64.
 206   string thumbnail = 1;
 207
 208   // All video segments where a face was detected.
 209   // Faces are detected and tracked on a per-video basis
 210   // (as opposed to across multiple videos).
 211   repeated VideoSegment segments = 2;
 212
 213   // Face locations at one frame per second.
 214   repeated FaceLocation locations = 3;
 215 }
 216
 217 // Annotation results for a single video; see `AnnotateVideoResponse`.
 218 message VideoAnnotationResults {
 219   // Video file location in
 220   // [Google Cloud Storage](https://cloud.google.com/storage/).
 221   string input_uri = 1;
 222
 223   // Label annotations. There is exactly one element for each unique label.
 224   repeated LabelAnnotation label_annotations = 2;
 225
 226   // Face annotations. There is exactly one element for each unique face.
 227   repeated FaceAnnotation face_annotations = 3;
 228
 229   // Shot annotations. Each shot is represented as a video segment.
 230   repeated VideoSegment shot_annotations = 4;
 231
 232   // Safe search annotations.
 233   repeated SafeSearchAnnotation safe_search_annotations = 6;
 234
 235   // If set, indicates an error. Note that for a single `AnnotateVideoRequest`
 236   // some videos may succeed and some may fail.
 237   google.rpc.Status error = 5;
 238 }
 239
 240 // Video annotation response. Included in the `response`
 241 // field of the `Operation` returned by the `GetOperation`
 242 // call of the `google.longrunning.Operations` service.
 243 message AnnotateVideoResponse {
 244   // Annotation results for all videos specified in `AnnotateVideoRequest`.
 245   repeated VideoAnnotationResults annotation_results = 1;
 246 }
 247
 248 // Annotation progress for a single video; see `AnnotateVideoProgress`.
 249 message VideoAnnotationProgress {
 250   // Video file location in
 251   // [Google Cloud Storage](https://cloud.google.com/storage/).
 252   string input_uri = 1;
 253
 254   // Approximate percentage processed thus far.
 255   // Guaranteed to be 100 when fully processed.
 256   int32 progress_percent = 2;
 257
 258   // Time when the request was received.
 259   google.protobuf.Timestamp start_time = 3;
 260
 261   // Time of the most recent update.
 262   google.protobuf.Timestamp update_time = 4;
 263 }
 264
 265 // Video annotation progress. Included in the `metadata`
 266 // field of the `Operation` returned by the `GetOperation`
 267 // call of the `google.longrunning.Operations` service.
 268 message AnnotateVideoProgress {
 269   // Progress metadata for all videos specified in `AnnotateVideoRequest`.
 270   repeated VideoAnnotationProgress annotation_progress = 1;
 271 }
 272
 273 // Video annotation feature, requested via `AnnotateVideoRequest.features`.
 274 enum Feature {
 275   // Unspecified.
 276   FEATURE_UNSPECIFIED = 0;
 277
 278   // Label detection. Detect objects, such as dog or flower.
 279   LABEL_DETECTION = 1;
 280
 281   // Human face detection and tracking.
 282   FACE_DETECTION = 2;
 283
 284   // Shot change detection.
 285   SHOT_CHANGE_DETECTION = 3;
 286
 287   // Safe search detection.
 288   SAFE_SEARCH_DETECTION = 4;
 289 }
 290
 291 // Label level (scope), reported in `LabelLocation.level`.
 292 enum LabelLevel {
 293   // Unspecified.
 294   LABEL_LEVEL_UNSPECIFIED = 0;
 295
 296   // Video-level. Corresponds to the whole video.
 297   VIDEO_LEVEL = 1;
 298
 299   // Segment-level. Corresponds to one of `VideoContext.segments`.
 300   SEGMENT_LEVEL = 2;
 301
 302   // Shot-level. Corresponds to a single shot (i.e. a series of frames
 303   // without a major camera position or background change).
 304   SHOT_LEVEL = 3;
 305
 306   // Frame-level. Corresponds to a single video frame.
 307   FRAME_LEVEL = 4;
 308 }
 309
 310 // Label detection mode, set via `VideoContext.label_detection_mode`.
 311 enum LabelDetectionMode {
 312   // Unspecified. `VideoContext.label_detection_mode` treats it as `SHOT_MODE`.
 313   LABEL_DETECTION_MODE_UNSPECIFIED = 0;
 314
 315   // Detect shot-level labels.
 316   SHOT_MODE = 1;
 317
 318   // Detect frame-level labels.
 319   FRAME_MODE = 2;
 320
 321   // Detect both shot-level and frame-level labels.
 322   SHOT_AND_FRAME_MODE = 3;
 323 }
 324
 325 // Bucketized likelihood; values 1-5 are ordered from least to most likely.
 326 enum Likelihood {
 327   // Unknown likelihood (the zero/default value).
 328   UNKNOWN = 0;
 329
 330   // Very unlikely.
 331   VERY_UNLIKELY = 1;
 332
 333   // Unlikely.
 334   UNLIKELY = 2;
 335
 336   // Possible.
 337   POSSIBLE = 3;
 338
 339   // Likely.
 340   LIKELY = 4;
 341
 342   // Very likely.
 343   VERY_LIKELY = 5;
 344 }