1 // Copyright 2018 Google LLC.
3 // Licensed under the Apache License, Version 2.0 (the "License");
4 // you may not use this file except in compliance with the License.
5 // You may obtain a copy of the License at
7 // http://www.apache.org/licenses/LICENSE-2.0
9 // Unless required by applicable law or agreed to in writing, software
10 // distributed under the License is distributed on an "AS IS" BASIS,
11 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 // See the License for the specific language governing permissions and
13 // limitations under the License.
18 package google.cloud.dataproc.v1;
20 import "google/api/annotations.proto";
21 import "google/cloud/dataproc/v1/operations.proto";
22 import "google/longrunning/operations.proto";
23 import "google/protobuf/duration.proto";
24 import "google/protobuf/field_mask.proto";
25 import "google/protobuf/timestamp.proto";
27 option go_package = "google.golang.org/genproto/googleapis/cloud/dataproc/v1;dataproc";
28 option java_multiple_files = true;
29 option java_outer_classname = "ClustersProto";
30 option java_package = "com.google.cloud.dataproc.v1";
32 // The ClusterController service provides methods to manage clusters
33 // of Compute Engine instances.
34 service ClusterController {
35 // Creates a cluster in a project.
36 rpc CreateCluster(CreateClusterRequest)
37 returns (google.longrunning.Operation) {
38 option (google.api.http) = {
39 post: "/v1/projects/{project_id}/regions/{region}/clusters"
44 // Updates a cluster in a project.
45 rpc UpdateCluster(UpdateClusterRequest)
46 returns (google.longrunning.Operation) {
47 option (google.api.http) = {
48 patch: "/v1/projects/{project_id}/regions/{region}/clusters/{cluster_name}"
53 // Deletes a cluster in a project.
54 rpc DeleteCluster(DeleteClusterRequest)
55 returns (google.longrunning.Operation) {
56 option (google.api.http) = {
57 delete: "/v1/projects/{project_id}/regions/{region}/clusters/{cluster_name}"
61 // Gets the resource representation for a cluster in a project.
62 rpc GetCluster(GetClusterRequest) returns (Cluster) {
63 option (google.api.http) = {
64 get: "/v1/projects/{project_id}/regions/{region}/clusters/{cluster_name}"
68 // Lists all clusters in a region of a project.
69 rpc ListClusters(ListClustersRequest) returns (ListClustersResponse) {
70 option (google.api.http) = {
71 get: "/v1/projects/{project_id}/regions/{region}/clusters"
75 // Gets cluster diagnostic information.
76 // After the operation completes, the Operation.response field
77 // contains `DiagnoseClusterResults`.
78 rpc DiagnoseCluster(DiagnoseClusterRequest)
79 returns (google.longrunning.Operation) {
80 option (google.api.http) = {
81 post: "/v1/projects/{project_id}/regions/{region}/clusters/{cluster_name}:diagnose"
87 // Describes the identifying information, config, and status of
88 // a cluster of Compute Engine instances.
90 // Required. The Google Cloud Platform project ID that the cluster belongs to.
91 string project_id = 1;
93 // Required. The cluster name. Cluster names within a project must be
94 // unique. Names of deleted clusters can be reused.
95 string cluster_name = 2;
97 // Required. The cluster config. Note that Cloud Dataproc may set
98 // default values, and values may change when clusters are updated.
99 ClusterConfig config = 3;
101 // Optional. The labels to associate with this cluster.
102 // Label **keys** must contain 1 to 63 characters, and must conform to
103 // [RFC 1035](https://www.ietf.org/rfc/rfc1035.txt).
104 // Label **values** may be empty, but, if present, must contain 1 to 63
105 // characters, and must conform to [RFC
106 // 1035](https://www.ietf.org/rfc/rfc1035.txt). No more than 32 labels can be
107 // associated with a cluster.
108 map<string, string> labels = 8;
110 // Output only. Cluster status.
111 ClusterStatus status = 4;
113 // Output only. The previous cluster statuses.
114 repeated ClusterStatus status_history = 7;
116 // Output only. A cluster UUID (Universally Unique Identifier). Cloud Dataproc
117 // generates this value when it creates the cluster.
118 string cluster_uuid = 6;
120 // Contains cluster daemon metrics such as HDFS and YARN stats.
122 // **Beta Feature**: This report is available for testing purposes only. It
123 // may be changed before final release.
124 ClusterMetrics metrics = 9;
127 // The cluster config.
128 message ClusterConfig {
129 // Optional. A Cloud Storage staging bucket used for sharing generated
130 // SSH keys and config. If you do not specify a staging bucket, Cloud
131 // Dataproc will determine an appropriate Cloud Storage location (US,
132 // ASIA, or EU) for your cluster's staging bucket according to the Google
133 // Compute Engine zone where your cluster is deployed, and then it will create
134 // and manage this project-level, per-location bucket for you.
135 string config_bucket = 1;
137 // Required. The shared Compute Engine config settings for
138 // all instances in a cluster.
139 GceClusterConfig gce_cluster_config = 8;
141 // Optional. The Compute Engine config settings for
142 // the master instance in a cluster.
143 InstanceGroupConfig master_config = 9;
145 // Optional. The Compute Engine config settings for
146 // worker instances in a cluster.
147 InstanceGroupConfig worker_config = 10;
149 // Optional. The Compute Engine config settings for
150 // additional worker instances in a cluster.
151 InstanceGroupConfig secondary_worker_config = 12;
153 // Optional. The config settings for software inside the cluster.
154 SoftwareConfig software_config = 13;
156 // Optional. Commands to execute on each node after config is
157 // completed. By default, executables are run on master and all worker nodes.
158 // You can test a node's `role` metadata to run an executable on
159 // a master or worker node, as shown below using `curl` (you can also use
162 // ROLE=$(curl -H Metadata-Flavor:Google
163 // http://metadata/computeMetadata/v1/instance/attributes/dataproc-role)
164 // if [[ "${ROLE}" == 'Master' ]]; then
165 // ... master specific actions ...
167 // ... worker specific actions ...
169 repeated NodeInitializationAction initialization_actions = 11;
171 // Optional. Encryption settings for the cluster.
172 EncryptionConfig encryption_config = 15;
175 // Encryption settings for the cluster.
176 message EncryptionConfig {
177 // Optional. The Cloud KMS key name to use for persistent disk (PD) encryption for all
178 // instances in the cluster.
179 string gce_pd_kms_key_name = 1;
182 // Common config settings for resources of Compute Engine cluster
183 // instances, applicable to all instances in the cluster.
184 message GceClusterConfig {
185 // Optional. The zone where the Compute Engine cluster will be located.
186 // On a create request, it is required in the "global" region. If omitted
187 // in a non-global Cloud Dataproc region, the service will pick a zone in the
188 // corresponding Compute Engine region. On a get request, zone will
189 // always be present.
191 // A full URL, partial URI, or short name are valid. Examples:
193 // * `https://www.googleapis.com/compute/v1/projects/[project_id]/zones/[zone]`
194 // * `projects/[project_id]/zones/[zone]`
198 // Optional. The Compute Engine network to be used for machine
199 // communications. Cannot be specified with subnetwork_uri. If neither
200 // `network_uri` nor `subnetwork_uri` is specified, the "default" network of
201 // the project is used, if it exists. Cannot be a "Custom Subnet Network" (see
202 // [Using Subnetworks](/compute/docs/subnetworks) for more information).
204 // A full URL, partial URI, or short name are valid. Examples:
206 // * `https://www.googleapis.com/compute/v1/projects/[project_id]/regions/global/default`
207 // * `projects/[project_id]/regions/global/default`
209 string network_uri = 2;
211 // Optional. The Compute Engine subnetwork to be used for machine
212 // communications. Cannot be specified with network_uri.
214 // A full URL, partial URI, or short name are valid. Examples:
216 // * `https://www.googleapis.com/compute/v1/projects/[project_id]/regions/us-east1/sub0`
217 // * `projects/[project_id]/regions/us-east1/sub0`
219 string subnetwork_uri = 6;
221 // Optional. If true, all instances in the cluster will only have internal IP
222 // addresses. By default, clusters are not restricted to internal IP
223 // addresses, and will have ephemeral external IP addresses assigned to each
224 // instance. This `internal_ip_only` restriction can only be enabled for
225 // subnetwork enabled networks, and all off-cluster dependencies must be
226 // configured to be accessible without external IP addresses.
227 bool internal_ip_only = 7;
229 // Optional. The service account of the instances. Defaults to the default
230 // Compute Engine service account. Custom service accounts need
231 // permissions equivalent to the following IAM roles:
233 // * roles/logging.logWriter
234 // * roles/storage.objectAdmin
237 // https://cloud.google.com/compute/docs/access/service-accounts#custom_service_accounts
238 // for more information).
239 // Example: `[account_id]@[project_id].iam.gserviceaccount.com`
240 string service_account = 8;
242 // Optional. The URIs of service account scopes to be included in
243 // Compute Engine instances. The following base set of scopes is always
246 // * https://www.googleapis.com/auth/cloud.useraccounts.readonly
247 // * https://www.googleapis.com/auth/devstorage.read_write
248 // * https://www.googleapis.com/auth/logging.write
250 // If no scopes are specified, the following defaults are also provided:
252 // * https://www.googleapis.com/auth/bigquery
253 // * https://www.googleapis.com/auth/bigtable.admin.table
254 // * https://www.googleapis.com/auth/bigtable.data
255 // * https://www.googleapis.com/auth/devstorage.full_control
256 repeated string service_account_scopes = 3;
258 // The Compute Engine tags to add to all instances (see
259 // [Tagging instances](/compute/docs/label-or-tag-resources#tags)).
260 repeated string tags = 4;
262 // The Compute Engine metadata entries to add to all instances (see
263 // [Project and instance
264 // metadata](https://cloud.google.com/compute/docs/storing-retrieving-metadata#project_and_instance_metadata)).
265 map<string, string> metadata = 5;
268 // The config settings for Compute Engine resources in
269 // an instance group, such as a master or worker group.
270 message InstanceGroupConfig {
271 // Optional. The number of VM instances in the instance group.
272 // For master instance groups, must be set to 1.
273 int32 num_instances = 1;
275 // Output only. The list of instance names. Cloud Dataproc derives the names
276 // from `cluster_name`, `num_instances`, and the instance group.
277 repeated string instance_names = 2;
279 // Optional. The Compute Engine image resource used for cluster
280 // instances. It can be specified or may be inferred from
281 // `SoftwareConfig.image_version`.
282 string image_uri = 3;
284 // Optional. The Compute Engine machine type used for cluster instances.
286 // A full URL, partial URI, or short name are valid. Examples:
288 // * `https://www.googleapis.com/compute/v1/projects/[project_id]/zones/us-east1-a/machineTypes/n1-standard-2`
289 // * `projects/[project_id]/zones/us-east1-a/machineTypes/n1-standard-2`
292 // **Auto Zone Exception**: If you are using the Cloud Dataproc
294 // Placement](/dataproc/docs/concepts/configuring-clusters/auto-zone#using_auto_zone_placement)
295 // feature, you must use the short name of the machine type
296 // resource, for example, `n1-standard-2`.
297 string machine_type_uri = 4;
299 // Optional. Disk option config settings.
300 DiskConfig disk_config = 5;
302 // Optional. Specifies that this instance group contains preemptible
304 bool is_preemptible = 6;
306 // Output only. The config for Compute Engine Instance Group
307 // Manager that manages this group.
308 // This is only used for preemptible instance groups.
309 ManagedGroupConfig managed_group_config = 7;
311 // Optional. The Compute Engine accelerator configuration for these
314 // **Beta Feature**: This feature is still under development. It may be
315 // changed before final release.
316 repeated AcceleratorConfig accelerators = 8;
319 // Specifies the resources used to actively manage an instance group.
320 message ManagedGroupConfig {
321 // Output only. The name of the Instance Template used for the Managed
323 string instance_template_name = 1;
325 // Output only. The name of the Instance Group Manager for this group.
326 string instance_group_manager_name = 2;
329 // Specifies the type and number of accelerator cards attached to the instances
330 // of an instance group. See [GPUs on Compute Engine](/compute/docs/gpus/).
331 message AcceleratorConfig {
332 // Full URL, partial URI, or short name of the accelerator type resource to
333 // expose to this instance. See
335 // AcceleratorTypes](/compute/docs/reference/beta/acceleratorTypes).
339 // * `https://www.googleapis.com/compute/beta/projects/[project_id]/zones/us-east1-a/acceleratorTypes/nvidia-tesla-k80`
340 // * `projects/[project_id]/zones/us-east1-a/acceleratorTypes/nvidia-tesla-k80`
341 // * `nvidia-tesla-k80`
343 // **Auto Zone Exception**: If you are using the Cloud Dataproc
345 // Placement](/dataproc/docs/concepts/configuring-clusters/auto-zone#using_auto_zone_placement)
346 // feature, you must use the short name of the accelerator type
347 // resource, for example, `nvidia-tesla-k80`.
348 string accelerator_type_uri = 1;
350 // The number of the accelerator cards of this type exposed to this instance.
351 int32 accelerator_count = 2;
354 // Specifies the config of disk options for a group of VM instances.
356 // Optional. Type of the boot disk (default is "pd-standard").
357 // Valid values: "pd-ssd" (Persistent Disk Solid State Drive) or
358 // "pd-standard" (Persistent Disk Hard Disk Drive).
359 string boot_disk_type = 3;
361 // Optional. Size in GB of the boot disk (default is 500GB).
362 int32 boot_disk_size_gb = 1;
364 // Optional. Number of attached SSDs, from 0 to 4 (default is 0).
365 // If SSDs are not attached, the boot disk is used to store runtime logs and
366 // [HDFS](https://hadoop.apache.org/docs/r1.2.1/hdfs_user_guide.html) data.
367 // If one or more SSDs are attached, this runtime bulk
368 // data is spread across them, and the boot disk contains only basic
369 // config and installed binaries.
370 int32 num_local_ssds = 2;
373 // Specifies an executable to run on a fully configured node and a
374 // timeout period for executable completion.
375 message NodeInitializationAction {
376 // Required. Cloud Storage URI of executable file.
377 string executable_file = 1;
379 // Optional. Amount of time executable has to complete. Default is
380 // 10 minutes. Cluster creation fails with an explanatory error message (the
381 // name of the executable that caused the error and the exceeded timeout
382 // period) if the executable is not completed at end of the timeout period.
383 google.protobuf.Duration execution_timeout = 2;
386 // The status of a cluster and its instances.
387 message ClusterStatus {
388 // The cluster state.
390 // The cluster state is unknown.
393 // The cluster is being created and set up. It is not ready for use.
396 // The cluster is currently running and healthy. It is ready for use.
399 // The cluster encountered an error. It is not ready for use.
402 // The cluster is being deleted. It cannot be used.
405 // The cluster is being updated. It continues to accept and process jobs.
409 // The cluster substate.
411 // The cluster substate is unknown.
414 // The cluster is known to be in an unhealthy state
415 // (for example, critical daemons are not running or HDFS capacity is
418 // Applies to RUNNING state.
421 // The agent-reported status is out of date (may occur if
422 // Cloud Dataproc loses communication with Agent).
424 // Applies to RUNNING state.
428 // Output only. The cluster's state.
431 // Output only. Optional details of cluster's state.
434 // Output only. Time when this state was entered.
435 google.protobuf.Timestamp state_start_time = 3;
437 // Output only. Additional state information that includes
438 // status reported by the agent.
439 Substate substate = 4;
442 // Specifies the selection and config of software inside the cluster.
443 message SoftwareConfig {
444 // Optional. The version of software inside the cluster. It must be one of the
445 // supported [Cloud Dataproc
446 // Versions](/dataproc/docs/concepts/versioning/dataproc-versions#supported_cloud_dataproc_versions),
447 // such as "1.2" (including a subminor version, such as "1.2.29"), or the
449 // version](/dataproc/docs/concepts/versioning/dataproc-versions#other_versions).
450 // If unspecified, it defaults to the latest version.
451 string image_version = 1;
453 // Optional. The properties to set on daemon config files.
455 // Property keys are specified in `prefix:property` format, such as
456 // `core:fs.defaultFS`. The following are supported prefixes
457 // and their mappings:
459 // * capacity-scheduler: `capacity-scheduler.xml`
460 // * core: `core-site.xml`
461 // * distcp: `distcp-default.xml`
462 // * hdfs: `hdfs-site.xml`
463 // * hive: `hive-site.xml`
464 // * mapred: `mapred-site.xml`
465 // * pig: `pig.properties`
466 // * spark: `spark-defaults.conf`
467 // * yarn: `yarn-site.xml`
469 // For more information, see
470 // [Cluster properties](/dataproc/docs/concepts/cluster-properties).
471 map<string, string> properties = 2;
474 // Contains cluster daemon metrics, such as HDFS and YARN stats.
476 // **Beta Feature**: This report is available for testing purposes only. It may
477 // be changed before final release.
478 message ClusterMetrics {
480 map<string, int64> hdfs_metrics = 1;
483 map<string, int64> yarn_metrics = 2;
486 // A request to create a cluster.
487 message CreateClusterRequest {
488 // Required. The ID of the Google Cloud Platform project that the cluster
490 string project_id = 1;
492 // Required. The Cloud Dataproc region in which to handle the request.
495 // Required. The cluster to create.
498 // Optional. A unique id used to identify the request. If the server
500 // [CreateClusterRequest][google.cloud.dataproc.v1.CreateClusterRequest]
501 // requests with the same id, then the second request will be ignored and the
502 // first [google.longrunning.Operation][google.longrunning.Operation] created
503 // and stored in the backend is returned.
505 // It is recommended to always set this value to a
506 // [UUID](https://en.wikipedia.org/wiki/Universally_unique_identifier).
508 // The id must contain only letters (a-z, A-Z), numbers (0-9),
509 // underscores (_), and hyphens (-). The maximum length is 40 characters.
510 string request_id = 4;
513 // A request to update a cluster.
514 message UpdateClusterRequest {
515 // Required. The ID of the Google Cloud Platform project the
516 // cluster belongs to.
517 string project_id = 1;
519 // Required. The Cloud Dataproc region in which to handle the request.
522 // Required. The cluster name.
523 string cluster_name = 2;
525 // Required. The changes to the cluster.
528 // Optional. Timeout for graceful YARN decommissioning. Graceful
529 // decommissioning allows removing nodes from the cluster without
530 // interrupting jobs in progress. Timeout specifies how long to wait for jobs
531 // in progress to finish before forcefully removing nodes (and potentially
532 // interrupting jobs). Default timeout is 0 (for forceful decommission), and
533 // the maximum allowed timeout is 1 day.
535 // Only supported on Dataproc image versions 1.2 and higher.
536 google.protobuf.Duration graceful_decommission_timeout = 6;
538 // Required. Specifies the path, relative to `Cluster`, of
539 // the field to update. For example, to change the number of workers
540 // in a cluster to 5, the `update_mask` parameter would be
541 // specified as `config.worker_config.num_instances`,
542 // and the `PATCH` request body would specify the new value, as follows:
547 // "numInstances":"5"
551 // Similarly, to change the number of preemptible workers in a cluster to 5,
552 // the `update_mask` parameter would be
553 // `config.secondary_worker_config.num_instances`, and the `PATCH` request
554 // body would be set as follows:
558 // "secondaryWorkerConfig":{
559 // "numInstances":"5"
563 // <strong>Note:</strong> Currently, only the following fields can be updated:
568 // <td><strong>Mask</strong></td>
569 // <td><strong>Purpose</strong></td>
572 // <td><strong><em>labels</em></strong></td>
573 // <td>Update labels</td>
576 // <td><strong><em>config.worker_config.num_instances</em></strong></td>
577 // <td>Resize primary worker group</td>
580 // <td><strong><em>config.secondary_worker_config.num_instances</em></strong></td>
581 // <td>Resize secondary worker group</td>
585 google.protobuf.FieldMask update_mask = 4;
587 // Optional. A unique id used to identify the request. If the server
589 // [UpdateClusterRequest][google.cloud.dataproc.v1.UpdateClusterRequest]
590 // requests with the same id, then the second request will be ignored and the
591 // first [google.longrunning.Operation][google.longrunning.Operation] created
592 // and stored in the backend is returned.
594 // It is recommended to always set this value to a
595 // [UUID](https://en.wikipedia.org/wiki/Universally_unique_identifier).
597 // The id must contain only letters (a-z, A-Z), numbers (0-9),
598 // underscores (_), and hyphens (-). The maximum length is 40 characters.
599 string request_id = 7;
602 // A request to delete a cluster.
603 message DeleteClusterRequest {
604 // Required. The ID of the Google Cloud Platform project that the cluster
606 string project_id = 1;
608 // Required. The Cloud Dataproc region in which to handle the request.
611 // Required. The cluster name.
612 string cluster_name = 2;
614 // Optional. Specifying the `cluster_uuid` means the RPC should fail
615 // (with error NOT_FOUND) if a cluster with the specified UUID does not exist.
616 string cluster_uuid = 4;
618 // Optional. A unique id used to identify the request. If the server
620 // [DeleteClusterRequest][google.cloud.dataproc.v1.DeleteClusterRequest]
621 // requests with the same id, then the second request will be ignored and the
622 // first [google.longrunning.Operation][google.longrunning.Operation] created
623 // and stored in the backend is returned.
625 // It is recommended to always set this value to a
626 // [UUID](https://en.wikipedia.org/wiki/Universally_unique_identifier).
628 // The id must contain only letters (a-z, A-Z), numbers (0-9),
629 // underscores (_), and hyphens (-). The maximum length is 40 characters.
630 string request_id = 5;
633 // Request to get the resource representation for a cluster in a project.
634 message GetClusterRequest {
635 // Required. The ID of the Google Cloud Platform project that the cluster
637 string project_id = 1;
639 // Required. The Cloud Dataproc region in which to handle the request.
642 // Required. The cluster name.
643 string cluster_name = 2;
646 // A request to list the clusters in a project.
647 message ListClustersRequest {
648 // Required. The ID of the Google Cloud Platform project that the cluster
650 string project_id = 1;
652 // Required. The Cloud Dataproc region in which to handle the request.
655 // Optional. A filter constraining the clusters to list. Filters are
656 // case-sensitive and have the following syntax:
658 // field = value [AND [field = value]] ...
660 // where **field** is one of `status.state`, `clusterName`, or `labels.[KEY]`,
661 // and `[KEY]` is a label key. **value** can be `*` to match all values.
662 // `status.state` can be one of the following: `ACTIVE`, `INACTIVE`,
663 // `CREATING`, `RUNNING`, `ERROR`, `DELETING`, or `UPDATING`. `ACTIVE`
664 // contains the `CREATING`, `UPDATING`, and `RUNNING` states. `INACTIVE`
665 // contains the `DELETING` and `ERROR` states.
666 // `clusterName` is the name of the cluster provided at creation time.
667 // Only the logical `AND` operator is supported; space-separated items are
668 // treated as having an implicit `AND` operator.
672 // status.state = ACTIVE AND clusterName = mycluster
673 // AND labels.env = staging AND labels.starred = *
676 // Optional. The standard List page size.
679 // Optional. The standard List page token.
680 string page_token = 3;
683 // The list of all clusters in a project.
684 message ListClustersResponse {
685 // Output only. The clusters in the project.
686 repeated Cluster clusters = 1;
688 // Output only. This token is included in the response if there are more
689 // results to fetch. To fetch additional results, provide this value as the
690 // `page_token` in a subsequent `ListClustersRequest`.
691 string next_page_token = 2;
694 // A request to collect cluster diagnostic information.
695 message DiagnoseClusterRequest {
696 // Required. The ID of the Google Cloud Platform project that the cluster
698 string project_id = 1;
700 // Required. The Cloud Dataproc region in which to handle the request.
703 // Required. The cluster name.
704 string cluster_name = 2;
707 // The location of diagnostic output.
708 message DiagnoseClusterResults {
709 // Output only. The Cloud Storage URI of the diagnostic output.
710 // The output report is a plain text file with a summary of collected
712 string output_uri = 1;