push all website files

This commit is contained in:
Jacob Levine
2019-01-06 13:14:45 -06:00
parent d7301e26c3
commit d2d5d4c04e
15662 changed files with 2166516 additions and 0 deletions

View File

@@ -0,0 +1,597 @@
// Copyright 2017 Google Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
syntax = "proto3";
package google.cloud.dataproc.v1;
import "google/api/annotations.proto";
import "google/cloud/dataproc/v1/operations.proto";
import "google/longrunning/operations.proto";
import "google/protobuf/duration.proto";
import "google/protobuf/field_mask.proto";
import "google/protobuf/timestamp.proto";
option go_package = "google.golang.org/genproto/googleapis/cloud/dataproc/v1;dataproc";
option java_multiple_files = true;
option java_outer_classname = "ClustersProto";
option java_package = "com.google.cloud.dataproc.v1";
// The ClusterController service provides methods to manage clusters
// of Google Compute Engine instances.
service ClusterController {
// Creates a cluster in a project.
//
// In the REST mapping, the `cluster` field of the request is sent as the
// HTTP request body.
rpc CreateCluster(CreateClusterRequest) returns (google.longrunning.Operation) {
option (google.api.http) = { post: "/v1/projects/{project_id}/regions/{region}/clusters" body: "cluster" };
}
// Updates a cluster in a project.
//
// In the REST mapping, the `cluster` field of the request is sent as the
// HTTP request body; see `UpdateClusterRequest.update_mask` for the fields
// that can be changed.
rpc UpdateCluster(UpdateClusterRequest) returns (google.longrunning.Operation) {
option (google.api.http) = { patch: "/v1/projects/{project_id}/regions/{region}/clusters/{cluster_name}" body: "cluster" };
}
// Deletes a cluster in a project.
rpc DeleteCluster(DeleteClusterRequest) returns (google.longrunning.Operation) {
option (google.api.http) = { delete: "/v1/projects/{project_id}/regions/{region}/clusters/{cluster_name}" };
}
// Gets the resource representation for a cluster in a project.
rpc GetCluster(GetClusterRequest) returns (Cluster) {
option (google.api.http) = { get: "/v1/projects/{project_id}/regions/{region}/clusters/{cluster_name}" };
}
// Lists all clusters in a region of a project.
rpc ListClusters(ListClustersRequest) returns (ListClustersResponse) {
option (google.api.http) = { get: "/v1/projects/{project_id}/regions/{region}/clusters" };
}
// Gets cluster diagnostic information.
// After the operation completes, the Operation.response field
// contains `DiagnoseClusterResults`.
rpc DiagnoseCluster(DiagnoseClusterRequest) returns (google.longrunning.Operation) {
option (google.api.http) = { post: "/v1/projects/{project_id}/regions/{region}/clusters/{cluster_name}:diagnose" body: "*" };
}
}
// Describes the identifying information, config, and status of
// a cluster of Google Compute Engine instances.
message Cluster {
// Required. The Google Cloud Platform project ID that the cluster belongs to.
string project_id = 1;
// Required. The cluster name. Cluster names within a project must be
// unique. Names of deleted clusters can be reused.
string cluster_name = 2;
// Required. The cluster config. Note that Cloud Dataproc may set
// default values, and values may change when clusters are updated.
ClusterConfig config = 3;
// Optional. The labels to associate with this cluster.
// Label **keys** must contain 1 to 63 characters, and must conform to
// [RFC 1035](https://www.ietf.org/rfc/rfc1035.txt).
// Label **values** may be empty, but, if present, must contain 1 to 63
// characters, and must conform to [RFC 1035](https://www.ietf.org/rfc/rfc1035.txt).
// No more than 32 labels can be associated with a cluster.
map<string, string> labels = 8;
// Output-only. Cluster status.
ClusterStatus status = 4;
// Output-only. The previous cluster statuses.
repeated ClusterStatus status_history = 7;
// Output-only. A cluster UUID (Universally Unique Identifier). Cloud Dataproc
// generates this value when it creates the cluster.
string cluster_uuid = 6;
// Contains cluster daemon metrics such as HDFS and YARN stats.
//
// **Beta Feature**: This report is available for testing purposes only. It may
// be changed before final release.
ClusterMetrics metrics = 9;
}
// The cluster config: the staging bucket, the Compute Engine settings for
// each instance group, the software config, and the initialization actions
// of a cluster.
message ClusterConfig {
// Optional. A Google Cloud Storage staging bucket used for sharing generated
// SSH keys and config. If you do not specify a staging bucket, Cloud
// Dataproc will determine an appropriate Cloud Storage location (US,
// ASIA, or EU) for your cluster's staging bucket according to the Google
// Compute Engine zone where your cluster is deployed, and then it will create
// and manage this project-level, per-location bucket for you.
string config_bucket = 1;
// Required. The shared Google Compute Engine config settings for
// all instances in a cluster.
GceClusterConfig gce_cluster_config = 8;
// Optional. The Google Compute Engine config settings for
// the master instance in a cluster.
InstanceGroupConfig master_config = 9;
// Optional. The Google Compute Engine config settings for
// worker instances in a cluster.
InstanceGroupConfig worker_config = 10;
// Optional. The Google Compute Engine config settings for
// additional worker instances in a cluster.
InstanceGroupConfig secondary_worker_config = 12;
// Optional. The config settings for software inside the cluster.
SoftwareConfig software_config = 13;
// Optional. Commands to execute on each node after config is
// completed. By default, executables are run on master and all worker nodes.
// You can test a node's `role` metadata to run an executable on
// a master or worker node, as shown below using `curl` (you can also use `wget`):
//
//     ROLE=$(curl -H Metadata-Flavor:Google http://metadata/computeMetadata/v1/instance/attributes/dataproc-role)
//     if [[ "${ROLE}" == 'Master' ]]; then
//       ... master specific actions ...
//     else
//       ... worker specific actions ...
//     fi
repeated NodeInitializationAction initialization_actions = 11;
}
// Common config settings for resources of Google Compute Engine cluster
// instances, applicable to all instances in the cluster.
message GceClusterConfig {
// Optional. The zone where the Google Compute Engine cluster will be located.
// On a create request, it is required in the "global" region. If omitted
// in a non-global Cloud Dataproc region, the service will pick a zone in the
// corresponding Compute Engine region. On a get request, zone will
// always be present.
//
// A full URL, partial URI, or short name are valid. Examples:
//
// * `https://www.googleapis.com/compute/v1/projects/[project_id]/zones/[zone]`
// * `projects/[project_id]/zones/[zone]`
// * `us-central1-f`
string zone_uri = 1;
// Optional. The Google Compute Engine network to be used for machine
// communications. Cannot be specified with `subnetwork_uri`. If neither
// `network_uri` nor `subnetwork_uri` is specified, the "default" network of
// the project is used, if it exists. Cannot be a "Custom Subnet Network" (see
// [Using Subnetworks](/compute/docs/subnetworks) for more information).
//
// A full URL, partial URI, or short name are valid. Examples:
//
// * `https://www.googleapis.com/compute/v1/projects/[project_id]/regions/global/default`
// * `projects/[project_id]/regions/global/default`
// * `default`
string network_uri = 2;
// Optional. The Google Compute Engine subnetwork to be used for machine
// communications. Cannot be specified with `network_uri`.
//
// A full URL, partial URI, or short name are valid. Examples:
//
// * `https://www.googleapis.com/compute/v1/projects/[project_id]/regions/us-east1/sub0`
// * `projects/[project_id]/regions/us-east1/sub0`
// * `sub0`
string subnetwork_uri = 6;
// Optional. If true, all instances in the cluster will only have internal IP
// addresses. By default, clusters are not restricted to internal IP addresses,
// and will have ephemeral external IP addresses assigned to each instance.
// This `internal_ip_only` restriction can only be enabled for subnetwork
// enabled networks, and all off-cluster dependencies must be configured to be
// accessible without external IP addresses.
bool internal_ip_only = 7;
// Optional. The service account of the instances. Defaults to the default
// Google Compute Engine service account. Custom service accounts need
// permissions equivalent to the following IAM roles:
//
// * roles/logging.logWriter
// * roles/storage.objectAdmin
//
// (see https://cloud.google.com/compute/docs/access/service-accounts#custom_service_accounts
// for more information).
// Example: `[account_id]@[project_id].iam.gserviceaccount.com`
string service_account = 8;
// Optional. The URIs of service account scopes to be included in Google
// Compute Engine instances. The following base set of scopes is always
// included:
//
// * https://www.googleapis.com/auth/cloud.useraccounts.readonly
// * https://www.googleapis.com/auth/devstorage.read_write
// * https://www.googleapis.com/auth/logging.write
//
// If no scopes are specified, the following defaults are also provided:
//
// * https://www.googleapis.com/auth/bigquery
// * https://www.googleapis.com/auth/bigtable.admin.table
// * https://www.googleapis.com/auth/bigtable.data
// * https://www.googleapis.com/auth/devstorage.full_control
repeated string service_account_scopes = 3;
// The Google Compute Engine tags to add to all instances (see
// [Tagging instances](/compute/docs/label-or-tag-resources#tags)).
repeated string tags = 4;
// The Google Compute Engine metadata entries to add to all instances (see
// [Project and instance metadata](https://cloud.google.com/compute/docs/storing-retrieving-metadata#project_and_instance_metadata)).
map<string, string> metadata = 5;
}
// The config settings for Google Compute Engine resources in
// an instance group, such as a master or worker group.
message InstanceGroupConfig {
// Optional. The number of VM instances in the instance group.
// For master instance groups, must be set to 1.
int32 num_instances = 1;
// Optional. The list of instance names. Cloud Dataproc derives the names from
// `cluster_name`, `num_instances`, and the instance group if not set by user
// (recommended practice is to let Cloud Dataproc derive the name).
repeated string instance_names = 2;
// Output-only. The Google Compute Engine image resource used for cluster
// instances. Inferred from `SoftwareConfig.image_version`.
string image_uri = 3;
// Optional. The Google Compute Engine machine type used for cluster instances.
//
// A full URL, partial URI, or short name are valid. Examples:
//
// * `https://www.googleapis.com/compute/v1/projects/[project_id]/zones/us-east1-a/machineTypes/n1-standard-2`
// * `projects/[project_id]/zones/us-east1-a/machineTypes/n1-standard-2`
// * `n1-standard-2`
string machine_type_uri = 4;
// Optional. Disk option config settings.
DiskConfig disk_config = 5;
// Optional. Specifies that this instance group contains preemptible instances.
bool is_preemptible = 6;
// Output-only. The config for Google Compute Engine Instance Group
// Manager that manages this group.
// This is only used for preemptible instance groups.
ManagedGroupConfig managed_group_config = 7;
// Optional. The Google Compute Engine accelerator configuration for these
// instances.
//
// **Beta Feature**: This feature is still under development. It may be
// changed before final release.
repeated AcceleratorConfig accelerators = 8;
}
// Specifies the resources used to actively manage an instance group.
// Both fields are output-only names reported by the service.
message ManagedGroupConfig {
// Output-only. The name of the Instance Template used for the Managed
// Instance Group.
string instance_template_name = 1;
// Output-only. The name of the Instance Group Manager for this group.
string instance_group_manager_name = 2;
}
// Specifies the type and number of accelerator cards attached to the instances
// of an instance group (see [GPUs on Compute Engine](/compute/docs/gpus/)).
message AcceleratorConfig {
// Full URL, partial URI, or short name of the accelerator type resource to
// expose to this instance. See [Google Compute Engine AcceleratorTypes](
// /compute/docs/reference/beta/acceleratorTypes)
//
// Examples:
//
// * `https://www.googleapis.com/compute/beta/projects/[project_id]/zones/us-east1-a/acceleratorTypes/nvidia-tesla-k80`
// * `projects/[project_id]/zones/us-east1-a/acceleratorTypes/nvidia-tesla-k80`
// * `nvidia-tesla-k80`
string accelerator_type_uri = 1;
// The number of the accelerator cards of this type exposed to this instance.
int32 accelerator_count = 2;
}
// Specifies the config of disk options for a group of VM instances:
// the boot disk size and the number of attached local SSDs.
message DiskConfig {
// Optional. Size in GB of the boot disk (default is 500GB).
int32 boot_disk_size_gb = 1;
// Optional. Number of attached SSDs, from 0 to 4 (default is 0).
// If SSDs are not attached, the boot disk is used to store runtime logs and
// [HDFS](https://hadoop.apache.org/docs/r1.2.1/hdfs_user_guide.html) data.
// If one or more SSDs are attached, this runtime bulk
// data is spread across them, and the boot disk contains only basic
// config and installed binaries.
int32 num_local_ssds = 2;
}
// Specifies an executable to run on a fully configured node and a
// timeout period for executable completion.
//
// Used as the element type of `ClusterConfig.initialization_actions`.
message NodeInitializationAction {
// Required. Google Cloud Storage URI of executable file.
string executable_file = 1;
// Optional. Amount of time executable has to complete. Default is
// 10 minutes. Cluster creation fails with an explanatory error message (the
// name of the executable that caused the error and the exceeded timeout
// period) if the executable is not completed at end of the timeout period.
google.protobuf.Duration execution_timeout = 2;
}
// The status of a cluster and its instances.
message ClusterStatus {
// The cluster state.
enum State {
// The cluster state is unknown. This is the zero (default) value.
UNKNOWN = 0;
// The cluster is being created and set up. It is not ready for use.
CREATING = 1;
// The cluster is currently running and healthy. It is ready for use.
RUNNING = 2;
// The cluster encountered an error. It is not ready for use.
ERROR = 3;
// The cluster is being deleted. It cannot be used.
DELETING = 4;
// The cluster is being updated. It continues to accept and process jobs.
UPDATING = 5;
}
// The cluster substate. Both non-zero values apply to a cluster in the
// RUNNING state.
enum Substate {
// No substate is specified. This is the zero (default) value.
UNSPECIFIED = 0;
// The cluster is known to be in an unhealthy state
// (for example, critical daemons are not running or HDFS capacity is
// exhausted).
//
// Applies to RUNNING state.
UNHEALTHY = 1;
// The agent-reported status is out of date (may occur if
// Cloud Dataproc loses communication with Agent).
//
// Applies to RUNNING state.
STALE_STATUS = 2;
}
// Output-only. The cluster's state.
State state = 1;
// Output-only. Optional details of cluster's state.
string detail = 2;
// Output-only. Time when this state was entered.
google.protobuf.Timestamp state_start_time = 3;
// Output-only. Additional state information that includes
// status reported by the agent.
Substate substate = 4;
}
// Specifies the selection and config of software inside the cluster:
// the image version and the daemon config properties.
message SoftwareConfig {
// Optional. The version of software inside the cluster. It must match the
// regular expression `[0-9]+\.[0-9]+`. If unspecified, it defaults to the
// latest version (see [Cloud Dataproc Versioning](/dataproc/versioning)).
string image_version = 1;
// Optional. The properties to set on daemon config files.
//
// Property keys are specified in `prefix:property` format, such as
// `core:fs.defaultFS`. The following are supported prefixes
// and their mappings:
//
// * capacity-scheduler: `capacity-scheduler.xml`
// * core: `core-site.xml`
// * distcp: `distcp-default.xml`
// * hdfs: `hdfs-site.xml`
// * hive: `hive-site.xml`
// * mapred: `mapred-site.xml`
// * pig: `pig.properties`
// * spark: `spark-defaults.conf`
// * yarn: `yarn-site.xml`
//
// For more information, see
// [Cluster properties](/dataproc/docs/concepts/cluster-properties).
map<string, string> properties = 2;
}
// Contains cluster daemon metrics, such as HDFS and YARN stats.
// Each metric set is a map from a string key to a 64-bit integer value.
//
// **Beta Feature**: This report is available for testing purposes only. It may
// be changed before final release.
message ClusterMetrics {
// The HDFS metrics.
map<string, int64> hdfs_metrics = 1;
// The YARN metrics.
map<string, int64> yarn_metrics = 2;
}
// A request to create a cluster.
//
// Request message for `ClusterController.CreateCluster`. Field numbers do not
// follow declaration order (`region` is field 3); they identify the fields on
// the wire and must not be changed.
message CreateClusterRequest {
// Required. The ID of the Google Cloud Platform project that the cluster
// belongs to.
string project_id = 1;
// Required. The Cloud Dataproc region in which to handle the request.
string region = 3;
// Required. The cluster to create. In the REST mapping this field is the
// HTTP request body.
Cluster cluster = 2;
}
// A request to update a cluster.
//
// Request message for `ClusterController.UpdateCluster`.
message UpdateClusterRequest {
// Required. The ID of the Google Cloud Platform project the
// cluster belongs to.
string project_id = 1;
// Required. The Cloud Dataproc region in which to handle the request.
string region = 5;
// Required. The cluster name.
string cluster_name = 2;
// Required. The changes to the cluster. In the REST mapping this field is
// the HTTP request body.
Cluster cluster = 3;
// Required. Specifies the path, relative to `Cluster`, of
// the field to update. For example, to change the number of workers
// in a cluster to 5, the `update_mask` parameter would be
// specified as `config.worker_config.num_instances`,
// and the `PATCH` request body would specify the new value, as follows:
//
//     {
//       "config":{
//         "workerConfig":{
//           "numInstances":"5"
//         }
//       }
//     }
//
// Similarly, to change the number of preemptible workers in a cluster to 5,
// the `update_mask` parameter would be
// `config.secondary_worker_config.num_instances`, and the `PATCH` request
// body would be set as follows:
//
//     {
//       "config":{
//         "secondaryWorkerConfig":{
//           "numInstances":"5"
//         }
//       }
//     }
//
// <strong>Note:</strong> Currently, only the following fields can be updated:
//
// <table>
// <tbody>
// <tr>
// <td><strong>Mask</strong></td>
// <td><strong>Purpose</strong></td>
// </tr>
// <tr>
// <td><strong><em>labels</em></strong></td>
// <td>Update labels</td>
// </tr>
// <tr>
// <td><strong><em>config.worker_config.num_instances</em></strong></td>
// <td>Resize primary worker group</td>
// </tr>
// <tr>
// <td><strong><em>config.secondary_worker_config.num_instances</em></strong></td>
// <td>Resize secondary worker group</td>
// </tr>
// </tbody>
// </table>
google.protobuf.FieldMask update_mask = 4;
}
// A request to delete a cluster.
//
// Request message for `ClusterController.DeleteCluster`. All three fields
// appear as path parameters in that method's HTTP rule.
message DeleteClusterRequest {
// Required. The ID of the Google Cloud Platform project that the cluster
// belongs to.
string project_id = 1;
// Required. The Cloud Dataproc region in which to handle the request.
string region = 3;
// Required. The cluster name.
string cluster_name = 2;
}
// Request to get the resource representation for a cluster in a project.
//
// Request message for `ClusterController.GetCluster`. All three fields
// appear as path parameters in that method's HTTP rule.
message GetClusterRequest {
// Required. The ID of the Google Cloud Platform project that the cluster
// belongs to.
string project_id = 1;
// Required. The Cloud Dataproc region in which to handle the request.
string region = 3;
// Required. The cluster name.
string cluster_name = 2;
}
// A request to list the clusters in a project.
//
// Request message for `ClusterController.ListClusters`. Results are paged
// with `page_size` / `page_token`.
message ListClustersRequest {
// Required. The ID of the Google Cloud Platform project that the cluster
// belongs to.
string project_id = 1;
// Required. The Cloud Dataproc region in which to handle the request.
string region = 4;
// Optional. A filter constraining the clusters to list. Filters are
// case-sensitive and have the following syntax:
//
//     field = value [AND [field = value]] ...
//
// where **field** is one of `status.state`, `clusterName`, or `labels.[KEY]`,
// and `[KEY]` is a label key. **value** can be `*` to match all values.
// `status.state` can be one of the following: `ACTIVE`, `INACTIVE`,
// `CREATING`, `RUNNING`, `ERROR`, `DELETING`, or `UPDATING`. `ACTIVE`
// contains the `CREATING`, `UPDATING`, and `RUNNING` states. `INACTIVE`
// contains the `DELETING` and `ERROR` states.
// `clusterName` is the name of the cluster provided at creation time.
// Only the logical `AND` operator is supported; space-separated items are
// treated as having an implicit `AND` operator.
//
// Example filter:
//
//     status.state = ACTIVE AND clusterName = mycluster
//     AND labels.env = staging AND labels.starred = *
string filter = 5;
// Optional. The standard List page size.
int32 page_size = 2;
// Optional. The standard List page token.
string page_token = 3;
}
// The list of all clusters in a project.
//
// Response message for `ClusterController.ListClusters`.
message ListClustersResponse {
// Output-only. The clusters in the project.
repeated Cluster clusters = 1;
// Output-only. This token is included in the response if there are more
// results to fetch. To fetch additional results, provide this value as the
// `page_token` in a subsequent `ListClustersRequest`.
string next_page_token = 2;
}
// A request to collect cluster diagnostic information.
//
// Request message for `ClusterController.DiagnoseCluster`.
message DiagnoseClusterRequest {
// Required. The ID of the Google Cloud Platform project that the cluster
// belongs to.
string project_id = 1;
// Required. The Cloud Dataproc region in which to handle the request.
string region = 3;
// Required. The cluster name.
string cluster_name = 2;
}
// The location of diagnostic output.
//
// NOTE(review): presumably this is the message carried in the response of
// the operation returned by `ClusterController.DiagnoseCluster` - confirm.
message DiagnoseClusterResults {
// Output-only. The Google Cloud Storage URI of the diagnostic output.
// The output report is a plain text file with a summary of collected
// diagnostics.
string output_uri = 1;
}

View File

@@ -0,0 +1,740 @@
// Copyright 2017 Google Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
syntax = "proto3";
package google.cloud.dataproc.v1;
import "google/api/annotations.proto";
import "google/protobuf/empty.proto";
import "google/protobuf/field_mask.proto";
import "google/protobuf/timestamp.proto";
option go_package = "google.golang.org/genproto/googleapis/cloud/dataproc/v1;dataproc";
option java_multiple_files = true;
option java_outer_classname = "JobsProto";
option java_package = "com.google.cloud.dataproc.v1";
// The JobController provides methods to manage jobs.
service JobController {
// Submits a job to a cluster.
//
// In the REST mapping, the whole request (other than the path parameters)
// is sent as the HTTP request body.
rpc SubmitJob(SubmitJobRequest) returns (Job) {
option (google.api.http) = { post: "/v1/projects/{project_id}/regions/{region}/jobs:submit" body: "*" };
}
// Gets the resource representation for a job in a project.
rpc GetJob(GetJobRequest) returns (Job) {
option (google.api.http) = { get: "/v1/projects/{project_id}/regions/{region}/jobs/{job_id}" };
}
// Lists the jobs in a region of a project.
rpc ListJobs(ListJobsRequest) returns (ListJobsResponse) {
option (google.api.http) = { get: "/v1/projects/{project_id}/regions/{region}/jobs" };
}
// Updates a job in a project.
//
// In the REST mapping, the `job` field of the request is sent as the HTTP
// request body.
rpc UpdateJob(UpdateJobRequest) returns (Job) {
option (google.api.http) = { patch: "/v1/projects/{project_id}/regions/{region}/jobs/{job_id}" body: "job" };
}
// Starts a job cancellation request. To access the job resource
// after cancellation, call
// [regions/{region}/jobs.list](/dataproc/docs/reference/rest/v1/projects.regions.jobs/list) or
// [regions/{region}/jobs.get](/dataproc/docs/reference/rest/v1/projects.regions.jobs/get).
rpc CancelJob(CancelJobRequest) returns (Job) {
option (google.api.http) = { post: "/v1/projects/{project_id}/regions/{region}/jobs/{job_id}:cancel" body: "*" };
}
// Deletes the job from the project. If the job is active, the delete fails,
// and the response returns `FAILED_PRECONDITION`.
rpc DeleteJob(DeleteJobRequest) returns (google.protobuf.Empty) {
option (google.api.http) = { delete: "/v1/projects/{project_id}/regions/{region}/jobs/{job_id}" };
}
}
// The runtime logging config of the job.
message LoggingConfig {
// The Log4j level for job execution. When running an
// [Apache Hive](http://hive.apache.org/) job, Cloud
// Dataproc configures the Hive client to an equivalent verbosity level.
enum Level {
// Level is unspecified. Use default level for log4j.
LEVEL_UNSPECIFIED = 0;
// Use ALL level for log4j.
ALL = 1;
// Use TRACE level for log4j.
TRACE = 2;
// Use DEBUG level for log4j.
DEBUG = 3;
// Use INFO level for log4j.
INFO = 4;
// Use WARN level for log4j.
WARN = 5;
// Use ERROR level for log4j.
ERROR = 6;
// Use FATAL level for log4j.
FATAL = 7;
// Turn off log4j.
OFF = 8;
}
// The per-package log levels for the driver, keyed by package name. This
// may include "root" package name to configure rootLogger.
// Examples:
//     'com.google = FATAL', 'root = INFO', 'org.apache = DEBUG'
map<string, Level> driver_log_levels = 2;
}
// A Cloud Dataproc job for running
// [Apache Hadoop MapReduce](https://hadoop.apache.org/docs/current/hadoop-mapreduce-client/hadoop-mapreduce-client-core/MapReduceTutorial.html)
// jobs on [Apache Hadoop YARN](https://hadoop.apache.org/docs/r2.7.1/hadoop-yarn/hadoop-yarn-site/YARN.html).
message HadoopJob {
// Required. Indicates the location of the driver's main class. Specify
// either the jar file that contains the main class or the main class name.
// To specify both, add the jar file to `jar_file_uris`, and then specify
// the main class name in this property.
//
// `main_jar_file_uri` and `main_class` are members of a oneof, so at most
// one of them can be set at a time.
oneof driver {
// The HCFS URI of the jar file containing the main class.
// Examples:
//     'gs://foo-bucket/analytics-binaries/extract-useful-metrics-mr.jar'
//     'hdfs:/tmp/test-samples/custom-wordcount.jar'
//     'file:///home/usr/lib/hadoop-mapreduce/hadoop-mapreduce-examples.jar'
string main_jar_file_uri = 1;
// The name of the driver's main class. The jar file containing the class
// must be in the default CLASSPATH or specified in `jar_file_uris`.
string main_class = 2;
}
// Optional. The arguments to pass to the driver. Do not
// include arguments, such as `-libjars` or `-Dfoo=bar`, that can be set as job
// properties, since a collision may occur that causes an incorrect job
// submission.
repeated string args = 3;
// Optional. Jar file URIs to add to the CLASSPATHs of the
// Hadoop driver and tasks.
repeated string jar_file_uris = 4;
// Optional. HCFS (Hadoop Compatible Filesystem) URIs of files to be copied
// to the working directory of Hadoop drivers and distributed tasks. Useful
// for naively parallel tasks.
repeated string file_uris = 5;
// Optional. HCFS URIs of archives to be extracted in the working directory of
// Hadoop drivers and tasks. Supported file types:
// .jar, .tar, .tar.gz, .tgz, or .zip.
repeated string archive_uris = 6;
// Optional. A mapping of property names to values, used to configure Hadoop.
// Properties that conflict with values set by the Cloud Dataproc API may be
// overwritten. Can include properties set in /etc/hadoop/conf/*-site and
// classes in user code.
map<string, string> properties = 7;
// Optional. The runtime log config for job execution.
LoggingConfig logging_config = 8;
}
// A Cloud Dataproc job for running [Apache Spark](http://spark.apache.org/)
// applications on YARN.
message SparkJob {
// Required. The specification of the main method to call to drive the job.
// Specify either the jar file that contains the main class or the main class
// name. To pass both a main jar and a main class in that jar, add the jar to
// `jar_file_uris`, and then specify the main class name in `main_class`.
//
// `main_jar_file_uri` and `main_class` are members of a oneof, so at most
// one of them can be set at a time.
oneof driver {
// The HCFS URI of the jar file that contains the main class.
string main_jar_file_uri = 1;
// The name of the driver's main class. The jar file that contains the class
// must be in the default CLASSPATH or specified in `jar_file_uris`.
string main_class = 2;
}
// Optional. The arguments to pass to the driver. Do not include arguments,
// such as `--conf`, that can be set as job properties, since a collision may
// occur that causes an incorrect job submission.
repeated string args = 3;
// Optional. HCFS URIs of jar files to add to the CLASSPATHs of the
// Spark driver and tasks.
repeated string jar_file_uris = 4;
// Optional. HCFS URIs of files to be copied to the working directory of
// Spark drivers and distributed tasks. Useful for naively parallel tasks.
repeated string file_uris = 5;
// Optional. HCFS URIs of archives to be extracted in the working directory
// of Spark drivers and tasks. Supported file types:
// .jar, .tar, .tar.gz, .tgz, and .zip.
repeated string archive_uris = 6;
// Optional. A mapping of property names to values, used to configure Spark.
// Properties that conflict with values set by the Cloud Dataproc API may be
// overwritten. Can include properties set in
// /etc/spark/conf/spark-defaults.conf and classes in user code.
map<string, string> properties = 7;
// Optional. The runtime log config for job execution.
LoggingConfig logging_config = 8;
}
// A Cloud Dataproc job for running
// [Apache PySpark](https://spark.apache.org/docs/0.9.0/python-programming-guide.html)
// applications on YARN.
message PySparkJob {
// Required. The HCFS URI of the main Python file to use as the driver. Must
// be a .py file.
string main_python_file_uri = 1;
// Optional. The arguments to pass to the driver. Do not include arguments,
// such as `--conf`, that can be set as job properties, since a collision may
// occur that causes an incorrect job submission.
repeated string args = 2;
// Optional. HCFS file URIs of Python files to pass to the PySpark
// framework. Supported file types: .py, .egg, and .zip.
repeated string python_file_uris = 3;
// Optional. HCFS URIs of jar files to add to the CLASSPATHs of the
// Python driver and tasks.
repeated string jar_file_uris = 4;
// Optional. HCFS URIs of files to be copied to the working directory of
// Python drivers and distributed tasks. Useful for naively parallel tasks.
repeated string file_uris = 5;
// Optional. HCFS URIs of archives to be extracted in the working directory of
// Python drivers and tasks. Supported file types:
// .jar, .tar, .tar.gz, .tgz, and .zip.
repeated string archive_uris = 6;
// Optional. A mapping of property names to values, used to configure PySpark.
// Properties that conflict with values set by the Cloud Dataproc API may be
// overwritten. Can include properties set in
// /etc/spark/conf/spark-defaults.conf and classes in user code.
map<string, string> properties = 7;
// Optional. The runtime log config for job execution.
LoggingConfig logging_config = 8;
}
// A list of queries to run on a cluster.
message QueryList {
// Required. The queries to execute. You do not need to terminate a query
// with a semicolon. Multiple queries can be specified in one string
// by separating each with a semicolon. Here is an example of a Cloud
// Dataproc API snippet that uses a QueryList to specify a HiveJob:
//
//     "hiveJob": {
//       "queryList": {
//         "queries": [
//           "query1",
//           "query2",
//           "query3;query4"
//         ]
//       }
//     }
repeated string queries = 1;
}
// A Cloud Dataproc job for running [Apache Hive](https://hive.apache.org/)
// queries on YARN.
message HiveJob {
// Required. The sequence of Hive queries to execute, specified as either
// an HCFS file URI or a list of queries.
//
// `query_file_uri` and `query_list` are members of a oneof, so at most one
// of them can be set at a time.
oneof queries {
// The HCFS URI of the script that contains Hive queries.
string query_file_uri = 1;
// A list of queries.
QueryList query_list = 2;
}
// Optional. Whether to continue executing queries if a query fails.
// The default value is `false`. Setting to `true` can be useful when executing
// independent parallel queries.
bool continue_on_failure = 3;
// Optional. Mapping of query variable names to values (equivalent to the
// Hive command: `SET name="value";`).
map<string, string> script_variables = 4;
// Optional. A mapping of property names and values, used to configure Hive.
// Properties that conflict with values set by the Cloud Dataproc API may be
// overwritten. Can include properties set in /etc/hadoop/conf/*-site.xml,
// /etc/hive/conf/hive-site.xml, and classes in user code.
map<string, string> properties = 5;
// Optional. HCFS URIs of jar files to add to the CLASSPATH of the
// Hive server and Hadoop MapReduce (MR) tasks. Can contain Hive SerDes
// and UDFs.
repeated string jar_file_uris = 6;
}
// Cloud Dataproc job that runs
// [Apache Spark SQL](http://spark.apache.org/sql/) queries.
message SparkSqlJob {
  // Required. The Spark SQL queries to run, in sequence. They are given
  // either as an HCFS file URI or as a list of queries.
  oneof queries {
    // HCFS URI of the script containing the SQL queries.
    string query_file_uri = 1;

    // The queries, given inline as a list.
    QueryList query_list = 2;
  }

  // Optional. Query variable names mapped to their values (equivalent to the
  // Spark SQL command: SET `name="value";`).
  map<string, string> script_variables = 3;

  // Optional. Property names mapped to values, used to configure
  // Spark SQL's SparkConf. Properties that conflict with values set by the
  // Cloud Dataproc API may be overwritten.
  map<string, string> properties = 4;

  // Optional. HCFS URIs of jar files to put on the Spark CLASSPATH.
  //
  // NOTE(review): field number 56 is out of sequence with its neighbours. It
  // is part of the wire contract, so it must not be renumbered.
  repeated string jar_file_uris = 56;

  // Optional. Runtime log config used while the job executes.
  LoggingConfig logging_config = 6;
}
// Cloud Dataproc job that runs [Apache Pig](https://pig.apache.org/)
// queries on YARN.
message PigJob {
  // Required. The Pig queries to run, in sequence. They are given either as
  // an HCFS file URI or as a list of queries.
  oneof queries {
    // HCFS URI of the script containing the Pig queries.
    string query_file_uri = 1;

    // The queries, given inline as a list.
    QueryList query_list = 2;
  }

  // Optional. Whether execution should continue after a query fails.
  // Defaults to `false`. A value of `true` can be useful when the queries
  // are independent and run in parallel.
  bool continue_on_failure = 3;

  // Optional. Query variable names mapped to their values (equivalent to the
  // Pig command: `name=[value]`).
  map<string, string> script_variables = 4;

  // Optional. Property names mapped to values, used to configure Pig.
  // Properties that conflict with values set by the Cloud Dataproc API may be
  // overwritten. Can include properties set in /etc/hadoop/conf/*-site.xml,
  // /etc/pig/conf/pig.properties, and classes in user code.
  map<string, string> properties = 5;

  // Optional. HCFS URIs of jar files to put on the CLASSPATH of the Pig
  // Client and of Hadoop MapReduce (MR) tasks. Can contain Pig UDFs.
  repeated string jar_file_uris = 6;

  // Optional. Runtime log config used while the job executes.
  LoggingConfig logging_config = 7;
}
// Cloud Dataproc job config: identifies the cluster a job is placed on.
message JobPlacement {
  // Required. Name of the cluster to which the job will be submitted.
  string cluster_name = 1;

  // Output-only. Cluster UUID that the Cloud Dataproc service generates at
  // job submission time.
  string cluster_uuid = 2;
}
// Status of a Cloud Dataproc job.
message JobStatus {
  // The states a job can be in.
  enum State {
    // The job state is unknown.
    STATE_UNSPECIFIED = 0;

    // The job has been submitted but is not running yet (pending).
    PENDING = 1;

    // The service has received the job and finished initial setup; the job
    // will soon be submitted to the cluster.
    SETUP_DONE = 8;

    // The job is running on the cluster.
    RUNNING = 2;

    // A CancelJob request was received but is still pending.
    CANCEL_PENDING = 3;

    // Transient in-flight resources have been canceled, and the request to
    // cancel the running job has been issued to the cluster.
    CANCEL_STARTED = 7;

    // The job was cancelled successfully.
    CANCELLED = 4;

    // The job completed successfully.
    DONE = 5;

    // The job completed, but an error was encountered.
    ERROR = 6;

    // A job attempt failed. Failure details for this attempt are in the
    // detail field.
    //
    // Applies to restartable jobs only.
    ATTEMPT_FAILURE = 9;
  }

  // Additional detail about a job's state.
  enum Substate {
    // No substate is specified (the zero value).
    UNSPECIFIED = 0;

    // The Job is submitted to the agent.
    //
    // Applies to RUNNING state.
    SUBMITTED = 1;

    // The Job was received and is waiting to be executed (possibly waiting
    // for a condition to be met). The reason for the delay is given in the
    // "details" field.
    //
    // Applies to RUNNING state.
    QUEUED = 2;

    // The status reported by the agent is out of date, which may be caused
    // by a loss of communication between the agent and Cloud Dataproc. The
    // job will fail if the agent does not send a timely update.
    //
    // Applies to RUNNING state.
    STALE_STATUS = 3;
  }

  // Output-only. State message that specifies the overall job state.
  State state = 1;

  // Output-only. Optional details about the job state, for example an error
  // description when the state is <code>ERROR</code>.
  string details = 2;

  // Output-only. Time at which this state was entered.
  google.protobuf.Timestamp state_start_time = 6;

  // Output-only. Additional state information, including the status
  // reported by the agent.
  Substate substate = 7;
}
// Carries the full scoping needed to reference a job.
message JobReference {
  // Required. ID of the Google Cloud Platform project the job belongs to.
  string project_id = 1;

  // Optional. The job ID; it must be unique within the project. The server
  // generates it on job submission, or the user provides it as a means to
  // perform retries without creating duplicate jobs. The ID may contain only
  // letters (a-z, A-Z), numbers (0-9), underscores (_), or hyphens (-).
  // The maximum length is 100 characters.
  string job_id = 2;
}
// YARN application that a job created. The application information is a
// subset of
// <code>org.apache.hadoop.yarn.proto.YarnProtos.ApplicationReportProto</code>.
//
// **Beta Feature**: This report is available for testing purposes only. It may
// be changed before final release.
message YarnApplication {
  // State of the application; corresponds to
  // <code>YarnProtos.YarnApplicationStateProto</code>.
  enum State {
    // Status is unspecified.
    STATE_UNSPECIFIED = 0;

    // Status is NEW.
    NEW = 1;

    // Status is NEW_SAVING.
    NEW_SAVING = 2;

    // Status is SUBMITTED.
    SUBMITTED = 3;

    // Status is ACCEPTED.
    ACCEPTED = 4;

    // Status is RUNNING.
    RUNNING = 5;

    // Status is FINISHED.
    FINISHED = 6;

    // Status is FAILED.
    FAILED = 7;

    // Status is KILLED.
    KILLED = 8;
  }

  // Required. Name of the application.
  string name = 1;

  // Required. State of the application.
  State state = 2;

  // Required. Numerical progress of the application, from 1 to 100.
  float progress = 3;

  // Optional. HTTP URL of the ApplicationMaster, HistoryServer, or
  // TimelineServer that provides application-specific information. The URL
  // uses the internal hostname, and requires a proxy server for resolution
  // and, possibly, access.
  string tracking_url = 4;
}
// A Cloud Dataproc job resource.
message Job {
  // Optional. The fully qualified reference to the job, which can be used to
  // obtain the equivalent REST path of the job resource. If this property
  // is not specified when a job is created, the server generates a
  // <code>job_id</code>.
  JobReference reference = 1;

  // Required. Job information, including how, when, and where to
  // run the job.
  JobPlacement placement = 2;

  // Required. The application/framework-specific portion of the job.
  // Exactly one of the members below describes the job.
  oneof type_job {
    // Job is a Hadoop job.
    HadoopJob hadoop_job = 3;

    // Job is a Spark job.
    SparkJob spark_job = 4;

    // Job is a Pyspark job.
    PySparkJob pyspark_job = 5;

    // Job is a Hive job.
    HiveJob hive_job = 6;

    // Job is a Pig job.
    PigJob pig_job = 7;

    // Job is a SparkSql job.
    SparkSqlJob spark_sql_job = 12;
  }

  // Output-only. The job status. Additional application-specific
  // status information may be contained in the <code>type_job</code>
  // and <code>yarn_applications</code> fields.
  JobStatus status = 8;

  // Output-only. The previous job status.
  repeated JobStatus status_history = 13;

  // Output-only. The collection of YARN applications spun up by this job.
  //
  // **Beta** Feature: This report is available for testing purposes only. It
  // may be changed before final release.
  repeated YarnApplication yarn_applications = 9;

  // Output-only. A URI pointing to the location of the stdout of the job's
  // driver program.
  string driver_output_resource_uri = 17;

  // Output-only. If present, the location of miscellaneous control files
  // which may be used as part of job setup and handling. If not present,
  // control files may be placed in the same location as
  // `driver_output_resource_uri`.
  string driver_control_files_uri = 15;

  // Optional. The labels to associate with this job.
  // Label **keys** must contain 1 to 63 characters, and must conform to
  // [RFC 1035](https://www.ietf.org/rfc/rfc1035.txt).
  // Label **values** may be empty, but, if present, must contain 1 to 63
  // characters, and must conform to
  // [RFC 1035](https://www.ietf.org/rfc/rfc1035.txt).
  // No more than 32 labels can be associated with a job.
  map<string, string> labels = 18;

  // Optional. Job scheduling configuration.
  JobScheduling scheduling = 20;
}
// Options that control job scheduling.
//
// **Beta Feature**: These options are available for testing purposes only.
// They may be changed before final release.
message JobScheduling {
  // Optional. Maximum number of times per hour that a driver may be
  // restarted, as a result of the driver terminating with non-zero code,
  // before the job is reported failed.
  //
  // A job may be reported as thrashing if the driver exits with non-zero
  // code 4 times within a 10 minute window.
  //
  // Maximum value is 10.
  int32 max_failures_per_hour = 1;
}
// Request message for submitting a job.
message SubmitJobRequest {
  // Required. ID of the Google Cloud Platform project the job belongs to.
  string project_id = 1;

  // Required. Cloud Dataproc region in which the request is handled.
  string region = 3;

  // Required. The job resource.
  Job job = 2;
}
// Request message for getting the resource representation of a job in a
// project.
message GetJobRequest {
  // Required. ID of the Google Cloud Platform project the job belongs to.
  string project_id = 1;

  // Required. Cloud Dataproc region in which the request is handled.
  string region = 3;

  // Required. ID of the job.
  string job_id = 2;
}
// Request message for listing the jobs in a project.
message ListJobsRequest {
  // Matcher that selects categories of job states.
  enum JobStateMatcher {
    // Match every job, whatever its state.
    ALL = 0;

    // Match only jobs in non-terminal states: PENDING, RUNNING, or
    // CANCEL_PENDING.
    ACTIVE = 1;

    // Match only jobs in terminal states: CANCELLED, DONE, or ERROR.
    NON_ACTIVE = 2;
  }

  // Required. ID of the Google Cloud Platform project the job belongs to.
  string project_id = 1;

  // Required. Cloud Dataproc region in which the request is handled.
  string region = 6;

  // Optional. Number of results to return in each response.
  int32 page_size = 2;

  // Optional. Page token returned by a previous call; used to request the
  // next page of results.
  string page_token = 3;

  // Optional. When set, the returned list contains only jobs that were
  // submitted to the named cluster.
  string cluster_name = 4;

  // Optional. Specifies enumerated categories of jobs to list.
  // (default = match ALL jobs).
  //
  // If `filter` is provided, `jobStateMatcher` will be ignored.
  JobStateMatcher job_state_matcher = 5;

  // Optional. Filter that constrains the jobs to list. Filters are
  // case-sensitive and use this syntax:
  //
  // [field = value] AND [field [= value]] ...
  //
  // where **field** is `status.state` or `labels.[KEY]`, and `[KEY]` is a
  // label key. **value** can be `*` to match all values.
  // `status.state` can be either `ACTIVE` or `NON_ACTIVE`.
  // Only the logical `AND` operator is supported; space-separated items are
  // treated as having an implicit `AND` operator.
  //
  // Example filter:
  //
  // status.state = ACTIVE AND labels.env = staging AND labels.starred = *
  string filter = 7;
}
// Request message for updating a job.
message UpdateJobRequest {
  // Required. ID of the Google Cloud Platform project the job belongs to.
  string project_id = 1;

  // Required. Cloud Dataproc region in which the request is handled.
  string region = 2;

  // Required. ID of the job.
  string job_id = 3;

  // Required. The changes to apply to the job.
  Job job = 4;

  // Required. Path, relative to <code>Job</code>, of the field to update.
  // For example, to update the labels of a Job the
  // <code>update_mask</code> parameter would be specified as
  // <code>labels</code>, and the `PATCH` request body would specify the new
  // value. <strong>Note:</strong> Currently, <code>labels</code> is the only
  // field that can be updated.
  google.protobuf.FieldMask update_mask = 5;
}
// Response message carrying a list of jobs in a project.
message ListJobsResponse {
  // Output-only. The list of jobs.
  repeated Job jobs = 1;

  // Optional. Token included in the response when more results remain to be
  // fetched. To fetch them, pass this value as the `page_token` of a
  // subsequent <code>ListJobsRequest</code>.
  string next_page_token = 2;
}
// Request message for cancelling a job.
message CancelJobRequest {
  // Required. ID of the Google Cloud Platform project the job belongs to.
  string project_id = 1;

  // Required. Cloud Dataproc region in which the request is handled.
  string region = 3;

  // Required. ID of the job.
  string job_id = 2;
}
// Request message for deleting a job.
message DeleteJobRequest {
  // Required. ID of the Google Cloud Platform project the job belongs to.
  string project_id = 1;

  // Required. Cloud Dataproc region in which the request is handled.
  string region = 3;

  // Required. ID of the job.
  string job_id = 2;
}

View File

@@ -0,0 +1,85 @@
// Copyright 2017 Google Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
syntax = "proto3";
package google.cloud.dataproc.v1;
import "google/api/annotations.proto";
import "google/longrunning/operations.proto";
import "google/protobuf/empty.proto";
import "google/protobuf/timestamp.proto";
option go_package = "google.golang.org/genproto/googleapis/cloud/dataproc/v1;dataproc";
option java_multiple_files = true;
option java_outer_classname = "OperationsProto";
option java_package = "com.google.cloud.dataproc.v1";
// Status of a cluster operation.
message ClusterOperationStatus {
  // The states an operation can be in.
  enum State {
    // Unused.
    UNKNOWN = 0;

    // The operation has been created.
    PENDING = 1;

    // The operation is running.
    RUNNING = 2;

    // The operation is done; it was either cancelled or completed.
    DONE = 3;
  }

  // Output-only. Message containing the operation state.
  State state = 1;

  // Output-only. Message containing the detailed operation state.
  string inner_state = 2;

  // Output-only. Message containing any operation metadata details.
  string details = 3;

  // Output-only. Time at which this state was entered.
  google.protobuf.Timestamp state_start_time = 4;
}
// Metadata that describes a cluster operation.
message ClusterOperationMetadata {
  // Output-only. Name of the cluster the operation is for.
  string cluster_name = 7;

  // Output-only. UUID of the cluster the operation is for.
  string cluster_uuid = 8;

  // Output-only. Current status of the operation.
  ClusterOperationStatus status = 9;

  // Output-only. Previous status of the operation.
  repeated ClusterOperationStatus status_history = 10;

  // Output-only. Type of the operation.
  string operation_type = 11;

  // Output-only. Short description of the operation.
  string description = 12;

  // Output-only. Labels associated with the operation.
  map<string, string> labels = 13;

  // Output-only. Errors encountered during operation execution.
  // NOTE(review): the field is named `warnings` while the description says
  // "errors" - confirm which wording is intended.
  repeated string warnings = 14;
}

View File

@@ -0,0 +1,712 @@
// Copyright 2018 Google Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
syntax = "proto3";
package google.cloud.dataproc.v1beta2;
import "google/api/annotations.proto";
import "google/cloud/dataproc/v1beta2/shared.proto";
import "google/longrunning/operations.proto";
import "google/protobuf/duration.proto";
import "google/protobuf/field_mask.proto";
import "google/protobuf/timestamp.proto";
option go_package = "google.golang.org/genproto/googleapis/cloud/dataproc/v1beta2;dataproc";
option java_multiple_files = true;
option java_outer_classname = "ClustersProto";
option java_package = "com.google.cloud.dataproc.v1beta2";
// The ClusterControllerService exposes the methods used to manage clusters
// of Compute Engine instances.
service ClusterController {
  // Creates a cluster in a project.
  rpc CreateCluster(CreateClusterRequest)
      returns (google.longrunning.Operation) {
    option (google.api.http) = {
      post: "/v1beta2/projects/{project_id}/regions/{region}/clusters"
      body: "cluster"
    };
  }

  // Updates a cluster in a project.
  rpc UpdateCluster(UpdateClusterRequest)
      returns (google.longrunning.Operation) {
    option (google.api.http) = {
      patch: "/v1beta2/projects/{project_id}/regions/{region}/clusters/{cluster_name}"
      body: "cluster"
    };
  }

  // Deletes a cluster in a project.
  rpc DeleteCluster(DeleteClusterRequest)
      returns (google.longrunning.Operation) {
    option (google.api.http) = {
      delete: "/v1beta2/projects/{project_id}/regions/{region}/clusters/{cluster_name}"
    };
  }

  // Gets the resource representation for a cluster in a project.
  rpc GetCluster(GetClusterRequest) returns (Cluster) {
    option (google.api.http) = {
      get: "/v1beta2/projects/{project_id}/regions/{region}/clusters/{cluster_name}"
    };
  }

  // Lists all clusters under regions/{region} in a project.
  rpc ListClusters(ListClustersRequest) returns (ListClustersResponse) {
    option (google.api.http) = {
      get: "/v1beta2/projects/{project_id}/regions/{region}/clusters"
    };
  }

  // Gets cluster diagnostic information.
  // Once the operation completes, the Operation.response field
  // contains `DiagnoseClusterOutputLocation`.
  rpc DiagnoseCluster(DiagnoseClusterRequest)
      returns (google.longrunning.Operation) {
    option (google.api.http) = {
      post: "/v1beta2/projects/{project_id}/regions/{region}/clusters/{cluster_name}:diagnose"
      body: "*"
    };
  }
}
// Identifying information, config, and status of a cluster of Compute Engine
// instances.
message Cluster {
  // Required. ID of the Google Cloud Platform project the cluster belongs to.
  string project_id = 1;

  // Required. Name of the cluster. Cluster names must be unique within a
  // project. The name of a deleted cluster can be reused.
  string cluster_name = 2;

  // Required. Config of the cluster. Note that Cloud Dataproc may set
  // default values, and values may change when clusters are updated.
  ClusterConfig config = 3;

  // Optional. Labels to associate with this cluster.
  // Label **keys** must contain 1 to 63 characters, and must conform to
  // [RFC 1035](https://www.ietf.org/rfc/rfc1035.txt).
  // Label **values** may be empty, but, if present, must contain 1 to 63
  // characters, and must conform to
  // [RFC 1035](https://www.ietf.org/rfc/rfc1035.txt).
  // No more than 32 labels can be associated with a cluster.
  map<string, string> labels = 8;

  // Output only. Status of the cluster.
  ClusterStatus status = 4;

  // Output only. Previous status of the cluster.
  repeated ClusterStatus status_history = 7;

  // Output only. Cluster UUID (Unique Universal Identifier). Cloud Dataproc
  // generates this value when it creates the cluster.
  string cluster_uuid = 6;

  // Cluster daemon metrics such as HDFS and YARN stats.
  //
  // **Beta Feature**: This report is available for testing purposes only. It
  // may be changed before final release.
  ClusterMetrics metrics = 9;
}
// The cluster config.
message ClusterConfig {
  // Optional. A Cloud Storage staging bucket used for sharing generated
  // SSH keys and config. If you do not specify a staging bucket, Cloud
  // Dataproc will determine an appropriate Cloud Storage location (US,
  // ASIA, or EU) for your cluster's staging bucket according to the Google
  // Compute Engine zone where your cluster is deployed, and then it will
  // create and manage this project-level, per-location bucket for you.
  string config_bucket = 1;

  // Required. The shared Compute Engine config settings for
  // all instances in a cluster.
  GceClusterConfig gce_cluster_config = 8;

  // Optional. The Compute Engine config settings for
  // the master instance in a cluster.
  InstanceGroupConfig master_config = 9;

  // Optional. The Compute Engine config settings for
  // worker instances in a cluster.
  InstanceGroupConfig worker_config = 10;

  // Optional. The Compute Engine config settings for
  // additional worker instances in a cluster.
  InstanceGroupConfig secondary_worker_config = 12;

  // Optional. The config settings for software inside the cluster.
  SoftwareConfig software_config = 13;

  // Optional. The config setting for auto delete cluster schedule.
  LifecycleConfig lifecycle_config = 14;

  // Optional. Commands to execute on each node after config is
  // completed. By default, executables are run on master and all worker
  // nodes. You can test a node's <code>role</code> metadata to run an
  // executable on a master or worker node, as shown below using `curl` (you
  // can also use `wget`):
  //
  //     ROLE=$(curl -H Metadata-Flavor:Google http://metadata/computeMetadata/v1/instance/attributes/dataproc-role)
  //     if [[ "${ROLE}" == 'Master' ]]; then
  //       ... master specific actions ...
  //     else
  //       ... worker specific actions ...
  //     fi
  //
  // The metadata server path above is versioned independently of the
  // Dataproc API version (`computeMetadata/v1`, not `v1beta2`).
  repeated NodeInitializationAction initialization_actions = 11;
}
// Config settings common to the resources of Compute Engine cluster
// instances; they apply to every instance in the cluster.
message GceClusterConfig {
  // Optional. Zone in which the Compute Engine cluster will be located.
  // On a create request, it is required in the "global" region. If omitted
  // in a non-global Cloud Dataproc region, the service will pick a zone in
  // the corresponding Compute Engine region. On a get request, zone will
  // always be present.
  //
  // A full URL, partial URI, or short name are valid. Examples:
  //
  // * `https://www.googleapis.com/compute/v1/projects/[project_id]/zones/[zone]`
  // * `projects/[project_id]/zones/[zone]`
  // * `us-central1-f`
  string zone_uri = 1;

  // Optional. Compute Engine network used for machine communications.
  // Cannot be specified with subnetwork_uri. If neither `network_uri` nor
  // `subnetwork_uri` is specified, the "default" network of the project is
  // used, if it exists. Cannot be a "Custom Subnet Network" (see
  // [Using Subnetworks](/compute/docs/subnetworks) for more information).
  //
  // A full URL, partial URI, or short name are valid. Examples:
  //
  // * `https://www.googleapis.com/compute/v1/projects/[project_id]/regions/global/default`
  // * `projects/[project_id]/regions/global/default`
  // * `default`
  string network_uri = 2;

  // Optional. Compute Engine subnetwork used for machine communications.
  // Cannot be specified with network_uri.
  //
  // A full URL, partial URI, or short name are valid. Examples:
  //
  // * `https://www.googleapis.com/compute/v1/projects/[project_id]/regions/us-east1/sub0`
  // * `projects/[project_id]/regions/us-east1/sub0`
  // * `sub0`
  string subnetwork_uri = 6;

  // Optional. When true, every instance in the cluster has internal IP
  // addresses only. By default, clusters are not restricted to internal IP
  // addresses, and will have ephemeral external IP addresses assigned to
  // each instance. This `internal_ip_only` restriction can only be enabled
  // for subnetwork enabled networks, and all off-cluster dependencies must
  // be configured to be accessible without external IP addresses.
  bool internal_ip_only = 7;

  // Optional. Service account of the instances. Defaults to the default
  // Compute Engine service account. Custom service accounts need
  // permissions equivalent to the following IAM roles:
  //
  // * roles/logging.logWriter
  // * roles/storage.objectAdmin
  //
  // (see https://cloud.google.com/compute/docs/access/service-accounts#custom_service_accounts
  // for more information).
  // Example: `[account_id]@[project_id].iam.gserviceaccount.com`
  string service_account = 8;

  // Optional. URIs of the service account scopes to include in Compute
  // Engine instances. This base set of scopes is always included:
  //
  // * https://www.googleapis.com/auth/cloud.useraccounts.readonly
  // * https://www.googleapis.com/auth/devstorage.read_write
  // * https://www.googleapis.com/auth/logging.write
  //
  // If no scopes are specified, the following defaults are also provided:
  //
  // * https://www.googleapis.com/auth/bigquery
  // * https://www.googleapis.com/auth/bigtable.admin.table
  // * https://www.googleapis.com/auth/bigtable.data
  // * https://www.googleapis.com/auth/devstorage.full_control
  repeated string service_account_scopes = 3;

  // Compute Engine tags added to all instances (see
  // [Tagging instances](/compute/docs/label-or-tag-resources#tags)).
  repeated string tags = 4;

  // Compute Engine metadata entries added to all instances (see
  // [Project and instance metadata](https://cloud.google.com/compute/docs/storing-retrieving-metadata#project_and_instance_metadata)).
  map<string, string> metadata = 5;
}
// Optional. Config settings for the Compute Engine resources of an instance
// group, such as a master or worker group.
message InstanceGroupConfig {
  // Optional. Number of VM instances in the instance group.
  // For master instance groups, must be set to 1.
  int32 num_instances = 1;

  // Output only. List of instance names. Cloud Dataproc derives the names
  // from `cluster_name`, `num_instances`, and the instance group.
  repeated string instance_names = 2;

  // Output only. Compute Engine image resource used for cluster
  // instances. Inferred from `SoftwareConfig.image_version`.
  string image_uri = 3;

  // Optional. Compute Engine machine type used for cluster instances.
  //
  // A full URL, partial URI, or short name are valid. Examples:
  //
  // * `https://www.googleapis.com/compute/v1/projects/[project_id]/zones/us-east1-a/machineTypes/n1-standard-2`
  // * `projects/[project_id]/zones/us-east1-a/machineTypes/n1-standard-2`
  // * `n1-standard-2`
  //
  // **Auto Zone Exception**: If you are using the Cloud Dataproc
  // [Auto Zone Placement](/dataproc/docs/concepts/configuring-clusters/auto-zone#using_auto_zone_placement)
  // feature, you must use the short name of the machine type
  // resource, for example, `n1-standard-2`.
  string machine_type_uri = 4;

  // Optional. Config settings for the disk options.
  DiskConfig disk_config = 5;

  // Optional. Marks this instance group as containing preemptible
  // instances.
  bool is_preemptible = 6;

  // Output only. Config for the Compute Engine Instance Group Manager that
  // manages this group.
  // This is only used for preemptible instance groups.
  ManagedGroupConfig managed_group_config = 7;

  // Optional. Compute Engine accelerator configuration for these instances.
  //
  // **Beta Feature**: This feature is still under development. It may be
  // changed before final release.
  repeated AcceleratorConfig accelerators = 8;

  // Optional. Minimum cpu platform for the Instance Group.
  // See [Cloud Dataproc&rarr;Minimum CPU Platform](/dataproc/docs/concepts/compute/dataproc-min-cpu).
  string min_cpu_platform = 9;
}
// Resources used to actively manage an instance group.
message ManagedGroupConfig {
  // Output only. Name of the Instance Template used for the Managed
  // Instance Group.
  string instance_template_name = 1;

  // Output only. Name of the Instance Group Manager for this group.
  string instance_group_manager_name = 2;
}
// Type and number of the accelerator cards attached to the instances of an
// instance group (see [GPUs on Compute Engine](/compute/docs/gpus/)).
message AcceleratorConfig {
  // Full URL, partial URI, or short name of the accelerator type resource to
  // expose to this instance. See
  // [Compute Engine AcceleratorTypes](/compute/docs/reference/beta/acceleratorTypes)
  //
  // Examples
  // * `https://www.googleapis.com/compute/beta/projects/[project_id]/zones/us-east1-a/acceleratorTypes/nvidia-tesla-k80`
  // * `projects/[project_id]/zones/us-east1-a/acceleratorTypes/nvidia-tesla-k80`
  // * `nvidia-tesla-k80`
  //
  // **Auto Zone Exception**: If you are using the Cloud Dataproc
  // [Auto Zone Placement](/dataproc/docs/concepts/configuring-clusters/auto-zone#using_auto_zone_placement)
  // feature, you must use the short name of the accelerator type
  // resource, for example, `nvidia-tesla-k80`.
  string accelerator_type_uri = 1;

  // Number of accelerator cards of this type exposed to this instance.
  int32 accelerator_count = 2;
}
// Config of the disk options for a group of VM instances.
message DiskConfig {
  // Optional. Boot disk type (default is "pd-standard").
  // Valid values: "pd-ssd" (Persistent Disk Solid State Drive) or
  // "pd-standard" (Persistent Disk Hard Disk Drive).
  string boot_disk_type = 3;

  // Optional. Boot disk size in GB (default is 500GB).
  int32 boot_disk_size_gb = 1;

  // Optional. Number of attached SSDs, from 0 to 4 (default is 0).
  // When no SSDs are attached, runtime logs and
  // [HDFS](https://hadoop.apache.org/docs/r1.2.1/hdfs_user_guide.html) data
  // are stored on the boot disk. When one or more SSDs are attached, this
  // runtime bulk data is spread across them, and the boot disk contains only
  // basic config and installed binaries.
  int32 num_local_ssds = 2;
}
// Schedule configuration related to cluster auto-deletion.
message LifecycleConfig {
  // Optional. Longest duration the cluster is kept alive while staying idle;
  // passing this threshold will cause the cluster to be auto-deleted.
  google.protobuf.Duration idle_delete_ttl = 1;

  // Optional. Either the exact time at which the cluster should be deleted,
  // or the maximum age of the cluster.
  oneof ttl {
    // Optional. Time at which the cluster will be auto-deleted.
    google.protobuf.Timestamp auto_delete_time = 2;

    // Optional. Life duration of the cluster; the cluster will be
    // auto-deleted at the end of this duration.
    google.protobuf.Duration auto_delete_ttl = 3;
  }
}
// An executable to run on a fully configured node, together with a timeout
// period for the executable to complete.
message NodeInitializationAction {
  // Required. Cloud Storage URI of the executable file.
  string executable_file = 1;

  // Optional. Amount of time the executable has to complete. Default is
  // 10 minutes. If the executable has not completed at the end of the
  // timeout period, cluster creation fails with an explanatory error message
  // (the name of the executable that caused the error and the exceeded
  // timeout period).
  google.protobuf.Duration execution_timeout = 2;
}
// Status of a cluster and of its instances.
message ClusterStatus {
  // The states a cluster can be in.
  enum State {
    // The cluster state is unknown.
    UNKNOWN = 0;

    // The cluster is being created and set up. It is not ready for use.
    CREATING = 1;

    // The cluster is currently running and healthy. It is ready for use.
    RUNNING = 2;

    // The cluster encountered an error. It is not ready for use.
    ERROR = 3;

    // The cluster is being deleted. It cannot be used.
    DELETING = 4;

    // The cluster is being updated. It continues to accept and process jobs.
    UPDATING = 5;
  }

  // The substates a cluster can be in.
  enum Substate {
    // The cluster substate is unknown.
    UNSPECIFIED = 0;

    // The cluster is known to be in an unhealthy state (for example,
    // critical daemons are not running or HDFS capacity is exhausted).
    //
    // Applies to RUNNING state.
    UNHEALTHY = 1;

    // The status reported by the agent is out of date (may occur if
    // Cloud Dataproc loses communication with Agent).
    //
    // Applies to RUNNING state.
    STALE_STATUS = 2;
  }

  // Output only. State of the cluster.
  State state = 1;

  // Output only. Optional details of the cluster's state.
  string detail = 2;

  // Output only. Time at which this state was entered.
  google.protobuf.Timestamp state_start_time = 3;

  // Output only. Additional state information, including the status
  // reported by the agent.
  Substate substate = 4;
}
// Specifies the selection and config of software inside the cluster.
message SoftwareConfig {
// Optional. The version of software inside the cluster. It must be one of
// the supported
// [Cloud Dataproc Versions](/dataproc/docs/concepts/versioning/dataproc-versions#supported_cloud_dataproc_versions),
// such as "1.2" (including a subminor version, such as "1.2.29"), or the
// ["preview" version](/dataproc/docs/concepts/versioning/dataproc-versions#other_versions).
// If unspecified, it defaults to the latest version.
string image_version = 1;
// Optional. The properties to set on daemon config files.
//
// Property keys are specified in `prefix:property` format, such as
// `core:fs.defaultFS`. The following are the supported prefixes
// and the config files they map to:
//
// * capacity-scheduler: `capacity-scheduler.xml`
// * core: `core-site.xml`
// * distcp: `distcp-default.xml`
// * hdfs: `hdfs-site.xml`
// * hive: `hive-site.xml`
// * mapred: `mapred-site.xml`
// * pig: `pig.properties`
// * spark: `spark-defaults.conf`
// * yarn: `yarn-site.xml`
//
// For more information, see
// [Cluster properties](/dataproc/docs/concepts/cluster-properties).
map<string, string> properties = 2;
}
// Contains cluster daemon metrics, such as HDFS and YARN stats.
//
// **Beta Feature**: This report is available for testing purposes only. It may
// be changed before final release.
message ClusterMetrics {
// The HDFS metrics, as a map from string key to 64-bit integer value.
map<string, int64> hdfs_metrics = 1;
// The YARN metrics, as a map from string key to 64-bit integer value.
map<string, int64> yarn_metrics = 2;
}
// A request to create a cluster.
message CreateClusterRequest {
// Required. The ID of the Google Cloud Platform project that the cluster
// belongs to.
string project_id = 1;
// Required. The Cloud Dataproc region in which to handle the request.
string region = 3;
// Required. The cluster to create.
Cluster cluster = 2;
// Optional. A unique ID used to identify the request. If the server
// receives two
// [CreateClusterRequest][google.cloud.dataproc.v1beta2.CreateClusterRequest]
// requests with the same ID, then the second request will be ignored and the
// first [google.longrunning.Operation][google.longrunning.Operation] created
// and stored in the backend is returned.
//
// It is recommended to always set this value to a
// [UUID](https://en.wikipedia.org/wiki/Universally_unique_identifier).
//
// The ID must contain only letters (a-z, A-Z), numbers (0-9),
// underscores (_), and hyphens (-). The maximum length is 40 characters.
string request_id = 4;
}
// A request to update a cluster.
message UpdateClusterRequest {
// Required. The ID of the Google Cloud Platform project the
// cluster belongs to.
string project_id = 1;
// Required. The Cloud Dataproc region in which to handle the request.
string region = 5;
// Required. The cluster name.
string cluster_name = 2;
// Required. The changes to the cluster.
Cluster cluster = 3;
// Optional. Timeout for graceful YARN decommissioning. Graceful
// decommissioning allows removing nodes from the cluster without
// interrupting jobs in progress. Timeout specifies how long to wait for jobs
// in progress to finish before forcefully removing nodes (and potentially
// interrupting jobs). Default timeout is 0 (for forceful decommission), and
// the maximum allowed timeout is 1 day.
//
// Only supported on Dataproc image versions 1.2 and higher.
google.protobuf.Duration graceful_decommission_timeout = 6;
// Required. Specifies the path, relative to `Cluster`, of
// the field to update. For example, to change the number of workers
// in a cluster to 5, the `update_mask` parameter would be
// specified as `config.worker_config.num_instances`,
// and the `PATCH` request body would specify the new value, as follows:
//
//     {
//       "config":{
//         "workerConfig":{
//           "numInstances":"5"
//         }
//       }
//     }
//
// Similarly, to change the number of preemptible workers in a cluster to 5,
// the `update_mask` parameter would be
// `config.secondary_worker_config.num_instances`,
// and the `PATCH` request body would be set as follows:
//
//     {
//       "config":{
//         "secondaryWorkerConfig":{
//           "numInstances":"5"
//         }
//       }
//     }
//
// <strong>Note:</strong> currently only the following fields can be updated:
//
// <table>
// <tr>
// <td><strong>Mask</strong></td><td><strong>Purpose</strong></td>
// </tr>
// <tr>
// <td>labels</td><td>Updates labels</td>
// </tr>
// <tr>
// <td>config.worker_config.num_instances</td><td>Resize primary worker group</td>
// </tr>
// <tr>
// <td>config.secondary_worker_config.num_instances</td><td>Resize secondary worker group</td>
// </tr>
// <tr>
// <td>config.lifecycle_config.auto_delete_ttl</td><td>Reset MAX TTL duration</td>
// </tr>
// <tr>
// <td>config.lifecycle_config.auto_delete_time</td><td>Update MAX TTL deletion timestamp</td>
// </tr>
// <tr>
// <td>config.lifecycle_config.idle_delete_ttl</td><td>Update Idle TTL duration</td>
// </tr>
// </table>
google.protobuf.FieldMask update_mask = 4;
// Optional. A unique ID used to identify the request. If the server
// receives two
// [UpdateClusterRequest][google.cloud.dataproc.v1beta2.UpdateClusterRequest]
// requests with the same ID, then the second request will be ignored and the
// first [google.longrunning.Operation][google.longrunning.Operation] created
// and stored in the backend is returned.
//
// It is recommended to always set this value to a
// [UUID](https://en.wikipedia.org/wiki/Universally_unique_identifier).
//
// The ID must contain only letters (a-z, A-Z), numbers (0-9),
// underscores (_), and hyphens (-). The maximum length is 40 characters.
string request_id = 7;
}
// A request to delete a cluster.
message DeleteClusterRequest {
// Required. The ID of the Google Cloud Platform project that the cluster
// belongs to.
string project_id = 1;
// Required. The Cloud Dataproc region in which to handle the request.
string region = 3;
// Required. The cluster name.
string cluster_name = 2;
// Optional. Specifying the `cluster_uuid` means the RPC should fail
// (with error NOT_FOUND) if a cluster with the specified UUID does not exist.
string cluster_uuid = 4;
// Optional. A unique ID used to identify the request. If the server
// receives two
// [DeleteClusterRequest][google.cloud.dataproc.v1beta2.DeleteClusterRequest]
// requests with the same ID, then the second request will be ignored and the
// first [google.longrunning.Operation][google.longrunning.Operation] created
// and stored in the backend is returned.
//
// It is recommended to always set this value to a
// [UUID](https://en.wikipedia.org/wiki/Universally_unique_identifier).
//
// The ID must contain only letters (a-z, A-Z), numbers (0-9),
// underscores (_), and hyphens (-). The maximum length is 40 characters.
string request_id = 5;
}
// A request to get the resource representation for a cluster in a project.
message GetClusterRequest {
// Required. The ID of the Google Cloud Platform project that the cluster
// belongs to.
string project_id = 1;
// Required. The Cloud Dataproc region in which to handle the request.
string region = 3;
// Required. The name of the cluster to get.
string cluster_name = 2;
}
// A request to list the clusters in a project.
message ListClustersRequest {
// Required. The ID of the Google Cloud Platform project that the cluster
// belongs to.
string project_id = 1;
// Required. The Cloud Dataproc region in which to handle the request.
string region = 4;
// Optional. A filter constraining the clusters to list. Filters are
// case-sensitive and have the following syntax:
//
//     field = value [AND [field = value]] ...
//
// where **field** is one of `status.state`, `clusterName`, or `labels.[KEY]`,
// and `[KEY]` is a label key. **value** can be `*` to match all values.
// `status.state` can be one of the following: `ACTIVE`, `INACTIVE`,
// `CREATING`, `RUNNING`, `ERROR`, `DELETING`, or `UPDATING`. `ACTIVE`
// contains the `CREATING`, `UPDATING`, and `RUNNING` states. `INACTIVE`
// contains the `DELETING` and `ERROR` states.
// `clusterName` is the name of the cluster provided at creation time.
// Only the logical `AND` operator is supported; space-separated items are
// treated as having an implicit `AND` operator.
//
// Example filter:
//
//     status.state = ACTIVE AND clusterName = mycluster
//     AND labels.env = staging AND labels.starred = *
string filter = 5;
// Optional. The standard List page size.
int32 page_size = 2;
// Optional. The standard List page token.
string page_token = 3;
}
// The list of all clusters in a project.
message ListClustersResponse {
// Output only. The clusters in the project.
repeated Cluster clusters = 1;
// Output only. This token is included in the response if there are more
// results to fetch. To fetch additional results, provide this value as the
// `page_token` in a subsequent `ListClustersRequest`.
string next_page_token = 2;
}
// A request to collect cluster diagnostic information.
message DiagnoseClusterRequest {
// Required. The ID of the Google Cloud Platform project that the cluster
// belongs to.
string project_id = 1;
// Required. The Cloud Dataproc region in which to handle the request.
string region = 3;
// Required. The name of the cluster to diagnose.
string cluster_name = 2;
}
// The location of diagnostic output.
message DiagnoseClusterResults {
// Output only. The Cloud Storage URI of the diagnostic output.
// The output report is a plain-text file with a summary of the collected
// diagnostics.
string output_uri = 1;
}

View File

@@ -0,0 +1,767 @@
// Copyright 2018 Google Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
syntax = "proto3";
package google.cloud.dataproc.v1beta2;
import "google/api/annotations.proto";
import "google/protobuf/empty.proto";
import "google/protobuf/field_mask.proto";
import "google/protobuf/timestamp.proto";
option go_package = "google.golang.org/genproto/googleapis/cloud/dataproc/v1beta2;dataproc";
option java_multiple_files = true;
option java_outer_classname = "JobsProto";
option java_package = "com.google.cloud.dataproc.v1beta2";
// The JobController provides methods to manage jobs.
service JobController {
// Submits a job to a cluster.
//
// HTTP mapping: `POST` with `body: "*"`.
rpc SubmitJob(SubmitJobRequest) returns (Job) {
option (google.api.http) = {
post: "/v1beta2/projects/{project_id}/regions/{region}/jobs:submit"
body: "*"
};
}
// Gets the resource representation for a job in a project.
//
// HTTP mapping: `GET`, with `project_id`, `region` and `job_id` taken from
// the URL path.
rpc GetJob(GetJobRequest) returns (Job) {
option (google.api.http) = {
get: "/v1beta2/projects/{project_id}/regions/{region}/jobs/{job_id}"
};
}
// Lists regions/{region}/jobs in a project.
//
// HTTP mapping: `GET`, with `project_id` and `region` taken from the URL
// path.
rpc ListJobs(ListJobsRequest) returns (ListJobsResponse) {
option (google.api.http) = {
get: "/v1beta2/projects/{project_id}/regions/{region}/jobs"
};
}
// Updates a job in a project.
//
// HTTP mapping: `PATCH` with `body: "job"`.
rpc UpdateJob(UpdateJobRequest) returns (Job) {
option (google.api.http) = {
patch: "/v1beta2/projects/{project_id}/regions/{region}/jobs/{job_id}"
body: "job"
};
}
// Starts a job cancellation request. To access the job resource
// after cancellation, call
// [regions/{region}/jobs.list](/dataproc/docs/reference/rest/v1beta2/projects.regions.jobs/list) or
// [regions/{region}/jobs.get](/dataproc/docs/reference/rest/v1beta2/projects.regions.jobs/get).
//
// HTTP mapping: `POST` to the job's `:cancel` custom verb, with `body: "*"`.
rpc CancelJob(CancelJobRequest) returns (Job) {
option (google.api.http) = {
post: "/v1beta2/projects/{project_id}/regions/{region}/jobs/{job_id}:cancel"
body: "*"
};
}
// Deletes the job from the project. If the job is active, the delete fails,
// and the response returns `FAILED_PRECONDITION`.
//
// HTTP mapping: `DELETE`, with `project_id`, `region` and `job_id` taken
// from the URL path.
rpc DeleteJob(DeleteJobRequest) returns (google.protobuf.Empty) {
option (google.api.http) = {
delete: "/v1beta2/projects/{project_id}/regions/{region}/jobs/{job_id}"
};
}
}
// The runtime logging config of the job.
message LoggingConfig {
// The Log4j level for job execution. When running an
// [Apache Hive](http://hive.apache.org/) job, Cloud
// Dataproc configures the Hive client to an equivalent verbosity level.
enum Level {
// Level is unspecified. Use default level for log4j. This is the zero
// value of the enum.
LEVEL_UNSPECIFIED = 0;
// Use ALL level for log4j.
ALL = 1;
// Use TRACE level for log4j.
TRACE = 2;
// Use DEBUG level for log4j.
DEBUG = 3;
// Use INFO level for log4j.
INFO = 4;
// Use WARN level for log4j.
WARN = 5;
// Use ERROR level for log4j.
ERROR = 6;
// Use FATAL level for log4j.
FATAL = 7;
// Turn off log4j.
OFF = 8;
}
// The per-package log levels for the driver, as a map from package name to
// `Level`. This may include the "root" package name to configure rootLogger.
//
// Examples:
//
//     'com.google = FATAL', 'root = INFO', 'org.apache = DEBUG'
map<string, Level> driver_log_levels = 2;
}
// A Cloud Dataproc job for running
// [Apache Hadoop MapReduce](https://hadoop.apache.org/docs/current/hadoop-mapreduce-client/hadoop-mapreduce-client-core/MapReduceTutorial.html)
// jobs on [Apache Hadoop YARN](https://hadoop.apache.org/docs/r2.7.1/hadoop-yarn/hadoop-yarn-site/YARN.html).
message HadoopJob {
// Required. Indicates the location of the driver's main class. Specify
// either the jar file that contains the main class or the main class name.
// To specify both, add the jar file to `jar_file_uris`, and then specify
// the main class name in this property.
oneof driver {
// The HCFS URI of the jar file containing the main class.
// Examples:
//
//     'gs://foo-bucket/analytics-binaries/extract-useful-metrics-mr.jar'
//     'hdfs:/tmp/test-samples/custom-wordcount.jar'
//     'file:///home/usr/lib/hadoop-mapreduce/hadoop-mapreduce-examples.jar'
string main_jar_file_uri = 1;
// The name of the driver's main class. The jar file containing the class
// must be in the default CLASSPATH or specified in `jar_file_uris`.
string main_class = 2;
}
// Optional. The arguments to pass to the driver. Do not
// include arguments, such as `-libjars` or `-Dfoo=bar`, that can be set as job
// properties, since a collision may occur that causes an incorrect job
// submission.
repeated string args = 3;
// Optional. Jar file URIs to add to the CLASSPATHs of the
// Hadoop driver and tasks.
repeated string jar_file_uris = 4;
// Optional. HCFS (Hadoop Compatible Filesystem) URIs of files to be copied
// to the working directory of Hadoop drivers and distributed tasks. Useful
// for naively parallel tasks.
repeated string file_uris = 5;
// Optional. HCFS URIs of archives to be extracted in the working directory of
// Hadoop drivers and tasks. Supported file types:
// .jar, .tar, .tar.gz, .tgz, or .zip.
repeated string archive_uris = 6;
// Optional. A mapping of property names to values, used to configure Hadoop.
// Properties that conflict with values set by the Cloud Dataproc API may be
// overwritten. Can include properties set in /etc/hadoop/conf/*-site and
// classes in user code.
map<string, string> properties = 7;
// Optional. The runtime log config for job execution.
LoggingConfig logging_config = 8;
}
// A Cloud Dataproc job for running [Apache Spark](http://spark.apache.org/)
// applications on YARN.
message SparkJob {
// Required. The specification of the main method to call to drive the job.
// Specify either the jar file that contains the main class or the main class
// name. To pass both a main jar and a main class in that jar, add the jar to
// `jar_file_uris`, and then specify the main class name in `main_class`.
oneof driver {
// The HCFS URI of the jar file that contains the main class.
string main_jar_file_uri = 1;
// The name of the driver's main class. The jar file that contains the class
// must be in the default CLASSPATH or specified in `jar_file_uris`.
string main_class = 2;
}
// Optional. The arguments to pass to the driver. Do not include arguments,
// such as `--conf`, that can be set as job properties, since a collision may
// occur that causes an incorrect job submission.
repeated string args = 3;
// Optional. HCFS URIs of jar files to add to the CLASSPATHs of the
// Spark driver and tasks.
repeated string jar_file_uris = 4;
// Optional. HCFS URIs of files to be copied to the working directory of
// Spark drivers and distributed tasks. Useful for naively parallel tasks.
repeated string file_uris = 5;
// Optional. HCFS URIs of archives to be extracted in the working directory
// of Spark drivers and tasks. Supported file types:
// .jar, .tar, .tar.gz, .tgz, and .zip.
repeated string archive_uris = 6;
// Optional. A mapping of property names to values, used to configure Spark.
// Properties that conflict with values set by the Cloud Dataproc API may be
// overwritten. Can include properties set in
// /etc/spark/conf/spark-defaults.conf and classes in user code.
map<string, string> properties = 7;
// Optional. The runtime log config for job execution.
LoggingConfig logging_config = 8;
}
// A Cloud Dataproc job for running
// [Apache PySpark](https://spark.apache.org/docs/0.9.0/python-programming-guide.html)
// applications on YARN.
message PySparkJob {
// Required. The HCFS URI of the main Python file to use as the driver. Must
// be a .py file.
string main_python_file_uri = 1;
// Optional. The arguments to pass to the driver. Do not include arguments,
// such as `--conf`, that can be set as job properties, since a collision may
// occur that causes an incorrect job submission.
repeated string args = 2;
// Optional. HCFS file URIs of Python files to pass to the PySpark
// framework. Supported file types: .py, .egg, and .zip.
repeated string python_file_uris = 3;
// Optional. HCFS URIs of jar files to add to the CLASSPATHs of the
// Python driver and tasks.
repeated string jar_file_uris = 4;
// Optional. HCFS URIs of files to be copied to the working directory of
// Python drivers and distributed tasks. Useful for naively parallel tasks.
repeated string file_uris = 5;
// Optional. HCFS URIs of archives to be extracted in the working directory of
// Python drivers and tasks. Supported file types:
// .jar, .tar, .tar.gz, .tgz, and .zip.
repeated string archive_uris = 6;
// Optional. A mapping of property names to values, used to configure PySpark.
// Properties that conflict with values set by the Cloud Dataproc API may be
// overwritten. Can include properties set in
// /etc/spark/conf/spark-defaults.conf and classes in user code.
map<string, string> properties = 7;
// Optional. The runtime log config for job execution.
LoggingConfig logging_config = 8;
}
// A list of queries to run on a cluster.
message QueryList {
// Required. The queries to execute. You do not need to terminate a query
// with a semicolon. Multiple queries can be specified in one string
// by separating each with a semicolon. Here is an example of a Cloud
// Dataproc API snippet that uses a QueryList to specify a HiveJob:
//
//     "hiveJob": {
//       "queryList": {
//         "queries": [
//           "query1",
//           "query2",
//           "query3;query4"
//         ]
//       }
//     }
repeated string queries = 1;
}
// A Cloud Dataproc job for running [Apache Hive](https://hive.apache.org/)
// queries on YARN.
message HiveJob {
// Required. The sequence of Hive queries to execute, specified as either
// an HCFS file URI or a list of queries.
oneof queries {
// The HCFS URI of the script that contains Hive queries.
string query_file_uri = 1;
// A list of queries.
QueryList query_list = 2;
}
// Optional. Whether to continue executing queries if a query fails.
// The default value is `false`. Setting it to `true` can be useful when
// executing independent parallel queries.
bool continue_on_failure = 3;
// Optional. Mapping of query variable names to values (equivalent to the
// Hive command: `SET name="value";`).
map<string, string> script_variables = 4;
// Optional. A mapping of property names to values, used to configure Hive.
// Properties that conflict with values set by the Cloud Dataproc API may be
// overwritten. Can include properties set in /etc/hadoop/conf/*-site.xml,
// /etc/hive/conf/hive-site.xml, and classes in user code.
map<string, string> properties = 5;
// Optional. HCFS URIs of jar files to add to the CLASSPATH of the
// Hive server and Hadoop MapReduce (MR) tasks. Can contain Hive SerDes
// and UDFs.
repeated string jar_file_uris = 6;
}
// A Cloud Dataproc job for running
// [Apache Spark SQL](http://spark.apache.org/sql/) queries.
message SparkSqlJob {
// Required. The sequence of Spark SQL queries to execute, specified as
// either an HCFS file URI or as a list of queries.
oneof queries {
// The HCFS URI of the script that contains SQL queries.
string query_file_uri = 1;
// A list of queries.
QueryList query_list = 2;
}
// Optional. Mapping of query variable names to values (equivalent to the
// Spark SQL command: `SET name="value";`).
map<string, string> script_variables = 3;
// Optional. A mapping of property names to values, used to configure
// Spark SQL's SparkConf. Properties that conflict with values set by the
// Cloud Dataproc API may be overwritten.
map<string, string> properties = 4;
// Optional. HCFS URIs of jar files to be added to the Spark CLASSPATH.
//
// Note for maintainers: this field's number is 56, not 5. Field numbers
// identify fields on the wire, so it must not be renumbered to fill the gap
// between 4 and 6.
repeated string jar_file_uris = 56;
// Optional. The runtime log config for job execution.
LoggingConfig logging_config = 6;
}
// A Cloud Dataproc job for running [Apache Pig](https://pig.apache.org/)
// queries on YARN.
message PigJob {
// Required. The sequence of Pig queries to execute, specified as either an
// HCFS file URI or a list of queries.
oneof queries {
// The HCFS URI of the script that contains the Pig queries.
string query_file_uri = 1;
// A list of queries.
QueryList query_list = 2;
}
// Optional. Whether to continue executing queries if a query fails.
// The default value is `false`. Setting it to `true` can be useful when
// executing independent parallel queries.
bool continue_on_failure = 3;
// Optional. Mapping of query variable names to values (equivalent to the Pig
// command: `name=[value]`).
map<string, string> script_variables = 4;
// Optional. A mapping of property names to values, used to configure Pig.
// Properties that conflict with values set by the Cloud Dataproc API may be
// overwritten. Can include properties set in /etc/hadoop/conf/*-site.xml,
// /etc/pig/conf/pig.properties, and classes in user code.
map<string, string> properties = 5;
// Optional. HCFS URIs of jar files to add to the CLASSPATH of
// the Pig Client and Hadoop MapReduce (MR) tasks. Can contain Pig UDFs.
repeated string jar_file_uris = 6;
// Optional. The runtime log config for job execution.
LoggingConfig logging_config = 7;
}
// Cloud Dataproc job placement config: identifies the cluster that a job is
// submitted to.
message JobPlacement {
// Required. The name of the cluster where the job will be submitted.
string cluster_name = 1;
// Output only. A cluster UUID generated by the Cloud Dataproc service when
// the job is submitted.
string cluster_uuid = 2;
}
// Cloud Dataproc job status.
message JobStatus {
// The job state.
enum State {
// The job state is unknown. This is the zero value of the enum.
STATE_UNSPECIFIED = 0;
// The job is pending; it has been submitted, but is not yet running.
PENDING = 1;
// Job has been received by the service and completed initial setup;
// it will soon be submitted to the cluster.
SETUP_DONE = 8;
// The job is running on the cluster.
RUNNING = 2;
// A CancelJob request has been received, but is pending.
CANCEL_PENDING = 3;
// Transient in-flight resources have been canceled, and the request to
// cancel the running job has been issued to the cluster.
CANCEL_STARTED = 7;
// The job cancellation was successful.
CANCELLED = 4;
// The job has completed successfully.
DONE = 5;
// The job has completed, but encountered an error.
ERROR = 6;
// Job attempt has failed. The `details` field contains failure details for
// this attempt.
//
// Applies to restartable jobs only.
ATTEMPT_FAILURE = 9;
}
// The job substate.
enum Substate {
// The job substate is unknown. This is the zero value of the enum.
UNSPECIFIED = 0;
// The Job is submitted to the agent.
//
// Applies to RUNNING state.
SUBMITTED = 1;
// The Job has been received and is awaiting execution (it may be waiting
// for a condition to be met). See the `details` field for the reason for
// the delay.
//
// Applies to RUNNING state.
QUEUED = 2;
// The agent-reported status is out of date, which may be caused by a
// loss of communication between the agent and Cloud Dataproc. If the
// agent does not send a timely update, the job will fail.
//
// Applies to RUNNING state.
STALE_STATUS = 3;
}
// Output only. A state message specifying the overall job state.
State state = 1;
// Output only. Optional job state details, such as an error
// description if the state is <code>ERROR</code>.
string details = 2;
// Output only. The time when this state was entered.
google.protobuf.Timestamp state_start_time = 6;
// Output only. Additional state information, which includes
// status reported by the agent.
Substate substate = 7;
}
// Encapsulates the full scoping used to reference a job.
message JobReference {
// Required. The ID of the Google Cloud Platform project that the job
// belongs to.
string project_id = 1;
// Optional. The job ID, which must be unique within the project. The job ID
// is either generated by the server upon job submission or provided by the
// user as a means to perform retries without creating duplicate jobs. The ID
// must contain only letters (a-z, A-Z), numbers (0-9), underscores (_), or
// hyphens (-). The maximum length is 100 characters.
string job_id = 2;
}
// A YARN application created by a job. Application information is a subset of
// <code>org.apache.hadoop.yarn.proto.YarnProtos.ApplicationReportProto</code>.
//
// **Beta Feature**: This report is available for testing purposes only. It may
// be changed before final release.
message YarnApplication {
// The application state, corresponding to
// <code>YarnProtos.YarnApplicationStateProto</code>.
enum State {
// Status is unspecified. This is the zero value of the enum.
STATE_UNSPECIFIED = 0;
// Status is NEW.
NEW = 1;
// Status is NEW_SAVING.
NEW_SAVING = 2;
// Status is SUBMITTED.
SUBMITTED = 3;
// Status is ACCEPTED.
ACCEPTED = 4;
// Status is RUNNING.
RUNNING = 5;
// Status is FINISHED.
FINISHED = 6;
// Status is FAILED.
FAILED = 7;
// Status is KILLED.
KILLED = 8;
}
// Required. The application name.
string name = 1;
// Required. The application state.
State state = 2;
// Required. The numerical progress of the application, from 1 to 100.
float progress = 3;
// Optional. The HTTP URL of the ApplicationMaster, HistoryServer, or
// TimelineServer that provides application-specific information. The URL uses
// the internal hostname, and requires a proxy server for resolution and,
// possibly, access.
string tracking_url = 4;
}
// A Cloud Dataproc job resource.
message Job {
// Optional. The fully qualified reference to the job, which can be used to
// obtain the equivalent REST path of the job resource. If this property
// is not specified when a job is created, the server generates a
// <code>job_id</code>.
JobReference reference = 1;
// Required. Job information, including how, when, and where to
// run the job.
JobPlacement placement = 2;
// Required. The application/framework-specific portion of the job.
// Exactly one of the job types below can be set.
oneof type_job {
// Job is a Hadoop job.
HadoopJob hadoop_job = 3;
// Job is a Spark job.
SparkJob spark_job = 4;
// Job is a PySpark job.
PySparkJob pyspark_job = 5;
// Job is a Hive job.
HiveJob hive_job = 6;
// Job is a Pig job.
PigJob pig_job = 7;
// Job is a SparkSql job.
SparkSqlJob spark_sql_job = 12;
}
// Output only. The job status. Additional application-specific
// status information may be contained in the <code>type_job</code>
// and <code>yarn_applications</code> fields.
JobStatus status = 8;
// Output only. The previous job status.
repeated JobStatus status_history = 13;
// Output only. The collection of YARN applications spun up by this job.
//
// **Beta** Feature: This report is available for testing purposes only. It may
// be changed before final release.
repeated YarnApplication yarn_applications = 9;
// Output only. A URI pointing to the location of the stdout of the job's
// driver program.
string driver_output_resource_uri = 17;
// Output only. If present, the location of miscellaneous control files
// which may be used as part of job setup and handling. If not present,
// control files may be placed in the same location as
// `driver_output_resource_uri`.
string driver_control_files_uri = 15;
// Optional. The labels to associate with this job.
// Label **keys** must contain 1 to 63 characters, and must conform to
// [RFC 1035](https://www.ietf.org/rfc/rfc1035.txt).
// Label **values** may be empty, but, if present, must contain 1 to 63
// characters, and must conform to
// [RFC 1035](https://www.ietf.org/rfc/rfc1035.txt).
// No more than 32 labels can be associated with a job.
map<string, string> labels = 18;
// Optional. Job scheduling configuration.
JobScheduling scheduling = 20;
}
// Job scheduling options.
message JobScheduling {
// Optional. Maximum number of times per hour a driver may be restarted as
// a result of the driver terminating with a non-zero code before the job is
// reported failed.
//
// A job may be reported as thrashing if the driver exits with a non-zero
// code 4 times within a 10-minute window.
//
// Maximum value is 10.
int32 max_failures_per_hour = 1;
}
// A request to submit a job.
message SubmitJobRequest {
// Required. The ID of the Google Cloud Platform project that the job
// belongs to.
string project_id = 1;
// Required. The Cloud Dataproc region in which to handle the request.
string region = 3;
// Required. The job resource.
Job job = 2;
// Optional. A unique ID used to identify the request. If the server
// receives two
// [SubmitJobRequest][google.cloud.dataproc.v1beta2.SubmitJobRequest]
// requests with the same ID, then the second request will be ignored and the
// first [Job][google.cloud.dataproc.v1beta2.Job] created and stored in the
// backend is returned.
//
// It is recommended to always set this value to a
// [UUID](https://en.wikipedia.org/wiki/Universally_unique_identifier).
//
// The ID must contain only letters (a-z, A-Z), numbers (0-9),
// underscores (_), and hyphens (-). The maximum length is 40 characters.
string request_id = 4;
}
// A request to get the resource representation for a job in a project.
message GetJobRequest {
// Required. The ID of the Google Cloud Platform project that the job
// belongs to.
string project_id = 1;
// Required. The Cloud Dataproc region in which to handle the request.
string region = 3;
// Required. The ID of the job to get.
string job_id = 2;
}
// A request to list jobs in a project.
message ListJobsRequest {
// A matcher that specifies categories of job states.
enum JobStateMatcher {
// Match all jobs, regardless of state. This is the zero value, so it is
// what an unset `job_state_matcher` field reads as.
ALL = 0;
// Only match jobs in non-terminal states: PENDING, RUNNING, or
// CANCEL_PENDING.
ACTIVE = 1;
// Only match jobs in terminal states: CANCELLED, DONE, or ERROR.
NON_ACTIVE = 2;
}
// Required. The ID of the Google Cloud Platform project that the job
// belongs to.
string project_id = 1;
// Required. The Cloud Dataproc region in which to handle the request.
string region = 6;
// Optional. The number of results to return in each response.
int32 page_size = 2;
// Optional. The page token, returned by a previous call, to request the
// next page of results.
string page_token = 3;
// Optional. If set, the returned jobs list includes only jobs that were
// submitted to the named cluster.
string cluster_name = 4;
// Optional. Specifies enumerated categories of jobs to list.
// (default = match ALL jobs).
//
// If `filter` is provided, `job_state_matcher` (`jobStateMatcher` in JSON)
// will be ignored.
JobStateMatcher job_state_matcher = 5;
// Optional. A filter constraining the jobs to list. Filters are
// case-sensitive and have the following syntax:
//
//     [field = value] AND [field [= value]] ...
//
// where **field** is `status.state` or `labels.[KEY]`, and `[KEY]` is a label
// key. **value** can be `*` to match all values.
// `status.state` can be either `ACTIVE` or `NON_ACTIVE`.
// Only the logical `AND` operator is supported; space-separated items are
// treated as having an implicit `AND` operator.
//
// Example filter:
//
//     status.state = ACTIVE AND labels.env = staging AND labels.starred = *
string filter = 7;
}
// A request to update a job.
//
// The job is identified by `project_id`, `region`, and `job_id`; `job`
// carries the new values and `update_mask` names the field of `job` to apply.
message UpdateJobRequest {
// Required. The ID of the Google Cloud Platform project that the job
// belongs to.
string project_id = 1;
// Required. The Cloud Dataproc region in which to handle the request.
string region = 2;
// Required. The job ID.
string job_id = 3;
// Required. The changes to the job. See `update_mask` for which field is
// applied.
Job job = 4;
// Required. Specifies the path, relative to <code>Job</code>, of
// the field to update. For example, to update the labels of a Job the
// <code>update_mask</code> parameter would be specified as
// <code>labels</code>, and the `PATCH` request body would specify the new
// value. <strong>Note:</strong> Currently, <code>labels</code> is the only
// field that can be updated.
google.protobuf.FieldMask update_mask = 5;
}
// A list of jobs in a project; the response to a
// <code>ListJobsRequest</code>.
message ListJobsResponse {
// Output only. Jobs list.
repeated Job jobs = 1;
// Output only. This token is included in the response if there are more
// results to fetch; it is not populated on the last page. To fetch
// additional results, provide this value as the `page_token` in a
// subsequent <code>ListJobsRequest</code>.
string next_page_token = 2;
}
// A request to cancel a job.
//
// All three fields (`project_id`, `region`, and `job_id`) are required.
message CancelJobRequest {
// Required. The ID of the Google Cloud Platform project that the job
// belongs to.
string project_id = 1;
// Required. The Cloud Dataproc region in which to handle the request.
string region = 3;
// Required. The job ID.
string job_id = 2;
}
// A request to delete a job.
//
// All three fields (`project_id`, `region`, and `job_id`) are required.
message DeleteJobRequest {
// Required. The ID of the Google Cloud Platform project that the job
// belongs to.
string project_id = 1;
// Required. The Cloud Dataproc region in which to handle the request.
string region = 3;
// Required. The job ID.
string job_id = 2;
}

View File

@@ -0,0 +1,83 @@
// Copyright 2018 Google Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
syntax = "proto3";
package google.cloud.dataproc.v1beta2;
import "google/api/annotations.proto";
import "google/protobuf/timestamp.proto";
option go_package = "google.golang.org/genproto/googleapis/cloud/dataproc/v1beta2;dataproc";
option java_multiple_files = true;
option java_outer_classname = "OperationsProto";
option java_package = "com.google.cloud.dataproc.v1beta2";
// The status of the operation.
//
// Pairs a coarse `state` with a free-form `inner_state` / `details` and the
// time at which the state was entered.
message ClusterOperationStatus {
// The operation state.
enum State {
// Unused. This is the zero value, i.e. what an unset `state` field reads
// as.
UNKNOWN = 0;
// The operation has been created.
PENDING = 1;
// The operation is running.
RUNNING = 2;
// The operation is done; either cancelled or completed.
DONE = 3;
}
// Output only. The operation state (one of the `State` values).
State state = 1;
// Output only. A string containing the detailed operation state.
string inner_state = 2;
// Output only. A string containing any operation metadata details.
string details = 3;
// Output only. The time this state was entered.
google.protobuf.Timestamp state_start_time = 4;
}
// Metadata describing the operation.
//
// NOTE(review): field numbers in this message start at 7; numbers 1-6 are
// not used here. Confirm their history before assigning them to new fields.
message ClusterOperationMetadata {
// Output only. Name of the cluster for the operation.
string cluster_name = 7;
// Output only. Cluster UUID for the operation.
string cluster_uuid = 8;
// Output only. Current operation status.
ClusterOperationStatus status = 9;
// Output only. The previous operation statuses.
repeated ClusterOperationStatus status_history = 10;
// Output only. The operation type.
string operation_type = 11;
// Output only. Short description of operation.
string description = 12;
// Output only. Labels associated with the operation.
map<string, string> labels = 13;
// Output only. Errors encountered during operation execution.
// NOTE(review): the field is named `warnings` while this description says
// "errors" -- confirm which severity is actually reported here.
repeated string warnings = 14;
}

View File

@@ -0,0 +1,25 @@
// Copyright 2018 Google Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
syntax = "proto3";
package google.cloud.dataproc.v1beta2;
import "google/api/annotations.proto";
option go_package = "google.golang.org/genproto/googleapis/cloud/dataproc/v1beta2;dataproc";
option java_multiple_files = true;
option java_outer_classname = "SharedProto";
option java_package = "com.google.cloud.dataproc.v1beta2";

View File

@@ -0,0 +1,544 @@
// Copyright 2018 Google Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
syntax = "proto3";
package google.cloud.dataproc.v1beta2;
import "google/api/annotations.proto";
import "google/cloud/dataproc/v1beta2/clusters.proto";
import "google/cloud/dataproc/v1beta2/jobs.proto";
import "google/longrunning/operations.proto";
import "google/protobuf/empty.proto";
import "google/protobuf/timestamp.proto";
option go_package = "google.golang.org/genproto/googleapis/cloud/dataproc/v1beta2;dataproc";
option java_multiple_files = true;
option java_outer_classname = "WorkflowTemplatesProto";
option java_package = "com.google.cloud.dataproc.v1beta2";
// The API interface for managing Workflow Templates in the
// Cloud Dataproc API.
//
// Every RPC below is bound to two URL forms: a primary one under
// `projects/*/regions/*` and, via `additional_bindings`, an equivalent one
// under `projects/*/locations/*`.
service WorkflowTemplateService {
// Creates new workflow template.
//
// The template is sent as the HTTP request body (`body: "template"`).
rpc CreateWorkflowTemplate(CreateWorkflowTemplateRequest) returns (WorkflowTemplate) {
option (google.api.http) = {
post: "/v1beta2/{parent=projects/*/regions/*}/workflowTemplates"
body: "template"
additional_bindings {
post: "/v1beta2/{parent=projects/*/locations/*}/workflowTemplates"
body: "template"
}
};
}
// Retrieves the latest workflow template.
//
// Can retrieve previously instantiated template by specifying optional
// version parameter.
rpc GetWorkflowTemplate(GetWorkflowTemplateRequest) returns (WorkflowTemplate) {
option (google.api.http) = {
get: "/v1beta2/{name=projects/*/regions/*/workflowTemplates/*}"
additional_bindings {
get: "/v1beta2/{name=projects/*/locations/*/workflowTemplates/*}"
}
};
}
// Instantiates a template and begins execution.
//
// The returned Operation can be used to track execution of
// workflow by polling
// [operations.get][google.longrunning.Operations.GetOperation].
// The Operation will complete when entire workflow is finished.
//
// The running workflow can be aborted via
// [operations.cancel][google.longrunning.Operations.CancelOperation].
// This will cause any inflight jobs to be cancelled and workflow-owned
// clusters to be deleted.
//
// The [Operation.metadata][google.longrunning.Operation.metadata] will be
// [WorkflowMetadata][google.cloud.dataproc.v1beta2.WorkflowMetadata].
//
// On successful completion,
// [Operation.response][google.longrunning.Operation.response] will be
// [Empty][google.protobuf.Empty].
rpc InstantiateWorkflowTemplate(InstantiateWorkflowTemplateRequest) returns (google.longrunning.Operation) {
option (google.api.http) = {
post: "/v1beta2/{name=projects/*/regions/*/workflowTemplates/*}:instantiate"
body: "*"
additional_bindings {
post: "/v1beta2/{name=projects/*/locations/*/workflowTemplates/*}:instantiate"
body: "*"
}
};
}
// Instantiates a template and begins execution.
//
// Unlike `InstantiateWorkflowTemplate`, the template is supplied inline in
// the request body rather than referenced by name.
//
// This method is equivalent to executing the sequence
// [CreateWorkflowTemplate][google.cloud.dataproc.v1beta2.WorkflowTemplateService.CreateWorkflowTemplate],
// [InstantiateWorkflowTemplate][google.cloud.dataproc.v1beta2.WorkflowTemplateService.InstantiateWorkflowTemplate],
// [DeleteWorkflowTemplate][google.cloud.dataproc.v1beta2.WorkflowTemplateService.DeleteWorkflowTemplate].
//
// The returned Operation can be used to track execution of
// workflow by polling
// [operations.get][google.longrunning.Operations.GetOperation].
// The Operation will complete when entire workflow is finished.
//
// The running workflow can be aborted via
// [operations.cancel][google.longrunning.Operations.CancelOperation].
// This will cause any inflight jobs to be cancelled and workflow-owned
// clusters to be deleted.
//
// The [Operation.metadata][google.longrunning.Operation.metadata] will be
// [WorkflowMetadata][google.cloud.dataproc.v1beta2.WorkflowMetadata].
//
// On successful completion,
// [Operation.response][google.longrunning.Operation.response] will be
// [Empty][google.protobuf.Empty].
rpc InstantiateInlineWorkflowTemplate(InstantiateInlineWorkflowTemplateRequest) returns (google.longrunning.Operation) {
option (google.api.http) = {
post: "/v1beta2/{parent=projects/*/regions/*}/workflowTemplates:instantiateInline"
body: "template"
additional_bindings {
post: "/v1beta2/{parent=projects/*/locations/*}/workflowTemplates:instantiateInline"
body: "template"
}
};
}
// Updates (replaces) workflow template. The updated template
// must contain version that matches the current server version.
//
// The target template is taken from `template.name`, which is bound into
// the URL path; the template itself is the HTTP `PUT` body.
rpc UpdateWorkflowTemplate(UpdateWorkflowTemplateRequest) returns (WorkflowTemplate) {
option (google.api.http) = {
put: "/v1beta2/{template.name=projects/*/regions/*/workflowTemplates/*}"
body: "template"
additional_bindings {
put: "/v1beta2/{template.name=projects/*/locations/*/workflowTemplates/*}"
body: "template"
}
};
}
// Lists workflow templates under the specified parent. Results are
// paginated via `page_size` / `page_token` in the request.
//
// NOTE(review): `ListWorkflowTemplatesRequest` defines no filter field, so
// no filtering is documented here -- confirm whether one is planned.
rpc ListWorkflowTemplates(ListWorkflowTemplatesRequest) returns (ListWorkflowTemplatesResponse) {
option (google.api.http) = {
get: "/v1beta2/{parent=projects/*/regions/*}/workflowTemplates"
additional_bindings {
get: "/v1beta2/{parent=projects/*/locations/*}/workflowTemplates"
}
};
}
// Deletes a workflow template. It does not cancel in-progress workflows.
rpc DeleteWorkflowTemplate(DeleteWorkflowTemplateRequest) returns (google.protobuf.Empty) {
option (google.api.http) = {
delete: "/v1beta2/{name=projects/*/regions/*/workflowTemplates/*}"
additional_bindings {
delete: "/v1beta2/{name=projects/*/locations/*/workflowTemplates/*}"
}
};
}
}
// A Cloud Dataproc workflow template resource.
//
// A template combines a placement (where the jobs run) with a set of
// `OrderedJob` steps that form a directed acyclic graph.
message WorkflowTemplate {
// Required. The template id.
//
// The id must contain only letters (a-z, A-Z), numbers (0-9),
// underscores (_), and hyphens (-). Cannot begin or end with underscore
// or hyphen. Must consist of between 3 and 50 characters.
string id = 2;
// Output only. The "resource name" of the template, as described
// in https://cloud.google.com/apis/design/resource_names of the form
// `projects/{project_id}/regions/{region}/workflowTemplates/{template_id}`
//
// NOTE(review): although marked output only, `template.name` is bound into
// the `UpdateWorkflowTemplate` URL path, so callers supply it on update.
string name = 1;
// Optional. Used to perform a consistent read-modify-write.
//
// This field should be left blank for a `CreateWorkflowTemplate` request. It
// is required for an `UpdateWorkflowTemplate` request, and must match the
// current server version. A typical update template flow would fetch the
// current template with a `GetWorkflowTemplate` request, which will return
// the current template with the `version` field filled in with the
// current server version. The user updates other fields in the template,
// then returns it as part of the `UpdateWorkflowTemplate` request.
int32 version = 3;
// Output only. The time template was created.
google.protobuf.Timestamp create_time = 4;
// Output only. The time template was last updated.
google.protobuf.Timestamp update_time = 5;
// Optional. The labels to associate with this template. These labels
// will be propagated to all jobs and clusters created by the workflow
// instance.
//
// Label **keys** must contain 1 to 63 characters, and must conform to
// [RFC 1035](https://www.ietf.org/rfc/rfc1035.txt).
//
// Label **values** may be empty, but, if present, must contain 1 to 63
// characters, and must conform to
// [RFC 1035](https://www.ietf.org/rfc/rfc1035.txt).
//
// No more than 32 labels can be associated with a template.
map<string, string> labels = 6;
// Required. WorkflowTemplate scheduling information.
WorkflowTemplatePlacement placement = 7;
// Required. The Directed Acyclic Graph of Jobs to submit. Edges are
// expressed by each job's `prerequisite_step_ids`.
repeated OrderedJob jobs = 8;
}
// Specifies workflow execution target.
//
// Either `managed_cluster` or `cluster_selector` is required.
message WorkflowTemplatePlacement {
// Required. Specifies where workflow executes; either on a managed
// cluster or an existing cluster chosen by labels.
//
// The members are each marked optional because exactly one of them is
// chosen; as a `oneof`, setting one member clears the other.
oneof placement {
// Optional. A cluster that is managed by the workflow.
ManagedCluster managed_cluster = 1;
// Optional. A selector that chooses target cluster for jobs based
// on metadata.
//
// The selector is evaluated at the time each job is submitted.
ClusterSelector cluster_selector = 2;
}
}
// Cluster that is managed by the workflow.
message ManagedCluster {
// Required. The cluster name prefix. A unique cluster name will be formed by
// appending a random suffix.
//
// The name must contain only lower-case letters (a-z), numbers (0-9),
// and hyphens (-). Must begin with a letter. Cannot begin or end with
// hyphen. Must consist of between 2 and 35 characters.
string cluster_name = 2;
// Required. The cluster configuration.
ClusterConfig config = 3;
// Optional. The labels to associate with this cluster.
//
// Label keys must be between 1 and 63 characters long, and must conform to
// the following PCRE regular expression:
// [\p{Ll}\p{Lo}][\p{Ll}\p{Lo}\p{N}_-]{0,62}
//
// Label values must be between 1 and 63 characters long, and must conform to
// the following PCRE regular expression: [\p{Ll}\p{Lo}\p{N}_-]{0,63}
//
// NOTE(review): the text above says values are 1 to 63 characters long, but
// the quoted pattern (`{0,63}`) also admits the empty string -- confirm
// which of the two is enforced.
//
// No more than 32 labels can be associated with a given cluster.
map<string, string> labels = 4;
}
// A selector that chooses target cluster for jobs based on metadata.
//
// Selection is driven by `cluster_labels` only; `zone` controls where the
// workflow process executes and does not affect selection.
message ClusterSelector {
// Optional. The zone where workflow process executes. This parameter does not
// affect the selection of the cluster.
//
// If unspecified, the zone of the first cluster matching the selector
// is used.
string zone = 1;
// Required. The cluster labels. Cluster must have all labels
// to match.
map<string, string> cluster_labels = 2;
}
// A job executed by the workflow.
//
// Each job is one step of the workflow graph: `step_id` names the step and
// `prerequisite_step_ids` lists the steps it depends on.
message OrderedJob {
// Required. The step id. The id must be unique among all jobs
// within the template.
//
// The step id is used as prefix for job id, as job
// `goog-dataproc-workflow-step-id` label, and in
// [prerequisiteStepIds][google.cloud.dataproc.v1beta2.OrderedJob.prerequisite_step_ids]
// field from other steps.
//
// The id must contain only letters (a-z, A-Z), numbers (0-9),
// underscores (_), and hyphens (-). Cannot begin or end with underscore
// or hyphen. Must consist of between 3 and 50 characters.
string step_id = 1;
// Required. The job definition. Exactly one of the job types below is set;
// as a `oneof`, setting one member clears the others.
oneof job_type {
// Job is a Hadoop job.
HadoopJob hadoop_job = 2;
// Job is a Spark job.
SparkJob spark_job = 3;
// Job is a Pyspark job.
PySparkJob pyspark_job = 4;
// Job is a Hive job.
HiveJob hive_job = 5;
// Job is a Pig job.
PigJob pig_job = 6;
// Job is a SparkSql job.
SparkSqlJob spark_sql_job = 7;
}
// Optional. The labels to associate with this job.
//
// Label keys must be between 1 and 63 characters long, and must conform to
// the following regular expression:
// [\p{Ll}\p{Lo}][\p{Ll}\p{Lo}\p{N}_-]{0,62}
//
// Label values must be between 1 and 63 characters long, and must conform to
// the following regular expression: [\p{Ll}\p{Lo}\p{N}_-]{0,63}
//
// NOTE(review): the text above says values are 1 to 63 characters long, but
// the quoted pattern (`{0,63}`) also admits the empty string -- confirm
// which of the two is enforced.
//
// No more than 32 labels can be associated with a given job.
map<string, string> labels = 8;
// Optional. Job scheduling configuration.
JobScheduling scheduling = 9;
// Optional. The optional list of prerequisite job step_ids.
// If not specified, the job will start at the beginning of workflow.
repeated string prerequisite_step_ids = 10;
}
// Metadata describing a running or finished workflow.
//
// This is the message carried in
// [Operation.metadata][google.longrunning.Operation.metadata] of the
// long-running operation returned by the `InstantiateWorkflowTemplate` and
// `InstantiateInlineWorkflowTemplate` RPCs.
message WorkflowMetadata {
// The operation state.
enum State {
// Unused. This is the zero value, i.e. what an unset `state` field reads
// as.
UNKNOWN = 0;
// The operation has been created.
PENDING = 1;
// The operation is running.
RUNNING = 2;
// The operation is done; either cancelled or completed.
DONE = 3;
}
// Output only. The "resource name" of the template.
string template = 1;
// Output only. The version of template at the time of
// workflow instantiation.
int32 version = 2;
// Output only. The create cluster operation metadata.
ClusterOperation create_cluster = 3;
// Output only. The workflow graph.
WorkflowGraph graph = 4;
// Output only. The delete cluster operation metadata.
ClusterOperation delete_cluster = 5;
// Output only. The workflow state.
State state = 6;
// Output only. The name of the managed cluster.
string cluster_name = 7;
// Map from parameter names to values that were used for those parameters.
map<string, string> parameters = 8;
}
// The cluster operation triggered by a workflow.
//
// Used by `WorkflowMetadata` to report the create-cluster and delete-cluster
// operations.
message ClusterOperation {
// Output only. The id of the cluster operation.
string operation_id = 1;
// Output only. Error, if operation failed.
string error = 2;
// Output only. Indicates the operation is done.
bool done = 3;
}
// The workflow graph.
//
// The graph is a list of nodes; each node names its own prerequisites via
// `WorkflowNode.prerequisite_step_ids`.
message WorkflowGraph {
// Output only. The workflow nodes.
repeated WorkflowNode nodes = 1;
}
// The workflow node.
//
// NOTE(review): field number 4 is not used in this message (numbering goes
// 1, 2, 3, 5, 6). Confirm its history before assigning it to a new field.
message WorkflowNode {
// The workflow node state.
enum NodeState {
// State is unspecified. This is the zero value, i.e. what an unset `state`
// field reads as. (The value name uses `NODE_STATUS_` although the enum is
// named `NodeState`; value names are part of the published API.)
NODE_STATUS_UNSPECIFIED = 0;
// The node is awaiting prerequisite node to finish.
BLOCKED = 1;
// The node is runnable but not running.
RUNNABLE = 2;
// The node is running.
RUNNING = 3;
// The node completed successfully.
COMPLETED = 4;
// The node failed. A node can be marked FAILED because
// its ancestor or peer failed.
FAILED = 5;
}
// Output only. The name of the node.
string step_id = 1;
// Output only. Node's prerequisite nodes, given as their step ids.
repeated string prerequisite_step_ids = 2;
// Output only. The job id; populated after the node enters RUNNING state.
string job_id = 3;
// Output only. The node state.
NodeState state = 5;
// Output only. The error detail.
string error = 6;
}
// A request to create a workflow template.
message CreateWorkflowTemplateRequest {
// Required. The "resource name" of the region, as described
// in https://cloud.google.com/apis/design/resource_names of the form
// `projects/{project_id}/regions/{region}`
//
// The HTTP bindings for `CreateWorkflowTemplate` also match parents of the
// form `projects/*/locations/*`.
string parent = 1;
// Required. The Dataproc workflow template to create.
WorkflowTemplate template = 2;
}
// A request to fetch a workflow template.
message GetWorkflowTemplateRequest {
// Required. The "resource name" of the workflow template, as described
// in https://cloud.google.com/apis/design/resource_names of the form
// `projects/{project_id}/regions/{region}/workflowTemplates/{template_id}`
string name = 1;
// Optional. The version of workflow template to retrieve. Only previously
// instantiated versions can be retrieved.
//
// If unspecified, retrieves the current version.
int32 version = 2;
}
// A request to instantiate a workflow template.
//
// The template is referenced by `name`; `version` and `instance_id` are
// optional safeguards.
message InstantiateWorkflowTemplateRequest {
// Required. The "resource name" of the workflow template, as described
// in https://cloud.google.com/apis/design/resource_names of the form
// `projects/{project_id}/regions/{region}/workflowTemplates/{template_id}`
string name = 1;
// Optional. The version of workflow template to instantiate. If specified,
// the workflow will be instantiated only if the current version of
// the workflow template has the supplied version.
//
// This option cannot be used to instantiate a previous version of
// workflow template.
int32 version = 2;
// Optional. A tag that prevents multiple concurrent workflow
// instances with the same tag from running. This mitigates risk of
// concurrent instances started due to retries.
//
// It is recommended to always set this value to a
// [UUID](https://en.wikipedia.org/wiki/Universally_unique_identifier).
//
// The tag must contain only letters (a-z, A-Z), numbers (0-9),
// underscores (_), and hyphens (-). The maximum length is 40 characters.
string instance_id = 3;
}
// A request to instantiate an inline workflow template.
//
// The template to run is carried in the request itself (`template`) rather
// than referenced by resource name.
message InstantiateInlineWorkflowTemplateRequest {
// Required. The "resource name" of the workflow template region, as described
// in https://cloud.google.com/apis/design/resource_names of the form
// `projects/{project_id}/regions/{region}`
string parent = 1;
// Required. The workflow template to instantiate.
WorkflowTemplate template = 2;
// Optional. A tag that prevents multiple concurrent workflow
// instances with the same tag from running. This mitigates risk of
// concurrent instances started due to retries.
//
// It is recommended to always set this value to a
// [UUID](https://en.wikipedia.org/wiki/Universally_unique_identifier).
//
// The tag must contain only letters (a-z, A-Z), numbers (0-9),
// underscores (_), and hyphens (-). The maximum length is 40 characters.
string instance_id = 3;
}
// A request to update a workflow template.
message UpdateWorkflowTemplateRequest {
// Required. The updated workflow template.
//
// The `template.version` field must match the current version.
//
// `template.name` is bound into the `UpdateWorkflowTemplate` URL path, so it
// identifies which template is replaced.
WorkflowTemplate template = 1;
}
// A request to list workflow templates in a project.
//
// Listing is scoped by `parent` and paginated with `page_size` /
// `page_token`.
message ListWorkflowTemplatesRequest {
// Required. The "resource name" of the region, as described
// in https://cloud.google.com/apis/design/resource_names of the form
// `projects/{project_id}/regions/{region}`
string parent = 1;
// Optional. The maximum number of results to return in each response.
int32 page_size = 2;
// Optional. The page token, returned by a previous call, to request the
// next page of results.
string page_token = 3;
}
// A response to a request to list workflow templates in a project.
message ListWorkflowTemplatesResponse {
// Output only. WorkflowTemplates list.
repeated WorkflowTemplate templates = 1;
// Output only. This token is included in the response if there are more
// results to fetch; it is not populated on the last page. To fetch
// additional results, provide this value as the `page_token` in a
// subsequent <code>ListWorkflowTemplatesRequest</code>.
string next_page_token = 2;
}
// A request to delete a workflow template.
//
// Currently started workflows will remain running.
message DeleteWorkflowTemplateRequest {
// Required. The "resource name" of the workflow template, as described
// in https://cloud.google.com/apis/design/resource_names of the form
// `projects/{project_id}/regions/{region}/workflowTemplates/{template_id}`
string name = 1;
// Optional. The version of workflow template to delete. If specified,
// will only delete the template if the current server version matches
// specified version. If left unset, no version check is described.
int32 version = 2;
}