google_dataplex_task

A Dataplex task represents the work that you want Dataplex to do on a schedule. It encapsulates code (a spark or notebook configuration), parameters (execution_spec), and the schedule (trigger_spec).

To get more information about Task, see:

* API documentation: https://cloud.google.com/dataplex/docs/reference/rest/v1/projects.locations.lakes.tasks
* Official Documentation: https://cloud.google.com/dataplex/docs

Example Usage - Dataplex Task Basic

data "google_project" "project" {

}

resource "google_dataplex_lake" "example" {
  name         = "tf-test-lake%{random_suffix}"
  location     = "us-central1"
  project = "my-project-name"
}


resource "google_dataplex_task" "example" {

    task_id      = "tf-test-task%{random_suffix}"
    location     = "us-central1"
    lake         = google_dataplex_lake.example.name

    description = "Test Task Basic"

    display_name = "task-basic"

    labels = { "count": "3" }

    trigger_spec  {
        type = "RECURRING"
        disabled = false
        max_retries = 3
        start_time = "2023-10-02T15:01:23Z"
        schedule = "1 * * * *"
    }

    execution_spec {
        service_account = "${data.google_project.project.number}-compute@developer.gserviceaccount.com"
        project = "my-project-name"
        max_job_execution_lifetime = "100s"
        kms_key = "234jn2kjn42k3n423"
    }

    spark {
        python_script_file = "gs://dataproc-examples/pyspark/hello-world/hello-world.py"

    }

    project = "my-project-name"

}

Example Usage - Dataplex Task Spark

# VPC network
resource "google_compute_network" "default" {
    name                    = "tf-test-workstation-cluster%{random_suffix}"
    auto_create_subnetworks = true
}

data "google_project" "project" {

}

resource "google_dataplex_lake" "example_spark" {
  name         = "tf-test-lake%{random_suffix}"
  location     = "us-central1"
  project = "my-project-name"
}


resource "google_dataplex_task" "example_spark" {

    task_id      = "tf-test-task%{random_suffix}"
    location     = "us-central1"
    lake         = google_dataplex_lake.example_spark.name
    trigger_spec  {
        type = "ON_DEMAND"
    }

    description = "task-spark-terraform"

    execution_spec {
        service_account = "${data.google_project.project.number}-compute@developer.gserviceaccount.com"
        args = {
            TASK_ARGS  = "--output_location,gs://spark-job/task-result, --output_format, json"
        }

    }

    spark {
        infrastructure_spec  {
            batch {
                executors_count = 2
                max_executors_count = 100
            }
            container_image {
                image = "test-image"
                java_jars = ["test-java-jars.jar"]
                python_packages = ["gs://bucket-name/my/path/to/lib.tar.gz"]
                properties = { "name": "wrench", "mass": "1.3kg", "count": "3" }
            }
            vpc_network  {
                    network_tags = ["test-network-tag"]
                    sub_network = google_compute_network.default.id
                }
        }
        file_uris = ["gs://terrafrom-test/test.csv"]
        archive_uris = ["gs://terraform-test/test.csv"]
        sql_script = "show databases"
    }

    project = "my-project-name"

}

Example Usage - Dataplex Task Notebook

# VPC network
resource "google_compute_network" "default" {
    name                    = "tf-test-workstation-cluster%{random_suffix}"
    auto_create_subnetworks = true
}


data "google_project" "project" {

}

resource "google_dataplex_lake" "example_notebook" {
  name         = "tf-test-lake%{random_suffix}"
  location     = "us-central1"
  project = "my-project-name"
}


resource "google_dataplex_task" "example_notebook" {

    task_id      = "tf-test-task%{random_suffix}"
    location     = "us-central1"
    lake         = google_dataplex_lake.example_notebook.name
    trigger_spec  {
        type = "RECURRING"
        schedule = "1 * * * *"
    }

    execution_spec {
        service_account = "${data.google_project.project.number}-compute@developer.gserviceaccount.com"
        args = {
            TASK_ARGS  = "--output_location,gs://spark-job-jars-anrajitha/task-result, --output_format, json"
        }
    }
    notebook {
        notebook = "gs://terraform-test/test-notebook.ipynb"
        infrastructure_spec  {
            batch {
                executors_count = 2
                max_executors_count = 100
            }
            container_image {
                image = "test-image"
                java_jars = ["test-java-jars.jar"]
                python_packages = ["gs://bucket-name/my/path/to/lib.tar.gz"]
                properties = { "name": "wrench", "mass": "1.3kg", "count": "3" }
            }
            vpc_network  {
                    network_tags = ["test-network-tag"]
                    network = google_compute_network.default.id
                }
        }
        file_uris = ["gs://terraform-test/test.csv"]
        archive_uris = ["gs://terraform-test/test.csv"]

    }
    project = "my-project-name"


}

Argument Reference

The following arguments are supported:

* task_id - (Required) The task Id of the task.
* lake - (Required) The lake in which the task will be created.
* location - (Optional) The location in which the task will be created.
* trigger_spec - (Required) Configuration for when and how often this task should run. Structure is documented below.
* execution_spec - (Required) Configuration for how the task is executed. Structure is documented below.
* description - (Optional) User-provided description of the task.
* display_name - (Optional) User friendly display name.
* labels - (Optional) User-defined labels for the task.
* spark - (Optional) Configuration for running a custom Spark task. Structure is documented below.
* notebook - (Optional) Configuration for running a scheduled notebook. Structure is documented below.
* project - (Optional) The ID of the project in which the resource belongs. If it is not provided, the provider project is used.

The trigger_spec block supports:

* type - (Required) Trigger type of the user-specified task. Possible values are ON_DEMAND and RECURRING.
* disabled - (Optional) Prevent the task from executing. This does not cancel already running tasks. It is intended to temporarily disable RECURRING tasks.
* max_retries - (Optional) Number of retry attempts before aborting. Set to zero to never attempt to retry a failed task.
* start_time - (Optional) The first run of the task will be after this time. If not specified, the task will run shortly after being submitted if ON_DEMAND and based on the schedule if RECURRING.
* schedule - (Optional) Cron schedule for running the task periodically. Required for RECURRING tasks; see the sketch below.
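
The examples on this page use a bare cron expression. A recurring schedule with an explicit timezone could look like the sketch below; the CRON_TZ prefix is an assumption carried over from other Google Cloud cron schedule fields rather than something shown in the examples above:

# Inside a google_dataplex_task resource.
trigger_spec {
  type = "RECURRING"
  # 09:00 every Monday, New York time (CRON_TZ prefix assumed).
  schedule = "CRON_TZ=America/New_York 0 9 * * 1"
}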

The execution_spec block supports:

* service_account - (Required) Service account to use to execute a task.
* args - (Optional) The arguments to pass to the task.
* project - (Optional) The project in which jobs are run. By default, the project containing the lake is used.
* max_job_execution_lifetime - (Optional) The maximum duration after which the job execution is expired. A duration in seconds terminated by 's', such as "100s".
* kms_key - (Optional) The Cloud KMS key to use for encryption, of the form projects/{project_number}/locations/{location_id}/keyRings/{key-ring-name}/cryptoKeys/{key-name}.


The spark block supports:

* python_script_file - (Optional) The Cloud Storage URI of the Python script to be executed.
* main_jar_file_uri - (Optional) The Cloud Storage URI of the jar file that contains the main class; see the sketch below.
* main_class - (Optional) The name of the driver's main class. The jar file that contains the class must be on the classpath.
* sql_script - (Optional) The query text.
* sql_script_file - (Optional) The Cloud Storage URI of the SQL script to be executed.
* file_uris - (Optional) Cloud Storage URIs of files to be placed in the working directory of each executor.
* archive_uris - (Optional) Cloud Storage URIs of archives to be extracted into the working directory of each executor.
* infrastructure_spec - (Optional) Infrastructure for running the task. Structure is documented below.
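
The examples on this page exercise python_script_file and sql_script. A jar-based variant is sketched below; the bucket path is a placeholder, and the jar is assumed to declare its main class in its manifest:

# Inside a google_dataplex_task resource.
spark {
  # Placeholder URI; the jar's manifest is assumed to declare the main class.
  main_jar_file_uri = "gs://my-bucket/jobs/spark-job.jar"
}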

The infrastructure_spec block supports:

* batch - (Optional) Compute resources needed for a task when using Dataproc Serverless batches. Structure is documented below.
* container_image - (Optional) Container image runtime configuration. Structure is documented below.
* vpc_network - (Optional) VPC network configuration. Structure is documented below.

The batch block supports:

* executors_count - (Optional) Total number of job executors.
* max_executors_count - (Optional) Maximum number of configurable executors. If this is greater than executors_count, autoscaling is enabled.

The container_image block supports:

* image - (Optional) Container image to use.
* java_jars - (Optional) A list of Java JAR files to add to the classpath. Valid input includes Cloud Storage URIs to JAR binaries.
* python_packages - (Optional) A list of Python packages to be installed. Valid formats include Cloud Storage URIs to pip-installable libraries, such as tar.gz archives.
* properties - (Optional) Overrides for the common configuration of the open source components installed for the job, such as Spark properties.

The vpc_network block supports:

* network - (Optional) The Cloud VPC network in which the job is run.
* sub_network - (Optional) The Cloud VPC sub-network in which the job is run. Only one of network or sub_network may be specified.
* network_tags - (Optional) List of network tags to apply to the job.

The notebook block supports:

* notebook - (Required) The Cloud Storage URI of the notebook to execute.
* infrastructure_spec - (Optional) Infrastructure for running the task.
* file_uris - (Optional) Cloud Storage URIs of files to be placed in the working directory of each executor.
* archive_uris - (Optional) Cloud Storage URIs of archives to be extracted into the working directory of each executor.

The infrastructure_spec, batch, container_image, and vpc_network blocks within notebook support the same fields as the correspondingly named blocks within spark, documented above.

Attributes Reference

In addition to the arguments listed above, the following computed attributes are exported:

* id - An identifier for the resource with format projects/{{project}}/locations/{{location}}/lakes/{{lake}}/tasks/{{task_id}}
* name - The relative resource name of the task, of the form projects/{project_number}/locations/{location_id}/lakes/{lake_id}/tasks/{task_id}.
* uid - System generated globally unique ID for the task. This ID will be different if the task is deleted and re-created with the same name.
* create_time - The time when the task was created.
* update_time - The time when the task was last updated.
* state - Current state of the task.
* execution_status - Status of the latest task executions. Structure is documented below; see also the usage sketch after these lists.

The execution_status block contains:

* update_time - Last update time of the status.
* latest_job - Latest job execution. Structure is documented below.

The latest_job block contains:

* name - The relative resource name of the job.
* uid - System generated globally unique ID for the job.
* start_time - The time when the job was started.
* end_time - The time when the job ended.
* state - Execution state for the job.
* retry_count - The number of times the job has been retried (excluding the initial attempt).
* service - The underlying service running the job.
* service_job - The full resource name for the job run under a particular service.
* message - Additional information about the current state.
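
As a small illustration of consuming these attributes, the outputs below reference the example_spark resource defined earlier. The [0] indexing assumes, as is typical for computed blocks in this provider, that execution_status and latest_job are exposed as single-element lists:

output "task_state" {
  value = google_dataplex_task.example_spark.state
}

output "latest_job_name" {
  # Computed nested blocks are addressed by index.
  value = google_dataplex_task.example_spark.execution_status[0].latest_job[0].name
}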

Timeouts

This resource provides the following Timeouts configuration options:

* create - Default is 20 minutes.
* update - Default is 20 minutes.
* delete - Default is 20 minutes.
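
The defaults can be overridden with a standard Terraform timeouts block; a minimal sketch:

resource "google_dataplex_task" "example" {
  # ... task configuration as in the examples above ...

  timeouts {
    create = "30m"
    update = "30m"
    delete = "30m"
  }
}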

Import

Task can be imported using any of these accepted formats:

* projects/{{project}}/locations/{{location}}/lakes/{{lake}}/tasks/{{task_id}}
* {{project}}/{{location}}/{{lake}}/{{task_id}}
* {{location}}/{{lake}}/{{task_id}}

In Terraform v1.5.0 and later, use an import block to import Task using one of the formats above. For example:

import {
  id = "projects/{{project}}/locations/{{location}}/lakes/{{lake}}/tasks/{{task_id}}"
  to = google_dataplex_task.default
}

When using the terraform import command, Task can be imported using one of the formats above. For example:

$ terraform import google_dataplex_task.default projects/{{project}}/locations/{{location}}/lakes/{{lake}}/tasks/{{task_id}}
$ terraform import google_dataplex_task.default {{project}}/{{location}}/{{lake}}/{{task_id}}
$ terraform import google_dataplex_task.default {{location}}/{{lake}}/{{task_id}}

User Project Overrides

This resource supports User Project Overrides.
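
The override is enabled on the provider rather than on the resource; a minimal sketch using the standard user_project_override and billing_project provider arguments:

provider "google" {
  user_project_override = true
  billing_project       = "my-project-name"
}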