google_dataplex_datascan

Represents a user-visible job that provides insights for the related data source.

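To get more information about Datascan, see the Dataplex API documentation (https://cloud.google.com/dataplex/docs/reference/rest) and the official Dataplex how-to guides (https://cloud.google.com/dataplex/docs).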

Example Usage - Dataplex Datascan Basic Profile

# Minimal data profile scan over a public BigQuery table.
resource "google_dataplex_datascan" "basic_profile" {
  location     = "us-central1"
  data_scan_id = "dataprofile-basic"

  # Fully qualified resource name of the BigQuery table to scan.
  data {
    resource = "//bigquery.googleapis.com/projects/bigquery-public-data/datasets/samples/tables/shakespeare"
  }

  # on_demand trigger: no schedule is configured for this scan.
  execution_spec {
    trigger {
      on_demand {}
    }
  }

  # Empty data_profile_spec: this example sets no profile options.
  data_profile_spec {}

  project = "my-project-name"
}

Example Usage - Dataplex Datascan Full Profile

# Scheduled data profile scan that sets sampling, filtering, field selection
# and a BigQuery export of the scan results.
resource "google_dataplex_datascan" "full_profile" {
  project      = "my-project-name"
  location     = "us-central1"
  data_scan_id = "dataprofile-full"
  display_name = "Full Datascan Profile"
  description  = "Example resource - Full Datascan Profile"

  labels = {
    author = "billing"
  }

  # Fully qualified resource name of the BigQuery table to scan.
  data {
    resource = "//bigquery.googleapis.com/projects/bigquery-public-data/datasets/samples/tables/shakespeare"
  }

  execution_spec {
    trigger {
      schedule {
        # Cron fields "1 1 * * *": minute 1 of hour 1, every day, in the TZ given.
        cron = "TZ=America/New_York 1 1 * * *"
      }
    }
  }

  data_profile_spec {
    sampling_percent = 80
    row_filter       = "word_count > 10"

    include_fields {
      field_names = ["word_count"]
    }

    exclude_fields {
      field_names = ["property_type"]
    }

    post_scan_actions {
      bigquery_export {
        # The path names the "dataplex_dataset" dataset, which is declared by
        # google_bigquery_dataset.source; the path is a plain string, so the
        # ordering is expressed through depends_on below.
        results_table = "//bigquery.googleapis.com/projects/my-project-name/datasets/dataplex_dataset/tables/profile_export"
      }
    }
  }

  depends_on = [google_bigquery_dataset.source]
}

# BigQuery dataset whose id ("dataplex_dataset") appears in the full_profile
# scan's results_table path.
resource "google_bigquery_dataset" "source" {
  dataset_id    = "dataplex_dataset"
  location      = "US"
  friendly_name = "test"
  description   = "This is a test description"

  # Delete any tables in the dataset when the dataset itself is destroyed.
  delete_contents_on_destroy = true
}

Example Usage - Dataplex Datascan Basic Quality

# Minimal data quality scan with a single table-level rule.
resource "google_dataplex_datascan" "basic_quality" {
  project      = "my-project-name"
  location     = "us-central1"
  data_scan_id = "dataquality-basic"

  # Fully qualified resource name of the BigQuery table to scan.
  data {
    resource = "//bigquery.googleapis.com/projects/bigquery-public-data/datasets/samples/tables/shakespeare"
  }

  # on_demand trigger: no schedule is configured for this scan.
  execution_spec {
    trigger {
      on_demand {}
    }
  }

  data_quality_spec {
    # Table-level check: the SQL expression asserts the table is non-empty.
    rules {
      name        = "rule1"
      description = "rule 1 for validity dimension"
      dimension   = "VALIDITY"

      table_condition_expectation {
        sql_expression = "COUNT(*) > 0"
      }
    }
  }
}

Example Usage - Dataplex Datascan Full Quality

# Scheduled data quality scan showing one rule per expectation type.
resource "google_dataplex_datascan" "full_quality" {
  project      = "my-project-name"
  location     = "us-central1"
  data_scan_id = "dataquality-full"
  display_name = "Full Datascan Quality"
  description  = "Example resource - Full Datascan Quality"

  labels = {
    author = "billing"
  }

  # Fully qualified resource name of the BigQuery table to scan.
  data {
    resource = "//bigquery.googleapis.com/projects/bigquery-public-data/datasets/austin_bikeshare/tables/bikeshare_stations"
  }

  execution_spec {
    trigger {
      schedule {
        # Cron fields "1 1 * * *": minute 1 of hour 1, every day, in the TZ given.
        cron = "TZ=America/New_York 1 1 * * *"
      }
    }

    # NOTE(review): presumably the column used to scan incrementally; confirm
    # against the provider's execution_spec.field documentation.
    field = "modified_date"
  }

  data_quality_spec {
    sampling_percent = 5
    row_filter       = "station_id > 1000"

    # Rule 1: non-null check on "address" with a 0.99 threshold.
    rules {
      dimension = "VALIDITY"
      column    = "address"
      threshold = 0.99

      non_null_expectation {}
    }

    # Rule 2: range check on "council_district"; the lower bound is strict,
    # the upper bound is not.
    rules {
      dimension   = "VALIDITY"
      column      = "council_district"
      ignore_null = true
      threshold   = 0.9

      range_expectation {
        min_value          = 1
        max_value          = 10
        strict_min_enabled = true
        strict_max_enabled = false
      }
    }

    # Rule 3: regex check on "power_type".
    rules {
      dimension   = "VALIDITY"
      column      = "power_type"
      ignore_null = false

      regex_expectation {
        regex = ".*solar.*"
      }
    }

    # Rule 4: set-membership check on "property_type".
    rules {
      dimension   = "VALIDITY"
      column      = "property_type"
      ignore_null = false

      set_expectation {
        values = ["sidewalk", "parkland"]
      }
    }

    # Rule 5: uniqueness check on "address".
    rules {
      dimension = "UNIQUENESS"
      column    = "address"

      uniqueness_expectation {}
    }

    # Rule 6: range check on the MEAN statistic of "number_of_docks";
    # both bounds are strict.
    rules {
      dimension = "VALIDITY"
      column    = "number_of_docks"

      statistic_range_expectation {
        statistic          = "MEAN"
        min_value          = 5
        max_value          = 15
        strict_min_enabled = true
        strict_max_enabled = true
      }
    }

    # Rule 7: row-level SQL condition on "footprint_length".
    rules {
      dimension = "VALIDITY"
      column    = "footprint_length"

      row_condition_expectation {
        sql_expression = "footprint_length > 0 AND footprint_length <= 10"
      }
    }

    # Rule 8: table-level SQL condition; no column is set.
    rules {
      dimension = "VALIDITY"

      table_condition_expectation {
        sql_expression = "COUNT(*) > 0"
      }
    }
  }
}

Argument Reference

The following arguments are supported:

The data block supports:

The execution_spec block supports:

The trigger block supports:

The schedule block supports:


The data_quality_spec block supports:

The post_scan_actions block supports:

The bigquery_export block supports:

The rules block supports:

The range_expectation block supports:

The set_expectation block supports:

The regex_expectation block supports:

The statistic_range_expectation block supports:

The row_condition_expectation block supports:

The table_condition_expectation block supports:

The data_profile_spec block supports:

The post_scan_actions block supports:

The bigquery_export block supports:

The include_fields block supports:

The exclude_fields block supports:

Attributes Reference

In addition to the arguments listed above, the following computed attributes are exported:

The execution_status block contains:

Timeouts

This resource provides the following Timeouts configuration options:

Import

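Datascan can be imported using any of these accepted formats:

* projects/{{project}}/locations/{{location}}/dataScans/{{data_scan_id}}
* {{project}}/{{location}}/{{data_scan_id}}
* {{location}}/{{data_scan_id}}
* {{data_scan_id}}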

In Terraform v1.5.0 and later, use an import block to import Datascan using one of the formats above. For example:

# Import block (Terraform v1.5.0 and later). Replace each {{...}} placeholder
# with the real value before use.
import {
  to = google_dataplex_datascan.default
  id = "projects/{{project}}/locations/{{location}}/dataScans/{{data_scan_id}}"
}

When using the terraform import command, Datascan can be imported using one of the formats above. For example:

$ terraform import google_dataplex_datascan.default projects/{{project}}/locations/{{location}}/dataScans/{{data_scan_id}}
$ terraform import google_dataplex_datascan.default {{project}}/{{location}}/{{data_scan_id}}
$ terraform import google_dataplex_datascan.default {{location}}/{{data_scan_id}}
$ terraform import google_dataplex_datascan.default {{data_scan_id}}

User Project Overrides

This resource supports User Project Overrides.