Managing Tables#
Tables exist within datasets. See the BigQuery documentation for more information on tables.
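A table is identified by a fully-qualified ID of the form project.dataset.table. Most client methods accept either that string or a TableReference object; a minimal sketch of converting between the two (the IDs are placeholders):
from google.cloud import bigquery
table_id = "your-project.your_dataset.your_table"  # Placeholder ID.
table_ref = bigquery.TableReference.from_string(table_id)
print(table_ref.project, table_ref.dataset_id, table_ref.table_id)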
Listing Tables#
List the tables belonging to a dataset with the list_tables() method:
# TODO(developer): Import the client library.
# from google.cloud import bigquery
# TODO(developer): Construct a BigQuery client object.
# client = bigquery.Client()
# TODO(developer): Set dataset_id to the ID of the dataset that contains
# the tables you are listing.
# dataset_id = 'your-project.your_dataset'
tables = client.list_tables(dataset_id) # Make an API request.
print("Tables contained in '{}':".format(dataset_id))
for table in tables:
    print("{}.{}.{}".format(table.project, table.dataset_id, table.table_id))
Getting a Table#
Get a table resource with the get_table() method:
# TODO(developer): Import the client library.
# from google.cloud import bigquery
# TODO(developer): Construct a BigQuery client object.
# client = bigquery.Client()
# TODO(developer): Set table_id to the ID of the table to fetch.
# table_id = 'your-project.your_dataset.your_table'
table = client.get_table(table_id) # Make an API request.
# View table properties
print(
    "Got table '{}.{}.{}'.".format(table.project, table.dataset_id, table.table_id)
)
print("Table schema: {}".format(table.schema))
print("Table description: {}".format(table.description))
print("Table has {} rows".format(table.num_rows))
Browse data rows in a table with the list_rows() method:
# TODO(developer): Import the client library.
# from google.cloud import bigquery
# TODO(developer): Construct a BigQuery client object.
# client = bigquery.Client()
# TODO(developer): Set table_id to the ID of the table whose rows you want to browse.
# table_id = "your-project.your_dataset.your_table_name"
# Download all rows from a table.
rows_iter = client.list_rows(table_id) # Make an API request.
# Iterate over rows to make the API requests to fetch row data.
rows = list(rows_iter)
print("Downloaded {} rows from table {}".format(len(rows), table_id))
# Download at most 10 rows.
rows_iter = client.list_rows(table_id, max_results=10)
rows = list(rows_iter)
print("Downloaded {} rows from table {}".format(len(rows), table_id))
# Specify selected fields to limit the results to certain columns.
table = client.get_table(table_id) # Make an API request.
fields = table.schema[:2] # first two columns
rows_iter = client.list_rows(table_id, selected_fields=fields, max_results=10)
rows = list(rows_iter)
print("Selected {} columns from table {}.".format(len(rows_iter.schema), table_id))
print("Downloaded {} rows from table {}".format(len(rows), table_id))
# Print row data in tabular format.
rows = client.list_rows(table, max_results=10)
format_string = "{!s:<16} " * len(rows.schema)
field_names = [field.name for field in rows.schema]
print(format_string.format(*field_names)) # Prints column headers.
for row in rows:
    print(format_string.format(*row))  # Prints row data.
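If the pandas library is installed, the row iterator can also be converted to a DataFrame with its to_dataframe() method. A minimal sketch, assuming the client and table_id from the sample above:
# Requires pandas to be installed.
rows_iter = client.list_rows(table_id, max_results=10)
dataframe = rows_iter.to_dataframe()
print(dataframe.head())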
Creating a Table#
Create an empty table with the create_table() method:
from google.cloud import bigquery
# TODO(developer): Construct a BigQuery client object.
# client = bigquery.Client()
# TODO(developer): Set table_id to the ID of the table to create
# table_id = "your-project.your_dataset.your_table_name"
schema = [
    bigquery.SchemaField("full_name", "STRING", mode="REQUIRED"),
    bigquery.SchemaField("age", "INTEGER", mode="REQUIRED"),
]
table = bigquery.Table(table_id, schema=schema)
table = client.create_table(table) # Make an API request.
print(
    "Created table {}.{}.{}".format(table.project, table.dataset_id, table.table_id)
)
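Table options such as partitioning can be set on the Table object before it is created. A minimal sketch that creates a table partitioned by a DATE column; the table ID and column names are placeholders:
schema = [
    bigquery.SchemaField("transaction_date", "DATE", mode="REQUIRED"),
    bigquery.SchemaField("amount", "NUMERIC"),
]
table = bigquery.Table("your-project.your_dataset.partitioned_table", schema=schema)
table.time_partitioning = bigquery.TimePartitioning(
    type_=bigquery.TimePartitioningType.DAY,
    field="transaction_date",  # Partition by this DATE column.
)
table = client.create_table(table)  # Make an API request.
print("Created partitioned table {}".format(table.table_id))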
Load table data from a file with the load_table_from_file() method:
# from google.cloud import bigquery
# client = bigquery.Client()
# filename = '/path/to/file.csv'
# dataset_id = 'my_dataset'
# table_id = 'my_table'
dataset_ref = client.dataset(dataset_id)
table_ref = dataset_ref.table(table_id)
job_config = bigquery.LoadJobConfig()
job_config.source_format = bigquery.SourceFormat.CSV
job_config.skip_leading_rows = 1
job_config.autodetect = True
with open(filename, "rb") as source_file:
    job = client.load_table_from_file(source_file, table_ref, job_config=job_config)
job.result() # Waits for table load to complete.
print("Loaded {} rows into {}:{}.".format(job.output_rows, dataset_id, table_id))
Load a CSV file from Cloud Storage with the load_table_from_uri() method:
# from google.cloud import bigquery
# client = bigquery.Client()
# dataset_id = 'my_dataset'
dataset_ref = client.dataset(dataset_id)
job_config = bigquery.LoadJobConfig()
job_config.schema = [
    bigquery.SchemaField("name", "STRING"),
    bigquery.SchemaField("post_abbr", "STRING"),
]
job_config.skip_leading_rows = 1
# The source format defaults to CSV, so the line below is optional.
job_config.source_format = bigquery.SourceFormat.CSV
uri = "gs://cloud-samples-data/bigquery/us-states/us-states.csv"
load_job = client.load_table_from_uri(
    uri, dataset_ref.table("us_states"), job_config=job_config
)  # API request
print("Starting job {}".format(load_job.job_id))
load_job.result() # Waits for table load to complete.
print("Job finished.")
destination_table = client.get_table(dataset_ref.table("us_states"))
print("Loaded {} rows.".format(destination_table.num_rows))
See also: Loading CSV data from Cloud Storage.
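A load job's write disposition controls whether it appends to, replaces, or refuses to modify a table that already contains data. A minimal sketch, assuming the client, dataset_ref, uri, and job_config from the sample above:
# WRITE_TRUNCATE replaces existing rows; WRITE_APPEND and WRITE_EMPTY are the alternatives.
job_config.write_disposition = bigquery.WriteDisposition.WRITE_TRUNCATE
load_job = client.load_table_from_uri(
    uri, dataset_ref.table("us_states"), job_config=job_config
)  # API request
load_job.result()  # Waits for table load to complete.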
Load a JSON file from Cloud Storage:
# from google.cloud import bigquery
# client = bigquery.Client()
# dataset_id = 'my_dataset'
dataset_ref = client.dataset(dataset_id)
job_config = bigquery.LoadJobConfig()
job_config.schema = [
    bigquery.SchemaField("name", "STRING"),
    bigquery.SchemaField("post_abbr", "STRING"),
]
job_config.source_format = bigquery.SourceFormat.NEWLINE_DELIMITED_JSON
uri = "gs://cloud-samples-data/bigquery/us-states/us-states.json"
load_job = client.load_table_from_uri(
    uri,
    dataset_ref.table("us_states"),
    location="US",  # Location must match that of the destination dataset.
    job_config=job_config,
)  # API request
print("Starting job {}".format(load_job.job_id))
load_job.result() # Waits for table load to complete.
print("Job finished.")
destination_table = client.get_table(dataset_ref.table("us_states"))
print("Loaded {} rows.".format(destination_table.num_rows))
See also: Loading JSON data from Cloud Storage.
Load a Parquet file from Cloud Storage:
# from google.cloud import bigquery
# client = bigquery.Client()
# dataset_id = 'my_dataset'
dataset_ref = client.dataset(dataset_id)
job_config = bigquery.LoadJobConfig()
job_config.source_format = bigquery.SourceFormat.PARQUET
uri = "gs://cloud-samples-data/bigquery/us-states/us-states.parquet"
load_job = client.load_table_from_uri(
    uri, dataset_ref.table("us_states"), job_config=job_config
)  # API request
print("Starting job {}".format(load_job.job_id))
load_job.result() # Waits for table load to complete.
print("Job finished.")
destination_table = client.get_table(dataset_ref.table("us_states"))
print("Loaded {} rows.".format(destination_table.num_rows))
See also: Loading Parquet data from Cloud Storage.
Updating a Table#
Update a property in a table’s metadata with the update_table() method:
# from google.cloud import bigquery
# client = bigquery.Client()
# table_ref = client.dataset('my_dataset').table('my_table')
# table = client.get_table(table_ref) # API request
assert table.description == "Original description."
table.description = "Updated description."
table = client.update_table(table, ["description"]) # API request
assert table.description == "Updated description."
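The same pattern works for other mutable properties; list every changed field name in the second argument. A minimal sketch that sets a placeholder label and an expiration time on the table fetched above:
import datetime
table.labels = {"environment": "test"}  # Placeholder label.
table.expires = datetime.datetime.now(datetime.timezone.utc) + datetime.timedelta(days=7)
table = client.update_table(table, ["labels", "expires"])  # API request
print(table.labels, table.expires)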
Insert rows into a table with the insert_rows() method:
# TODO(developer): Uncomment the lines below and replace with your values.
# from google.cloud import bigquery
# client = bigquery.Client()
# dataset_id = 'my_dataset' # replace with your dataset ID
# For this sample, the table must already exist and have a defined schema
# table_id = 'my_table' # replace with your table ID
# table_ref = client.dataset(dataset_id).table(table_id)
# table = client.get_table(table_ref) # API request
rows_to_insert = [("Phred Phlyntstone", 32), ("Wylma Phlyntstone", 29)]
errors = client.insert_rows(table, rows_to_insert) # API request
assert errors == []
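Rows can also be supplied as JSON-compatible dictionaries with the insert_rows_json() method. A minimal sketch with the same placeholder values, reusing the table from above:
rows_to_insert = [
    {"full_name": "Phred Phlyntstone", "age": 32},
    {"full_name": "Wylma Phlyntstone", "age": 29},
]
errors = client.insert_rows_json(table, rows_to_insert)  # API request
assert errors == []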
Add an empty column to an existing table with the update_table() method:
from google.cloud import bigquery
# TODO(developer): Construct a BigQuery client object.
# client = bigquery.Client()
# TODO(developer): Set table_id to the ID of the table
# to add an empty column.
# table_id = "your-project.your_dataset.your_table_name"
table = client.get_table(table_id) # Make an API request.
original_schema = table.schema
new_schema = original_schema[:] # creates a copy of the schema
new_schema.append(bigquery.SchemaField("phone", "STRING"))
table.schema = new_schema
table = client.update_table(table, ["schema"]) # Make an API request.
if len(table.schema) == len(original_schema) + 1 == len(new_schema):
    print("A new column has been added.")
else:
    print("The column has not been added.")
Copying a Table#
Copy a table with the copy_table() method:
# from google.cloud import bigquery
# client = bigquery.Client()
source_dataset = client.dataset("samples", project="bigquery-public-data")
source_table_ref = source_dataset.table("shakespeare")
# dataset_id = 'my_dataset'
dest_table_ref = client.dataset(dataset_id).table("destination_table")
job = client.copy_table(
    source_table_ref,
    dest_table_ref,
    # Location must match that of the source and destination tables.
    location="US",
)  # API request
job.result() # Waits for job to complete.
assert job.state == "DONE"
dest_table = client.get_table(dest_table_ref) # API request
assert dest_table.num_rows > 0
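To control what happens when the destination table already exists, pass a CopyJobConfig. A minimal sketch that overwrites the destination, reusing the references from above:
job_config = bigquery.CopyJobConfig()
# WRITE_TRUNCATE replaces any existing destination data.
job_config.write_disposition = bigquery.WriteDisposition.WRITE_TRUNCATE
job = client.copy_table(
    source_table_ref, dest_table_ref, location="US", job_config=job_config
)  # API request
job.result()  # Waits for job to complete.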
Export table data to Google Cloud Storage with the extract_table() method:
# from google.cloud import bigquery
# client = bigquery.Client()
# bucket_name = 'my-bucket'
project = "bigquery-public-data"
dataset_id = "samples"
table_id = "shakespeare"
destination_uri = "gs://{}/{}".format(bucket_name, "shakespeare.csv")
dataset_ref = client.dataset(dataset_id, project=project)
table_ref = dataset_ref.table(table_id)
extract_job = client.extract_table(
    table_ref,
    destination_uri,
    # Location must match that of the source table.
    location="US",
)  # API request
extract_job.result() # Waits for job to complete.
print(
    "Exported {}:{}.{} to {}".format(project, dataset_id, table_id, destination_uri)
)
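An ExtractJobConfig controls the export format and compression. A minimal sketch that writes gzip-compressed newline-delimited JSON, reusing the table_ref and bucket_name from above (the destination object name is a placeholder):
job_config = bigquery.ExtractJobConfig()
job_config.destination_format = bigquery.DestinationFormat.NEWLINE_DELIMITED_JSON
job_config.compression = bigquery.Compression.GZIP
extract_job = client.extract_table(
    table_ref,
    "gs://{}/shakespeare.json.gz".format(bucket_name),  # Placeholder object name.
    location="US",
    job_config=job_config,
)  # API request
extract_job.result()  # Waits for job to complete.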
Deleting a Table#
Delete a table with the delete_table() method:
# TODO(developer): Import the client library.
# from google.cloud import bigquery
# TODO(developer): Construct a BigQuery client object.
# client = bigquery.Client()
# TODO(developer): Set table_id to the ID of the table to delete.
# table_id = 'your-project.your_dataset.your_table'
# If the table does not exist, delete_table raises
# google.api_core.exceptions.NotFound unless not_found_ok is True
client.delete_table(table_id, not_found_ok=True) # Make an API request.
print("Deleted table '{}'.".format(table_id))