Running Queries#
Querying data#
Run a query and wait for it to finish:
# from google.cloud import bigquery
# client = bigquery.Client()
query = (
"SELECT name FROM `bigquery-public-data.usa_names.usa_1910_2013` "
'WHERE state = "TX" '
"LIMIT 100"
)
query_job = client.query(
query,
# Location must match that of the dataset(s) referenced in the query.
location="US",
) # API request - starts the query
for row in query_job: # API request - fetches results
# Row values can be accessed by field name or index
assert row[0] == row.name == row["name"]
print(row)
Run a dry run query#
# from google.cloud import bigquery
# client = bigquery.Client()
job_config = bigquery.QueryJobConfig()
job_config.dry_run = True
job_config.use_query_cache = False
query_job = client.query(
(
"SELECT name, COUNT(*) as name_count "
"FROM `bigquery-public-data.usa_names.usa_1910_2013` "
"WHERE state = 'WA' "
"GROUP BY name"
),
# Location must match that of the dataset(s) referenced in the query.
location="US",
job_config=job_config,
) # API request
# A dry run query completes immediately.
assert query_job.state == "DONE"
assert query_job.dry_run
print("This query will process {} bytes.".format(query_job.total_bytes_processed))
Writing query results to a destination table#
See BigQuery documentation for more information on writing query results.
# from google.cloud import bigquery
# client = bigquery.Client()
# dataset_id = 'your_dataset_id'
job_config = bigquery.QueryJobConfig()
# Set the destination table
table_ref = client.dataset(dataset_id).table("your_table_id")
job_config.destination = table_ref
sql = """
SELECT corpus
FROM `bigquery-public-data.samples.shakespeare`
GROUP BY corpus;
"""
# Start the query, passing in the extra configuration.
query_job = client.query(
sql,
# Location must match that of the dataset(s) referenced in the query
# and of the destination table.
location="US",
job_config=job_config,
) # API request - starts the query
query_job.result() # Waits for the query to finish
print("Query results loaded to table {}".format(table_ref.path))
Run a query using a named query parameter#
See BigQuery documentation for more information on parameterized queries.
# from google.cloud import bigquery
# client = bigquery.Client()
query = """
SELECT word, word_count
FROM `bigquery-public-data.samples.shakespeare`
WHERE corpus = @corpus
AND word_count >= @min_word_count
ORDER BY word_count DESC;
"""
query_params = [
bigquery.ScalarQueryParameter("corpus", "STRING", "romeoandjuliet"),
bigquery.ScalarQueryParameter("min_word_count", "INT64", 250),
]
job_config = bigquery.QueryJobConfig()
job_config.query_parameters = query_params
query_job = client.query(
query,
# Location must match that of the dataset(s) referenced in the query.
location="US",
job_config=job_config,
) # API request - starts the query
# Print the results
for row in query_job:
print("{}: \t{}".format(row.word, row.word_count))
assert query_job.state == "DONE"