Running Queries#

Querying data#

Run a query and wait for it to finish:

from google.cloud import bigquery

client = bigquery.Client()

query = (
    "SELECT name FROM `bigquery-public-data.usa_names.usa_1910_2013` "
    'WHERE state = "TX" '
    "LIMIT 100"
)
query_job = client.query(
    query,
    # Location must match that of the dataset(s) referenced in the query.
    location="US",
)  # API request - starts the query

for row in query_job:  # API request - fetches results
    # Row values can be accessed by field name or index
    assert row[0] == row.name == row["name"]
    print(row)
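
If you would rather block explicitly than iterate over the job, QueryJob.result() waits for the query to finish and returns a row iterator. A minimal sketch, which also assumes the optional pandas dependency is installed so the rows can be converted to a DataFrame:

# Equivalent approach: wait for the job explicitly, then fetch all rows.
results = query_job.result()  # Waits for the query to finish

# to_dataframe() assumes the optional pandas dependency is installed,
# e.g. pip install google-cloud-bigquery[pandas]
df = results.to_dataframe()
print(df.head())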

Run a dry run query#

from google.cloud import bigquery

client = bigquery.Client()

job_config = bigquery.QueryJobConfig()
job_config.dry_run = True
job_config.use_query_cache = False
query_job = client.query(
    (
        "SELECT name, COUNT(*) as name_count "
        "FROM `bigquery-public-data.usa_names.usa_1910_2013` "
        "WHERE state = 'WA' "
        "GROUP BY name"
    ),
    # Location must match that of the dataset(s) referenced in the query.
    location="US",
    job_config=job_config,
)  # API request

# A dry run query completes immediately.
assert query_job.state == "DONE"
assert query_job.dry_run

print("This query will process {} bytes.".format(query_job.total_bytes_processed))

Writing query results to a destination table#

See the BigQuery documentation for more information on writing query results.

from google.cloud import bigquery

client = bigquery.Client()
dataset_id = "your_dataset_id"  # TODO(developer): set to the ID of an existing dataset.

job_config = bigquery.QueryJobConfig()
# Set the destination table
table_ref = client.dataset(dataset_id).table("your_table_id")
job_config.destination = table_ref
sql = """
    SELECT corpus
    FROM `bigquery-public-data.samples.shakespeare`
    GROUP BY corpus;
"""

# Start the query, passing in the extra configuration.
query_job = client.query(
    sql,
    # Location must match that of the dataset(s) referenced in the query
    # and of the destination table.
    location="US",
    job_config=job_config,
)  # API request - starts the query

query_job.result()  # Waits for the query to finish
print("Query results loaded to table {}".format(table_ref.path))

Run a query using a named query parameter#

See the BigQuery documentation for more information on parameterized queries.

from google.cloud import bigquery

client = bigquery.Client()

query = """
    SELECT word, word_count
    FROM `bigquery-public-data.samples.shakespeare`
    WHERE corpus = @corpus
    AND word_count >= @min_word_count
    ORDER BY word_count DESC;
"""
query_params = [
    bigquery.ScalarQueryParameter("corpus", "STRING", "romeoandjuliet"),
    bigquery.ScalarQueryParameter("min_word_count", "INT64", 250),
]
job_config = bigquery.QueryJobConfig()
job_config.query_parameters = query_params
query_job = client.query(
    query,
    # Location must match that of the dataset(s) referenced in the query.
    location="US",
    job_config=job_config,
)  # API request - starts the query

# Print the results
for row in query_job:
    print("{}: \t{}".format(row.word, row.word_count))

assert query_job.state == "DONE"
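
Named parameters can also hold arrays. A minimal sketch using ArrayQueryParameter with IN UNNEST to match several corpora at once; the corpus names here are illustrative:

query = """
    SELECT word, word_count
    FROM `bigquery-public-data.samples.shakespeare`
    WHERE corpus IN UNNEST(@corpora)
    ORDER BY word_count DESC
    LIMIT 10;
"""
job_config = bigquery.QueryJobConfig()
job_config.query_parameters = [
    # The corpus names are illustrative examples.
    bigquery.ArrayQueryParameter("corpora", "STRING", ["romeoandjuliet", "hamlet"])
]
query_job = client.query(
    query,
    # Location must match that of the dataset(s) referenced in the query.
    location="US",
    job_config=job_config,
)  # API request - starts the query

for row in query_job:
    print("{}: \t{}".format(row.word, row.word_count))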