# Copyright 2015 Google LLC
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""Define API Datasets."""
from __future__ import absolute_import
import six
import copy
import re
import google.cloud._helpers
from google.cloud.bigquery import _helpers
from google.cloud.bigquery.model import ModelReference
from google.cloud.bigquery.routine import RoutineReference
from google.cloud.bigquery.table import TableReference
# Matches a dataset ID string whose project part contains a colon: a run of
# non-whitespace characters, a ":", and one or more non-"." characters
# (captured as ``project_id``), followed by "." and a ``dataset_id`` that
# contains no "." and runs to the end of the string.
# ``DatasetReference.from_string`` tries this pattern before splitting on ".",
# so dots that appear before the colon are not treated as the project/dataset
# separator.
# NOTE(review): presumably targets domain-scoped project IDs such as
# "example.com:my-project" -- confirm.
_PROJECT_PREFIX_PATTERN = re.compile(
r"""
(?P<project_id>\S+\:[^.]+)\.(?P<dataset_id>[^.]+)$
""",
re.VERBOSE,
)
def _get_table_reference(self, table_id):
    """Build a reference to a table that belongs to this dataset.

    This function is assigned as the ``table`` method of the dataset
    classes in this module, so ``self`` is the dataset-like object.

    Args:
        table_id (str): ID of the table to point at.

    Returns:
        google.cloud.bigquery.table.TableReference:
            Reference built from this dataset and ``table_id``.
    """
    table_ref = TableReference(self, table_id)
    return table_ref
def _get_model_reference(self, model_id):
    """Build a reference to a model that belongs to this dataset.

    This function is assigned as the ``model`` method of the dataset
    classes in this module, so ``self`` is the dataset-like object.

    Args:
        model_id (str): ID of the model to point at.

    Returns:
        google.cloud.bigquery.model.ModelReference:
            Reference built from this dataset's project ID and dataset ID
            together with ``model_id``.
    """
    # Assemble the API-style resource first, then let ModelReference parse it.
    resource = {
        "projectId": self.project,
        "datasetId": self.dataset_id,
        "modelId": model_id,
    }
    return ModelReference.from_api_repr(resource)
def _get_routine_reference(self, routine_id):
    """Build a reference to a routine that belongs to this dataset.

    This function is assigned as the ``routine`` method of the dataset
    classes in this module, so ``self`` is the dataset-like object.

    Args:
        routine_id (str): ID of the routine to point at.

    Returns:
        google.cloud.bigquery.routine.RoutineReference:
            Reference built from this dataset's project ID and dataset ID
            together with ``routine_id``.
    """
    # Assemble the API-style resource first, then let RoutineReference parse it.
    resource = dict(
        projectId=self.project,
        datasetId=self.dataset_id,
        routineId=routine_id,
    )
    return RoutineReference.from_api_repr(resource)
class AccessEntry(object):
    """Represents grant of an access role to an entity.

    An entry must have exactly one of the allowed :attr:`ENTITY_TYPES`. If
    anything but ``view`` is set, a ``role`` is also required. ``role`` is
    omitted for a ``view``, because ``view`` s are always read-only.

    See https://cloud.google.com/bigquery/docs/reference/rest/v2/datasets.

    Two entries compare equal when their ``role``, ``entity_type`` and
    ``entity_id`` are equal, and equal entries share the same hash, so
    entries can be stored in sets or used as dictionary keys. The attributes
    are plain mutable attributes: changing them after an entry has been put
    in a set or dict changes its hash.

    Attributes:
        role (str):
            Role granted to the entity. The following string values are
            supported: `'READER'`, `'WRITER'`, `'OWNER'`. It may also be
            :data:`None` if the ``entity_type`` is ``view``.

        entity_type (str):
            Type of entity being granted the role. One of :attr:`ENTITY_TYPES`.

        entity_id (Union[str, Dict[str, str]]):
            If the ``entity_type`` is not 'view', the ``entity_id`` is the
            ``str`` ID of the entity being granted the role. If the
            ``entity_type`` is 'view', the ``entity_id`` is a ``dict``
            representing the view from a different dataset to grant access to
            in the following format::

                {
                    'projectId': string,
                    'datasetId': string,
                    'tableId': string
                }

    Raises:
        ValueError:
            If the ``entity_type`` is not among :attr:`ENTITY_TYPES`, or if a
            ``view`` has ``role`` set, or a non ``view`` **does not** have a
            ``role`` set.

    Examples:
        >>> entry = AccessEntry('OWNER', 'userByEmail', 'user@example.com')

        >>> view = {
        ...     'projectId': 'my-project',
        ...     'datasetId': 'my_dataset',
        ...     'tableId': 'my_table'
        ... }
        >>> entry = AccessEntry(None, 'view', view)
    """

    ENTITY_TYPES = frozenset(
        ["userByEmail", "groupByEmail", "domain", "specialGroup", "view"]
    )
    """Allowed entity types."""

    def __init__(self, role, entity_type, entity_id):
        if entity_type not in self.ENTITY_TYPES:
            # Sort the names: a frozenset iterates in arbitrary order, which
            # would make the error message differ from run to run.
            message = "Entity type %r not among: %s" % (
                entity_type,
                ", ".join(sorted(self.ENTITY_TYPES)),
            )
            raise ValueError(message)
        if entity_type == "view":
            # Views are always read-only, so a role makes no sense for them.
            if role is not None:
                raise ValueError(
                    "Role must be None for a view. Received " "role: %r" % (role,)
                )
        else:
            if role is None:
                raise ValueError(
                    "Role must be set for entity " "type %r" % (entity_type,)
                )

        self.role = role
        self.entity_type = entity_type
        self.entity_id = entity_id

    def _key(self):
        """A hashable tuple that describes this entry.

        Used to compute this instance's hashcode. A ``dict`` entity ID (as
        used for views) is converted to a frozenset of its items because
        dicts themselves are not hashable.

        Returns:
            Tuple: The role, entity type and (frozen) entity ID.
        """
        entity_id = self.entity_id
        if isinstance(entity_id, dict):
            entity_id = frozenset(entity_id.items())
        return (self.role, self.entity_type, entity_id)

    def __eq__(self, other):
        if not isinstance(other, AccessEntry):
            return NotImplemented
        return (
            self.role == other.role
            and self.entity_type == other.entity_type
            and self.entity_id == other.entity_id
        )

    def __ne__(self, other):
        return not self == other

    def __hash__(self):
        # Defined alongside __eq__ so that equal entries hash equally; without
        # it Python 3 makes the class unhashable.
        return hash(self._key())

    def __repr__(self):
        return "<AccessEntry: role=%s, %s=%s>" % (
            self.role,
            self.entity_type,
            self.entity_id,
        )

    def to_api_repr(self):
        """Construct the API resource representation of this access entry

        Returns:
            Dict[str, object]:
                Access entry represented as an API resource. The ``role`` key
                is only present when a role is set.
        """
        resource = {self.entity_type: self.entity_id}
        if self.role is not None:
            resource["role"] = self.role
        return resource

    @classmethod
    def from_api_repr(cls, resource):
        """Factory: construct an access entry given its API representation

        Args:
            resource (Dict[str, object]):
                Access entry resource representation returned from the API.
                It is not modified; a shallow copy is consumed instead.

        Returns:
            google.cloud.bigquery.dataset.AccessEntry:
                Access entry parsed from ``resource``.

        Raises:
            ValueError:
                If the resource has more keys than ``role`` and one additional
                key.
            KeyError:
                If the resource has no key other than ``role``.
        """
        entry = resource.copy()
        role = entry.pop("role", None)
        # Whatever single key is left names the entity type.
        entity_type, entity_id = entry.popitem()
        if len(entry) != 0:
            raise ValueError("Entry has unexpected keys remaining.", entry)
        return cls(role, entity_type, entity_id)
class DatasetReference(object):
    """A lightweight pointer to a dataset, made of a project ID and a dataset ID.

    See
    https://cloud.google.com/bigquery/docs/reference/rest/v2/datasets

    Args:
        project (str): The ID of the project
        dataset_id (str): The ID of the dataset

    Raises:
        ValueError: If either argument is not of type ``str``.
    """

    def __init__(self, project, dataset_id):
        text_types = six.string_types
        if not isinstance(project, text_types):
            raise ValueError("Pass a string for project")
        if not isinstance(dataset_id, text_types):
            raise ValueError("Pass a string for dataset_id")
        self._project, self._dataset_id = project, dataset_id

    @property
    def project(self):
        """str: Project ID of the dataset."""
        return self._project

    @property
    def dataset_id(self):
        """str: Dataset ID."""
        return self._dataset_id

    @property
    def path(self):
        """str: URL path for the dataset based on project and dataset ID."""
        identity = (self.project, self.dataset_id)
        return "/projects/%s/datasets/%s" % identity

    # Child-reference factories shared with the other dataset classes.
    table = _get_table_reference

    model = _get_model_reference

    routine = _get_routine_reference

    @classmethod
    def from_api_repr(cls, resource):
        """Factory: construct a dataset reference given its API representation

        Args:
            resource (Dict[str, str]):
                Dataset reference resource representation returned from the
                API; its ``projectId`` and ``datasetId`` keys are read.

        Returns:
            google.cloud.bigquery.dataset.DatasetReference:
                Dataset reference parsed from ``resource``.
        """
        return cls(resource["projectId"], resource["datasetId"])

    @classmethod
    def from_string(cls, dataset_id, default_project=None):
        """Construct a dataset reference from dataset ID string.

        Args:
            dataset_id (str):
                A dataset ID in standard SQL format. If ``default_project``
                is not specified, this must include both the project ID and
                the dataset ID, separated by ``.``.
            default_project (str):
                Optional. The project ID to use when ``dataset_id`` does not
                include a project ID.

        Returns:
            DatasetReference:
                Dataset reference parsed from ``dataset_id``.

        Examples:
            >>> DatasetReference.from_string('my-project-id.some_dataset')
            DatasetReference('my-project-id', 'some_dataset')

        Raises:
            ValueError:
                If ``dataset_id`` is not a fully-qualified dataset ID in
                standard SQL format.
        """
        # A project ID containing ":" may itself contain dots, so it is
        # recognized by the dedicated pattern before any splitting on ".".
        prefixed = _PROJECT_PREFIX_PATTERN.match(dataset_id)
        if prefixed is not None:
            return cls(prefixed.group("project_id"), prefixed.group("dataset_id"))

        pieces = dataset_id.split(".")
        piece_count = len(pieces)

        if piece_count > 2:
            raise ValueError(
                "Too many parts in dataset_id. Expected a fully-qualified "
                "dataset ID in standard SQL format. e.g. "
                '"project.dataset_id", got {}'.format(dataset_id)
            )

        if piece_count == 2:
            project_part, dataset_part = pieces
            return cls(project_part, dataset_part)

        # Only a bare dataset ID was given: a default project is mandatory.
        if not default_project:
            raise ValueError(
                "When default_project is not set, dataset_id must be a "
                "fully-qualified dataset ID in standard SQL format, "
                'e.g., "project.dataset_id" got {}'.format(dataset_id)
            )
        return cls(default_project, dataset_id)

    def to_api_repr(self):
        """Construct the API resource representation of this dataset reference

        Returns:
            Dict[str, str]: dataset reference represented as an API resource
        """
        resource = {"projectId": self._project, "datasetId": self._dataset_id}
        return resource

    def _key(self):
        """A tuple key that uniquely describes this field.

        Used to compute this instance's hashcode and evaluate equality.

        Returns:
            Tuple[str]: The contents of this :class:`.DatasetReference`.
        """
        return (self._project, self._dataset_id)

    def __eq__(self, other):
        if isinstance(other, DatasetReference):
            return self._key() == other._key()
        return NotImplemented

    def __ne__(self, other):
        return not (self == other)

    def __hash__(self):
        key = self._key()
        return hash(key)

    def __repr__(self):
        return "DatasetReference{}".format(self._key())
class Dataset(object):
    """Datasets are containers for tables.

    All state is kept in ``_properties``, a dict in the shape of the API's
    dataset resource; the properties below read from and write to it.

    See
    https://cloud.google.com/bigquery/docs/reference/rest/v2/datasets

    Args:
        dataset_ref (Union[ \
            :class:`~google.cloud.bigquery.dataset.DatasetReference`, \
            str, \
        ]):
            A pointer to a dataset. If ``dataset_ref`` is a string, it must
            include both the project ID and the dataset ID, separated by
            ``.``.
    """

    # Maps property names to API field names where the two differ.
    _PROPERTY_TO_API_FIELD = {
        "access_entries": "access",
        "created": "creationTime",
        "default_table_expiration_ms": "defaultTableExpirationMs",
        "friendly_name": "friendlyName",
    }

    def __init__(self, dataset_ref):
        if isinstance(dataset_ref, six.string_types):
            dataset_ref = DatasetReference.from_string(dataset_ref)
        self._properties = {"datasetReference": dataset_ref.to_api_repr(), "labels": {}}

    @property
    def project(self):
        """str: Project ID of the project bound to the dataset."""
        return self._properties["datasetReference"]["projectId"]

    @property
    def path(self):
        """str: URL path for the dataset based on project and dataset ID."""
        return "/projects/%s/datasets/%s" % (self.project, self.dataset_id)

    @property
    def access_entries(self):
        """List[google.cloud.bigquery.dataset.AccessEntry]: Dataset's access
        entries.

        ``role`` augments the entity type and must be present **unless** the
        entity type is ``view``.

        The getter builds a new list of entries on every access. The setter
        accepts any iterable of entries, including one-shot iterables such as
        generators.

        Raises:
            TypeError: If 'value' is not iterable
            ValueError:
                If any item in the sequence is not an
                :class:`~google.cloud.bigquery.dataset.AccessEntry`.
        """
        entries = self._properties.get("access", [])
        return [AccessEntry.from_api_repr(entry) for entry in entries]

    @access_entries.setter
    def access_entries(self, value):
        # Materialize once: ``value`` is walked twice (validation, then
        # conversion), and a generator would be exhausted by the first pass,
        # silently storing an empty access list.
        entries = list(value)
        if not all(isinstance(entry, AccessEntry) for entry in entries):
            raise ValueError("Values must be AccessEntry instances")

        self._properties["access"] = [entry.to_api_repr() for entry in entries]

    @property
    def created(self):
        """Union[datetime.datetime, None]: Datetime at which the dataset was
        created (:data:`None` until set from the server).
        """
        creation_time = self._properties.get("creationTime")
        if creation_time is not None:
            # creation_time will be in milliseconds.
            return google.cloud._helpers._datetime_from_microseconds(
                1000.0 * float(creation_time)
            )
        return None

    @property
    def dataset_id(self):
        """str: Dataset ID."""
        return self._properties["datasetReference"]["datasetId"]

    @property
    def full_dataset_id(self):
        """Union[str, None]: ID for the dataset resource (:data:`None` until
        set from the server)

        In the format ``project_id:dataset_id``.
        """
        return self._properties.get("id")

    @property
    def reference(self):
        """google.cloud.bigquery.dataset.DatasetReference: A reference to this
        dataset.
        """
        return DatasetReference(self.project, self.dataset_id)

    @property
    def etag(self):
        """Union[str, None]: ETag for the dataset resource (:data:`None` until
        set from the server).
        """
        return self._properties.get("etag")

    @property
    def modified(self):
        """Union[datetime.datetime, None]: Datetime at which the dataset was
        last modified (:data:`None` until set from the server).
        """
        modified_time = self._properties.get("lastModifiedTime")
        if modified_time is not None:
            # modified_time will be in milliseconds.
            return google.cloud._helpers._datetime_from_microseconds(
                1000.0 * float(modified_time)
            )
        return None

    @property
    def self_link(self):
        """Union[str, None]: URL for the dataset resource (:data:`None` until
        set from the server).
        """
        return self._properties.get("selfLink")

    @property
    def default_table_expiration_ms(self):
        """Union[int, None]: Default expiration time for tables in the dataset
        (defaults to :data:`None`).

        Raises:
            ValueError: For invalid value types.
        """
        return _helpers._int_or_none(self._properties.get("defaultTableExpirationMs"))

    @default_table_expiration_ms.setter
    def default_table_expiration_ms(self, value):
        if not isinstance(value, six.integer_types) and value is not None:
            raise ValueError("Pass an integer, or None")
        self._properties["defaultTableExpirationMs"] = _helpers._str_or_none(value)

    @property
    def description(self):
        """Union[str, None]: Description of the dataset as set by the user
        (defaults to :data:`None`).

        Raises:
            ValueError: for invalid value types.
        """
        return self._properties.get("description")

    @description.setter
    def description(self, value):
        if not isinstance(value, six.string_types) and value is not None:
            raise ValueError("Pass a string, or None")
        self._properties["description"] = value

    @property
    def friendly_name(self):
        """Union[str, None]: Title of the dataset as set by the user
        (defaults to :data:`None`).

        Raises:
            ValueError: for invalid value types.
        """
        return self._properties.get("friendlyName")

    @friendly_name.setter
    def friendly_name(self, value):
        if not isinstance(value, six.string_types) and value is not None:
            raise ValueError("Pass a string, or None")
        self._properties["friendlyName"] = value

    @property
    def location(self):
        """Union[str, None]: Location in which the dataset is hosted as set by
        the user (defaults to :data:`None`).

        Raises:
            ValueError: for invalid value types.
        """
        return self._properties.get("location")

    @location.setter
    def location(self, value):
        if not isinstance(value, six.string_types) and value is not None:
            raise ValueError("Pass a string, or None")
        self._properties["location"] = value

    @property
    def labels(self):
        """Dict[str, str]: Labels for the dataset.

        This method always returns a dict. To change a dataset's labels,
        modify the dict, then call
        :meth:`google.cloud.bigquery.client.Client.update_dataset`. To delete
        a label, set its value to :data:`None` before updating.

        Raises:
            ValueError: for invalid value types.
        """
        # setdefault stores the dict so that in-place edits by the caller
        # are kept on the dataset.
        return self._properties.setdefault("labels", {})

    @labels.setter
    def labels(self, value):
        if not isinstance(value, dict):
            raise ValueError("Pass a dict")
        self._properties["labels"] = value

    @classmethod
    def from_string(cls, full_dataset_id):
        """Construct a dataset from fully-qualified dataset ID.

        Args:
            full_dataset_id (str):
                A fully-qualified dataset ID in standard SQL format. Must
                include both the project ID and the dataset ID, separated by
                ``.``.

        Returns:
            Dataset: Dataset parsed from ``full_dataset_id``.

        Examples:
            >>> Dataset.from_string('my-project-id.some_dataset')
            Dataset(DatasetReference('my-project-id', 'some_dataset'))

        Raises:
            ValueError:
                If ``full_dataset_id`` is not a fully-qualified dataset ID in
                standard SQL format.
        """
        return cls(DatasetReference.from_string(full_dataset_id))

    @classmethod
    def from_api_repr(cls, resource):
        """Factory: construct a dataset given its API representation

        Args:
            resource (Dict[str: object]):
                Dataset resource representation returned from the API. It is
                deep-copied, so the new dataset does not share state with it.

        Returns:
            google.cloud.bigquery.dataset.Dataset:
                Dataset parsed from ``resource``.

        Raises:
            KeyError:
                If ``resource`` lacks ``datasetReference``, or that reference
                lacks ``datasetId`` or ``projectId``.
        """
        if "datasetReference" not in resource:
            raise KeyError(
                "Resource lacks required identity information:"
                '["datasetReference"]["datasetId"]'
            )
        reference = resource["datasetReference"]
        # Both halves of the identity are required; report the missing one by
        # name instead of letting a bare KeyError escape from the lookup.
        for field in ("datasetId", "projectId"):
            if field not in reference:
                raise KeyError(
                    "Resource lacks required identity information:"
                    '["datasetReference"]["%s"]' % (field,)
                )
        project_id = reference["projectId"]
        dataset_id = reference["datasetId"]
        dataset = cls(DatasetReference(project_id, dataset_id))
        dataset._properties = copy.deepcopy(resource)
        return dataset

    def to_api_repr(self):
        """Construct the API resource representation of this dataset

        Returns:
            Dict[str, object]:
                The dataset represented as an API resource (a deep copy of
                the internal properties).
        """
        return copy.deepcopy(self._properties)

    def _build_resource(self, filter_fields):
        """Generate a resource for ``update``."""
        return _helpers._build_resource_from_properties(self, filter_fields)

    # Child-reference factories shared with the other dataset classes.
    table = _get_table_reference

    model = _get_model_reference

    routine = _get_routine_reference

    def __repr__(self):
        return "Dataset({})".format(repr(self.reference))
class DatasetListItem(object):
    """A read-only dataset resource from a list operation.

    For performance reasons, the BigQuery API only includes some of the
    dataset properties when listing datasets. Notably,
    :attr:`~google.cloud.bigquery.dataset.Dataset.access_entries` is missing.

    For a full list of the properties that the BigQuery API returns, see the
    `REST documentation for datasets.list
    <https://cloud.google.com/bigquery/docs/reference/rest/v2/datasets/list>`_.

    Args:
        resource (Dict[str, str]):
            A dataset-like resource object from a dataset list response. A
            ``datasetReference`` property is required. The dict is stored
            as-is (not copied).

    Raises:
        ValueError:
            If ``datasetReference`` or one of its required members is missing
            from ``resource``.
    """

    def __init__(self, resource):
        if "datasetReference" not in resource:
            raise ValueError("resource must contain a datasetReference value")

        # Both identity members must be present inside the reference.
        reference = resource["datasetReference"]
        if "projectId" not in reference:
            raise ValueError(
                "resource['datasetReference'] must contain a projectId value"
            )
        if "datasetId" not in reference:
            raise ValueError(
                "resource['datasetReference'] must contain a datasetId value"
            )

        self._properties = resource

    @property
    def project(self):
        """str: Project bound to the dataset."""
        reference = self._properties["datasetReference"]
        return reference["projectId"]

    @property
    def dataset_id(self):
        """str: Dataset ID."""
        reference = self._properties["datasetReference"]
        return reference["datasetId"]

    @property
    def full_dataset_id(self):
        """Union[str, None]: ID for the dataset resource (:data:`None` until
        set from the server)

        In the format ``project_id:dataset_id``.
        """
        return self._properties.get("id")

    @property
    def friendly_name(self):
        """Union[str, None]: Title of the dataset as set by the user
        (defaults to :data:`None`).
        """
        return self._properties.get("friendlyName")

    @property
    def labels(self):
        """Dict[str, str]: Labels for the dataset."""
        # An empty dict is stored on first access when no labels are present.
        return self._properties.setdefault("labels", {})

    @property
    def reference(self):
        """google.cloud.bigquery.dataset.DatasetReference: A reference to this
        dataset.
        """
        project, dataset_id = self.project, self.dataset_id
        return DatasetReference(project, dataset_id)

    # Child-reference factories shared with the other dataset classes.
    table = _get_table_reference

    model = _get_model_reference

    routine = _get_routine_reference