# -*- coding: utf-8 -*-
#
# Copyright 2019 Google LLC
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# https://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""Define resources for the BigQuery ML Models API."""
import copy
from google.protobuf import json_format
import six
import google.cloud._helpers
from google.api_core import datetime_helpers
from google.cloud.bigquery import _helpers
from google.cloud.bigquery_v2 import types
class Model(object):
    """Model represents a machine learning model resource.

    See
    https://cloud.google.com/bigquery/docs/reference/rest/v2/models

    Args:
        model_ref (Union[ \
            :class:`~google.cloud.bigquery.model.ModelReference`, \
            str, \
        ]):
            A pointer to a model. If ``model_ref`` is a string, it must
            include a project ID, dataset ID, and model ID, each separated
            by ``.``.
    """

    _PROPERTY_TO_API_FIELD = {
        "expires": "expirationTime",
        "friendly_name": "friendlyName",
        # Entries whose property name already equals the API field name are
        # not needed for the mapping itself; they are listed so that this
        # dict is an exhaustive list of all mutable properties.
        "labels": "labels",
        "description": "description",
    }

    def __init__(self, model_ref):
        # Read-write properties live in ``_properties`` to match the REST API
        # semantics: the BigQuery API distinguishes an unset value, a null
        # value, and a default value (0 or ""), which the protocol buffer
        # classes do not.
        self._properties = {}
        # Read-only properties are served from ``_proto`` so that its
        # built-in type conversion can be used.
        self._proto = types.Model()

        if isinstance(model_ref, six.string_types):
            model_ref = ModelReference.from_string(model_ref)

        if model_ref:
            self._proto.model_reference.CopyFrom(model_ref._proto)

    @property
    def reference(self):
        """A :class:`~google.cloud.bigquery.model.ModelReference` pointing to
        this model.

        Read-only.

        Returns:
            google.cloud.bigquery.model.ModelReference: pointer to this model.
        """
        pointer = ModelReference()
        # The returned reference wraps this model's own ``model_reference``
        # message rather than a copy of it.
        pointer._proto = self._proto.model_reference
        return pointer

    @property
    def project(self):
        """str: Project bound to the model."""
        return self.reference.project

    @property
    def dataset_id(self):
        """str: ID of dataset containing the model."""
        return self.reference.dataset_id

    @property
    def model_id(self):
        """str: The model ID."""
        return self.reference.model_id

    @property
    def path(self):
        """str: URL path for the model's APIs."""
        return self.reference.path

    @property
    def location(self):
        """str: The geographic location where the model resides. This value
        is inherited from the dataset.

        Read-only.
        """
        return self._proto.location

    @property
    def etag(self):
        """str: ETag for the model resource (:data:`None` until
        set from the server).

        Read-only.
        """
        return self._proto.etag

    @property
    def created(self):
        """Union[datetime.datetime, None]: Datetime at which the model was
        created (:data:`None` until set from the server).

        Read-only.
        """
        millis = self._proto.creation_time
        if millis is None or millis == 0:
            return None
        # The stored value is in milliseconds; the helper wants microseconds.
        micros = 1000.0 * float(millis)
        return google.cloud._helpers._datetime_from_microseconds(micros)

    @property
    def modified(self):
        """Union[datetime.datetime, None]: Datetime at which the model was last
        modified (:data:`None` until set from the server).

        Read-only.
        """
        millis = self._proto.last_modified_time
        if millis is None or millis == 0:
            return None
        # The stored value is in milliseconds; the helper wants microseconds.
        micros = 1000.0 * float(millis)
        return google.cloud._helpers._datetime_from_microseconds(micros)

    @property
    def model_type(self):
        """google.cloud.bigquery_v2.gapic.enums.Model.ModelType: Type of the
        model resource.

        Read-only.

        The value is one of elements of the
        :class:`~google.cloud.bigquery_v2.gapic.enums.Model.ModelType`
        enumeration.
        """
        return self._proto.model_type

    @property
    def training_runs(self):
        """Sequence[google.cloud.bigquery_v2.types.Model.TrainingRun]: Information
        for all training runs in increasing order of start time.

        Read-only.

        An iterable of :class:`~google.cloud.bigquery_v2.types.Model.TrainingRun`.
        """
        return self._proto.training_runs

    @property
    def feature_columns(self):
        """Sequence[google.cloud.bigquery_v2.types.StandardSqlField]: Input
        feature columns that were used to train this model.

        Read-only.

        An iterable of :class:`~google.cloud.bigquery_v2.types.StandardSqlField`.
        """
        return self._proto.feature_columns

    @property
    def label_columns(self):
        """Sequence[google.cloud.bigquery_v2.types.StandardSqlField]: Label
        columns that were used to train this model. The output of the model
        will have a ``predicted_`` prefix to these columns.

        Read-only.

        An iterable of :class:`~google.cloud.bigquery_v2.types.StandardSqlField`.
        """
        return self._proto.label_columns

    @property
    def expires(self):
        """Union[datetime.datetime, None]: The datetime when this model
        expires. If not present, the model will persist indefinitely. Expired
        models will be deleted and their storage reclaimed.
        """
        millis = self._properties.get("expirationTime")
        if millis is None:
            return None
        # The stored value is in milliseconds; the helper wants microseconds.
        micros = 1000.0 * float(millis)
        return google.cloud._helpers._datetime_from_microseconds(micros)

    @expires.setter
    def expires(self, value):
        # A datetime is stored as a string of milliseconds; ``None`` is
        # stored as-is.
        encoded = (
            None
            if value is None
            else str(google.cloud._helpers._millis_from_datetime(value))
        )
        self._properties["expirationTime"] = encoded

    @property
    def description(self):
        """Union[str, None]: Description of the model (defaults to
        :data:`None`).
        """
        return self._properties.get("description")

    @description.setter
    def description(self, value):
        self._properties["description"] = value

    @property
    def friendly_name(self):
        """Union[str, None]: Title of the model (defaults to :data:`None`)."""
        return self._properties.get("friendlyName")

    @friendly_name.setter
    def friendly_name(self, value):
        self._properties["friendlyName"] = value

    @property
    def labels(self):
        """Dict[str, str]: Labels for the model.

        This method always returns a dict. To change a model's labels,
        modify the dict, then call ``Client.update_model``. To delete a
        label, set its value to :data:`None` before updating.
        """
        # ``setdefault`` stores the empty dict on first access, so in-place
        # modifications by the caller are retained in ``_properties``.
        return self._properties.setdefault("labels", {})

    @labels.setter
    def labels(self, value):
        # ``None`` is normalized to an empty dict.
        self._properties["labels"] = {} if value is None else value

    @classmethod
    def from_api_repr(cls, resource):
        """Factory: construct a model resource given its API representation

        Args:
            resource (Dict[str, object]):
                Model resource representation from the API

        Returns:
            google.cloud.bigquery.model.Model: Model parsed from ``resource``.
        """
        model = cls(None)
        # Keep a reference to the resource as a workaround to find unknown
        # field values.
        model._properties = resource

        # Convert from millis-from-epoch to timestamp well-known type. This
        # is done on a deep copy so the caller's ``resource`` is not changed.
        # TODO: Remove this hack once CL 238585470 hits prod.
        parseable = copy.deepcopy(resource)
        for run in parseable.get("trainingRuns", ()):
            raw_start = run.get("startTime")
            # Missing values are left alone; a "-" in the value is taken to
            # mean it is already in the right format.
            if raw_start and "-" not in raw_start:
                started = datetime_helpers.from_microseconds(1e3 * float(raw_start))
                run["startTime"] = datetime_helpers.to_rfc3339(started)

        model._proto = json_format.ParseDict(
            parseable, types.Model(), ignore_unknown_fields=True
        )
        return model

    def _build_resource(self, filter_fields):
        """Generate a resource for ``update``."""
        return _helpers._build_resource_from_properties(self, filter_fields)

    def __repr__(self):
        return "Model(reference={!r})".format(self.reference)
class ModelReference(object):
    """ModelReferences are pointers to models.

    See
    https://cloud.google.com/bigquery/docs/reference/rest/v2/models
    """

    def __init__(self):
        # Protocol buffer message holding the project, dataset and model IDs.
        self._proto = types.ModelReference()
        # Raw API resource, when constructed via ``from_api_repr``.
        self._properties = {}

    @property
    def project(self):
        """str: Project bound to the model."""
        return self._proto.project_id

    @property
    def dataset_id(self):
        """str: ID of dataset containing the model."""
        return self._proto.dataset_id

    @property
    def model_id(self):
        """str: The model ID."""
        return self._proto.model_id

    @property
    def path(self):
        """str: URL path for the model's APIs."""
        return "/projects/%s/datasets/%s/models/%s" % (
            self._proto.project_id,
            self._proto.dataset_id,
            self._proto.model_id,
        )

    @classmethod
    def from_api_repr(cls, resource):
        """Factory: construct a model reference given its API representation

        Args:
            resource (Dict[str, object]):
                Model reference representation returned from the API

        Returns:
            google.cloud.bigquery.model.ModelReference:
                Model reference parsed from ``resource``.
        """
        ref = cls()
        # Keep a reference to the resource as a workaround to find unknown
        # field values.
        ref._properties = resource
        ref._proto = json_format.ParseDict(
            resource, types.ModelReference(), ignore_unknown_fields=True
        )
        return ref

    @classmethod
    def from_string(cls, model_id, default_project=None):
        """Construct a model reference from model ID string.

        Args:
            model_id (str):
                A model ID in standard SQL format. If ``default_project``
                is not specified, this must include a project ID, dataset
                ID, and model ID, each separated by ``.``.
            default_project (str):
                Optional. The project ID to use when ``model_id`` does not
                include a project ID.

        Returns:
            google.cloud.bigquery.model.ModelReference:
                Model reference parsed from ``model_id``.

        Raises:
            ValueError:
                If ``model_id`` is not a fully-qualified model ID in
                standard SQL format.
        """
        proj, dset, model = _helpers._parse_3_part_id(
            model_id, default_project=default_project, property_name="model_id"
        )
        return cls.from_api_repr(
            {"projectId": proj, "datasetId": dset, "modelId": model}
        )

    def to_api_repr(self):
        """Construct the API resource representation of this model reference.

        Returns:
            Dict[str, object]: Model reference represented as an API resource
        """
        return json_format.MessageToDict(self._proto)

    def _key(self):
        """Unique key for this model.

        This is used for hashing a ModelReference.

        Returns:
            Tuple[str, str, str]: The project, dataset ID, and model ID.
        """
        return self.project, self.dataset_id, self.model_id

    def __eq__(self, other):
        if not isinstance(other, ModelReference):
            return NotImplemented
        return self._proto == other._proto

    def __ne__(self, other):
        return not self == other

    def __hash__(self):
        return hash(self._key())

    def __repr__(self):
        # The third field is the model ID; it was previously mislabeled as
        # ``project_id`` even though ``self.model_id`` was being formatted.
        return "ModelReference(project='{}', dataset_id='{}', model_id='{}')".format(
            self.project, self.dataset_id, self.model_id
        )