Source code for google.cloud.bigquery.schema

# Copyright 2015 Google LLC
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

"""Schemas for BigQuery tables / queries."""

from google.cloud.bigquery_v2 import types


# Upper-case field type names that denote a record, i.e. a field that carries
# nested sub-fields. Both the legacy ("RECORD") and the standard ("STRUCT")
# spelling are accepted.
_STRUCT_TYPES = ("RECORD", "STRUCT")

# SQL types reference:
# https://cloud.google.com/bigquery/data-types#legacy_sql_data_types
# https://cloud.google.com/bigquery/docs/reference/standard-sql/data-types
#
# Keys are upper-case type names as they may appear in a schema field; where a
# type has both a legacy and a standard spelling (e.g. INTEGER / INT64), both
# are listed and map to the same ``StandardSqlDataType`` constant. Lookups are
# exact, so callers are responsible for any case normalisation.
LEGACY_TO_STANDARD_TYPES = {
    "STRING": types.StandardSqlDataType.STRING,
    "BYTES": types.StandardSqlDataType.BYTES,
    "INTEGER": types.StandardSqlDataType.INT64,
    "INT64": types.StandardSqlDataType.INT64,
    "FLOAT": types.StandardSqlDataType.FLOAT64,
    "FLOAT64": types.StandardSqlDataType.FLOAT64,
    "NUMERIC": types.StandardSqlDataType.NUMERIC,
    "BOOLEAN": types.StandardSqlDataType.BOOL,
    "BOOL": types.StandardSqlDataType.BOOL,
    "GEOGRAPHY": types.StandardSqlDataType.GEOGRAPHY,
    "RECORD": types.StandardSqlDataType.STRUCT,
    "STRUCT": types.StandardSqlDataType.STRUCT,
    "TIMESTAMP": types.StandardSqlDataType.TIMESTAMP,
    "DATE": types.StandardSqlDataType.DATE,
    "TIME": types.StandardSqlDataType.TIME,
    "DATETIME": types.StandardSqlDataType.DATETIME,
    # no direct conversion from ARRAY, the latter is represented by mode="REPEATED"
}
"""String names of the legacy SQL types to integer codes of Standard SQL types."""


class SchemaField(object):
    """Describe a single field within a table schema.

    ``field_type`` and ``mode`` are stored exactly as given, but they are
    treated case-insensitively everywhere they are interpreted (equality,
    hashing, serialization, nullability and standard SQL conversion).

    Args:
        name (str): the name of the field.

        field_type (str): the type of the field. See
            https://cloud.google.com/bigquery/docs/reference/rest/v2/tables#schema.fields.type

        mode (str): the mode of the field. See
            https://cloud.google.com/bigquery/docs/reference/rest/v2/tables#schema.fields.mode

        description (Optional[str]): description for the field.

        fields (Tuple[:class:`~google.cloud.bigquery.schema.SchemaField`]):
            subfields (requires ``field_type`` of 'RECORD').
    """

    def __init__(self, name, field_type, mode="NULLABLE", description=None, fields=()):
        self._name = name
        self._field_type = field_type
        self._mode = mode
        self._description = description
        # Stored as a tuple so that the field stays hashable (see ``_key``).
        self._fields = tuple(fields)

    @classmethod
    def from_api_repr(cls, api_repr):
        """Return a ``SchemaField`` object deserialized from a dictionary.

        Args:
            api_repr (Mapping[str, str]): The serialized representation
                of the SchemaField, such as what is output by
                :meth:`to_api_repr`. Must contain the "name" and "type" keys.

        Returns:
            google.cloud.bigquery.schema.SchemaField:
                The ``SchemaField`` object.

        Raises:
            KeyError: if "name" or "type" is missing from ``api_repr``.
        """
        # Handle optional properties with default values
        mode = api_repr.get("mode", "NULLABLE")
        description = api_repr.get("description")
        fields = api_repr.get("fields", ())
        return cls(
            field_type=api_repr["type"].upper(),
            fields=[cls.from_api_repr(f) for f in fields],
            mode=mode.upper(),
            description=description,
            name=api_repr["name"],
        )

    @property
    def name(self):
        """str: The name of the field."""
        return self._name

    @property
    def field_type(self):
        """str: The type of the field.

        See:
        https://cloud.google.com/bigquery/docs/reference/rest/v2/tables#schema.fields.type
        """
        return self._field_type

    @property
    def mode(self):
        """str: The mode of the field.

        See:
        https://cloud.google.com/bigquery/docs/reference/rest/v2/tables#schema.fields.mode
        """
        return self._mode

    @property
    def is_nullable(self):
        """bool: whether 'mode' is 'nullable' (compared case-insensitively)."""
        # Upper-cased so that two fields that compare equal (``_key`` also
        # upper-cases the mode) always agree on their nullability.
        return self._mode.upper() == "NULLABLE"

    @property
    def description(self):
        """Optional[str]: description for the field."""
        return self._description

    @property
    def fields(self):
        """tuple: Subfields contained in this field.

        Must be empty or unset if ``field_type`` is not 'RECORD'.
        """
        return self._fields

    def to_api_repr(self):
        """Return a dictionary representing this schema field.

        Returns:
            dict: A dictionary representing the SchemaField in a serialized
                form. The "fields" key is only present for RECORD / STRUCT
                fields.
        """
        # Put together the basic representation. See http://bit.ly/2hOAT5u.
        answer = {
            "mode": self.mode.upper(),
            "name": self.name,
            "type": self.field_type.upper(),
            "description": self.description,
        }

        # If this is a RECORD type, then sub-fields are also included,
        # add this to the serialized representation.
        if self.field_type.upper() in _STRUCT_TYPES:
            answer["fields"] = [f.to_api_repr() for f in self.fields]

        # Done; return the serialized dictionary.
        return answer

    def _key(self):
        """A tuple key that uniquely describes this field.

        Used to compute this instance's hashcode and evaluate equality.
        Type and mode are upper-cased, so they compare case-insensitively.

        Returns:
            tuple: The contents of this
                :class:`~google.cloud.bigquery.schema.SchemaField`.
        """
        return (
            self._name,
            self._field_type.upper(),
            self._mode.upper(),
            self._description,
            self._fields,
        )

    def to_standard_sql(self):
        """Return the field as the standard SQL field representation object.

        A field whose mode is REPEATED is represented as an ARRAY whose
        element type is derived from ``field_type``; any other field maps
        directly to the type derived from ``field_type``. Type names that
        are not in ``LEGACY_TO_STANDARD_TYPES`` yield
        ``TYPE_KIND_UNSPECIFIED``.

        Returns:
            An instance of :class:`~google.cloud.bigquery_v2.types.StandardSqlField`.
        """
        sql_type = types.StandardSqlDataType()

        # Normalise case once: the lookup table only has upper-case keys, and
        # ``_key`` / ``to_api_repr`` already treat type and mode
        # case-insensitively.
        own_kind = LEGACY_TO_STANDARD_TYPES.get(
            self.field_type.upper(), types.StandardSqlDataType.TYPE_KIND_UNSPECIFIED
        )

        if self.mode.upper() == "REPEATED":
            sql_type.type_kind = types.StandardSqlDataType.ARRAY
            sql_type.array_element_type.type_kind = own_kind

            # ARRAY cannot directly contain other arrays, only scalar types and STRUCTs
            # https://cloud.google.com/bigquery/docs/reference/standard-sql/data-types#array-type
            if own_kind == types.StandardSqlDataType.STRUCT:  # noqa: E721
                sql_type.array_element_type.struct_type.fields.extend(
                    field.to_standard_sql() for field in self.fields
                )
        else:
            sql_type.type_kind = own_kind

            if own_kind == types.StandardSqlDataType.STRUCT:  # noqa: E721
                sql_type.struct_type.fields.extend(
                    field.to_standard_sql() for field in self.fields
                )

        return types.StandardSqlField(name=self.name, type=sql_type)

    def __eq__(self, other):
        if not isinstance(other, SchemaField):
            return NotImplemented
        return self._key() == other._key()

    def __ne__(self, other):
        return not self == other

    def __hash__(self):
        return hash(self._key())

    def __repr__(self):
        return "SchemaField{}".format(self._key())
def _parse_schema_resource(info):
    """Parse a resource fragment into a sequence of schema fields.

    Nested "fields" entries are parsed recursively into the sub-fields of
    the corresponding :class:`~google.cloud.bigquery.schema.SchemaField`.

    Args:
        info (Mapping[str, Any]): resource fragment; its optional "fields"
            key holds the field mappings to parse. Each field mapping must
            contain "name" and "type"; "mode" defaults to "NULLABLE" and
            "description" to ``None``.

    Returns:
        Sequence[:class:`~google.cloud.bigquery.schema.SchemaField`]:
            a list of parsed fields, or an empty tuple if no "fields" key
            is found.

    Raises:
        KeyError: if a field mapping lacks "name" or "type".
    """
    if "fields" not in info:
        return ()

    schema = []
    for r_field in info["fields"]:
        name = r_field["name"]
        field_type = r_field["type"]
        mode = r_field.get("mode", "NULLABLE")
        description = r_field.get("description")
        # Recurse: a field without a "fields" key yields no sub-fields.
        sub_fields = _parse_schema_resource(r_field)
        schema.append(SchemaField(name, field_type, mode, description, sub_fields))
    return schema


def _build_schema_resource(fields):
    """Generate a resource fragment for a schema.

    Args:
        fields (Sequence[:class:`~google.cloud.bigquery.schema.SchemaField`]):
            schema to be dumped.

    Returns:
        Sequence[dict]: mappings describing the schema of the supplied
            fields, one per field and in the same order.
    """
    return [field.to_api_repr() for field in fields]