from __future__ import annotations
import itertools
import types
import typing
from copy import copy, deepcopy
import numpy as np
import pandas as pd
import pandas.api.types as pdtypes
from ..exceptions import PlotnineError
from ..scales.scales import Scales
from ..utils import cross_join, match
from .strips import Strips
if typing.TYPE_CHECKING:
from typing import Any, Literal, Optional
import numpy.typing as npt
from matplotlib.gridspec import GridSpec
from plotnine.iapi import layout_details, panel_view
from plotnine.typing import (
Axes,
Coord,
EvalEnvironment,
Figure,
Ggplot,
Layers,
Layout,
Scale,
Theme,
)
[docs]class facet:
"""
Base class for all facets
Parameters
----------
scales : str in ``['fixed', 'free', 'free_x', 'free_y']``
Whether ``x`` or ``y`` scales should be allowed (free)
to vary according to the data along the rows or the
columns. Default is ``'fixed'``.
shrink : bool
Whether to shrink the scales to the output of the
statistics instead of the raw data. Default is ``True``.
labeller : str | function
How to label the facets. If it is a ``str``, it should
be one of ``'label_value'`` ``'label_both'`` or
``'label_context'``. Default is ``'label_value'``
as_table : bool
If ``True``, the facets are laid out like a table with
the highest values at the bottom-right. If ``False``
the facets are laid out like a plot with the highest
value a the top-right. Default it ``True``.
drop : bool
If ``True``, all factor levels not used in the data
will automatically be dropped. If ``False``, all
factor levels will be shown, regardless of whether
or not they appear in the data. Default is ``True``.
dir : str in ``['h', 'v']``
Direction in which to layout the panels. ``h`` for
horizontal and ``v`` for vertical.
"""
#: number of columns
ncol: int
#: number of rows
nrow: int
as_table = True
drop = True
shrink = True
#: Which axis scales are free
free: dict[Literal["x", "y"], bool]
#: A dict of parameters created depending on the data
#: (Intended for extensions)
params: dict[str, Any]
# Theme object, automatically updated before drawing the plot
theme: Theme
# Figure object on which the facet panels are created
figure: Figure
# coord object, automatically updated before drawing the plot
coordinates: Coord
# layout object, automatically updated before drawing the plot
layout: Layout
# Axes
axs: list[Axes]
# The first and last axes according to how MPL creates them.
# Used for labelling the x and y axes,
first_ax: Axes
last_ax: Axes
# Number of facet variables along the horizontal axis
num_vars_x = 0
# Number of facet variables along the vertical axis
num_vars_y = 0
# ggplot object that the facet belongs to
plot: Ggplot
# Facet strips
strips: Strips
# Control the relative size of multiple facets
# Use a subclass to change the default.
# See: facet_grid for an example
space: (
Literal["fixed", "free", "free_x", "free_y"]
| dict[Literal["x", "y"], list[int]]
) = "fixed"
grid_spec: GridSpec
def __init__(
self,
scales: Literal["fixed", "free", "free_x", "free_y"] = "fixed",
shrink: bool = True,
labeller: Literal[
"label_value", "label_both", "label_context"
] = "label_value",
as_table: bool = True,
drop: bool = True,
dir: Literal["h", "v"] = "h",
):
from .labelling import as_labeller
self.shrink = shrink
self.labeller = as_labeller(labeller)
self.as_table = as_table
self.drop = drop
self.dir = dir
self.free = {
"x": scales in ("free_x", "free"),
"y": scales in ("free_y", "free"),
}
def __radd__(self, gg: Ggplot) -> Ggplot:
"""
Add facet to ggplot object
"""
gg.facet = copy(self)
gg.facet.plot = gg
return gg
[docs] def set_properties(self, gg: Ggplot):
"""
Copy required properties from ggplot object
"""
self.axs = gg.axs
self.coordinates = gg.coordinates
self.figure = gg.figure
self.layout = gg.layout
self.theme = gg.theme
self.strips = Strips.from_facet(self)
[docs] def setup_data(self, data: list[pd.DataFrame]) -> list[pd.DataFrame]:
"""
Allow the facet to manipulate the data
Parameters
----------
data : list of dataframes
Data for each of the layers
Returns
-------
data : list of dataframes
Data for each of the layers
Notes
-----
This method will be called after :meth:`setup_params`,
therefore the `params` property will be set.
"""
return data
[docs] def setup_params(self, data: list[pd.DataFrame]):
"""
Create facet parameters
Parameters
----------
data : list of dataframes
Plot data and data for the layers
"""
self.params = {}
def init_scales(
self,
layout: pd.DataFrame,
x_scale: Optional[Scale] = None,
y_scale: Optional[Scale] = None,
) -> types.SimpleNamespace:
scales = types.SimpleNamespace()
if x_scale is not None:
n = layout["SCALE_X"].max()
scales.x = Scales([x_scale.clone() for i in range(n)])
if y_scale is not None:
n = layout["SCALE_Y"].max()
scales.y = Scales([y_scale.clone() for i in range(n)])
return scales
[docs] def map(self, data: pd.DataFrame, layout: pd.DataFrame) -> pd.DataFrame:
"""
Assign a data points to panels
Parameters
----------
data : DataFrame
Data for a layer
layout : DataFrame
As returned by self.compute_layout
Returns
-------
data : DataFrame
Data with all points mapped to the panels
on which they will be plotted.
"""
msg = "{} should implement this method."
raise NotImplementedError(msg.format(self.__class__.__name__))
[docs] def compute_layout(
self,
data: list[pd.DataFrame],
) -> pd.DataFrame:
"""
Compute layout
Parameters
----------
data : Dataframes
Dataframe for a each layer
"""
msg = "{} should implement this method."
raise NotImplementedError(msg.format(self.__class__.__name__))
[docs] def finish_data(self, data: pd.DataFrame, layout: Layout) -> pd.DataFrame:
"""
Modify data before it is drawn out by the geom
The default is to return the data without modification.
Subclasses should override this method as the require.
Parameters
----------
data : DataFrame
A single layer's data.
layout : Layout
Layout
Returns
-------
data : DataFrame
Modified layer data
"""
return data
[docs] def train_position_scales(self, layout: Layout, layers: Layers) -> facet:
"""
Compute ranges for the x and y scales
"""
_layout = layout.layout
panel_scales_x = layout.panel_scales_x
panel_scales_y = layout.panel_scales_y
# loop over each layer, training x and y scales in turn
for layer in layers:
data = layer.data
match_id = match(data["PANEL"], _layout["PANEL"])
if panel_scales_x:
x_vars = list(
set(panel_scales_x[0].aesthetics) & set(data.columns)
)
# the scale index for each data point
SCALE_X = _layout["SCALE_X"].iloc[match_id].tolist()
panel_scales_x.train(data, x_vars, SCALE_X)
if panel_scales_y:
y_vars = list(
set(panel_scales_y[0].aesthetics) & set(data.columns)
)
# the scale index for each data point
SCALE_Y = _layout["SCALE_Y"].iloc[match_id].tolist()
panel_scales_y.train(data, y_vars, SCALE_Y)
return self
[docs] def make_ax_strips(self, layout_info: layout_details, ax: Axes) -> Strips:
"""
Create strips for the facet
Parameters
----------
layout_info : dict-like
Layout information. Row from the layout table
ax : axes
Axes to label
"""
return Strips()
[docs] def set_limits_breaks_and_labels(self, panel_params: panel_view, ax: Axes):
"""
Add limits, breaks and labels to the axes
Parameters
----------
ranges : dict-like
range information for the axes
ax : Axes
Axes
"""
from .._mpl.ticker import MyFixedFormatter
def _inf_to_none(
t: tuple[float, float]
) -> tuple[float | None, float | None]:
"""
Replace infinities with None
"""
a = t[0] if np.isfinite(t[0]) else None
b = t[1] if np.isfinite(t[1]) else None
return (a, b)
# limits
ax.set_xlim(_inf_to_none(panel_params.x.range))
ax.set_ylim(_inf_to_none(panel_params.y.range))
if typing.TYPE_CHECKING:
assert callable(ax.set_xticks)
assert callable(ax.set_yticks)
# breaks, labels
ax.set_xticks(panel_params.x.breaks, panel_params.x.labels)
ax.set_yticks(panel_params.y.breaks, panel_params.y.labels)
# minor breaks
ax.set_xticks(panel_params.x.minor_breaks, minor=True)
ax.set_yticks(panel_params.y.minor_breaks, minor=True)
# When you manually set the tick labels MPL changes the locator
# so that it no longer reports the x & y positions
# Fixes https://github.com/has2k1/plotnine/issues/187
ax.xaxis.set_major_formatter(MyFixedFormatter(panel_params.x.labels))
ax.yaxis.set_major_formatter(MyFixedFormatter(panel_params.y.labels))
_property = self.theme.themeables.property
margin = _property("axis_text_x", "margin")
pad_x = margin.get_as("t", "pt")
margin = _property("axis_text_y", "margin")
pad_y = margin.get_as("r", "pt")
ax.tick_params(axis="x", which="major", pad=pad_x)
ax.tick_params(axis="y", which="major", pad=pad_y)
def __deepcopy__(self, memo: dict[Any, Any]) -> facet:
"""
Deep copy without copying the dataframe and environment
"""
cls = self.__class__
result = cls.__new__(cls)
memo[id(self)] = result
old = self.__dict__
new = result.__dict__
# don't make a deepcopy of the figure & the axes
shallow = {"figure", "axs", "first_ax", "last_ax"}
for key, item in old.items():
if key in shallow:
new[key] = old[key]
memo[id(new[key])] = new[key]
else:
new[key] = deepcopy(old[key], memo)
return result
def _create_subplots(
self, fig: Figure, layout: pd.DataFrame
) -> list[Axes]:
"""
Create suplots and return axs
"""
from matplotlib.gridspec import GridSpec
num_panels = len(layout)
axsarr = np.empty((self.nrow, self.ncol), dtype=object)
space = self.space
default_space: dict[Literal["x", "y"], list[int]] = {
"x": [1 for x in range(self.ncol)],
"y": [1 for x in range(self.nrow)],
}
if isinstance(space, str):
if space == "fixed":
space = default_space
# TODO: Implement 'free', 'free_x' & 'free_y'
else:
space = default_space
elif isinstance(space, dict):
if "x" not in space:
space["x"] = default_space["x"]
if "y" not in space:
space["y"] = default_space["y"]
if len(space["x"]) != self.ncol:
raise ValueError(
"The number of x-ratios for the facet space sizes "
"should match the number of columns."
)
if len(space["y"]) != self.nrow:
raise ValueError(
"The number of y-ratios for the facet space sizes "
"should match the number of rows."
)
gs = GridSpec(
self.nrow,
self.ncol,
height_ratios=space["y"],
width_ratios=space["x"],
)
self.grid_spec = gs
# Create axes
i = 1
for row in range(self.nrow):
for col in range(self.ncol):
axsarr[row, col] = fig.add_subplot(gs[i - 1])
i += 1
# Rearrange axes
# They are ordered to match the positions in the layout table
if self.dir == "h":
order: Literal["C", "F"] = "C"
if not self.as_table:
axsarr = axsarr[::-1]
elif self.dir == "v":
order = "F"
if not self.as_table:
axsarr = np.array([row[::-1] for row in axsarr])
else:
raise ValueError(f"Bad value `dir='{self.dir}'` for direction")
axs = axsarr.ravel(order)
# Delete unused axes
for ax in axs[num_panels:]:
fig.delaxes(ax)
axs = axs[:num_panels]
return list(axs)
[docs] def make_axes(
self, figure: Figure, layout: pd.DataFrame, coordinates: Coord
) -> list[Axes]:
"""
Create and return Matplotlib axes
"""
axs = self._create_subplots(figure, layout)
# Used for labelling the x and y axes, the first and
# last axes according to how MPL creates them.
self.first_ax = figure.axes[0]
self.last_ax = figure.axes[-1]
self.figure = figure
self.axs = axs
return axs
def _aspect_ratio(self) -> Optional[float]:
"""
Return the aspect_ratio
"""
aspect_ratio = self.theme.themeables.property("aspect_ratio")
if aspect_ratio == "auto":
# If the panels have different limits the coordinates
# cannot compute a common aspect ratio
if not self.free["x"] and not self.free["y"]:
aspect_ratio = self.coordinates.aspect(
self.layout.panel_params[0]
)
else:
aspect_ratio = None
return aspect_ratio
def combine_vars(
data: list[pd.DataFrame],
environment: EvalEnvironment,
vars: list[str],
drop: bool = True,
) -> pd.DataFrame:
"""
Generate all combinations of data needed for facetting
The first data frame in the list should be the default data
for the plot. Other data frames in the list are ones that are
added to the layers.
"""
if len(vars) == 0:
return pd.DataFrame()
# For each layer, compute the facet values
values = [
eval_facet_vars(df, vars, environment) for df in data if df is not None
]
# Form the base data frame which contains all combinations
# of facetting variables that appear in the data
has_all = [x.shape[1] == len(vars) for x in values]
if not any(has_all):
raise PlotnineError(
"At least one layer must contain all variables "
"used for facetting"
)
base = pd.concat([x for i, x in enumerate(values) if has_all[i]], axis=0)
base = base.drop_duplicates()
if not drop:
base = unique_combs(base)
# sorts according to order of factor levels
base = base.sort_values(base.columns.tolist())
# Systematically add on missing combinations
for i, value in enumerate(values):
if has_all[i] or len(value.columns) == 0:
continue
old = base.loc[:, list(base.columns.difference(value.columns))]
new = value.loc[
:, list(base.columns.intersection(value.columns))
].drop_duplicates()
if not drop:
new = unique_combs(new)
base = pd.concat([base, cross_join(old, new)], ignore_index=True)
if len(base) == 0:
raise PlotnineError("Faceting variables must have at least one value")
base = base.reset_index(drop=True)
return base
def unique_combs(df: pd.DataFrame) -> pd.DataFrame:
"""
Generate all possible combinations of the values in the columns
"""
def _unique(s: pd.Series[Any]) -> npt.NDArray[Any] | pd.Index:
if isinstance(s.dtype, pdtypes.CategoricalDtype):
return s.cat.categories
return s.unique()
# List of unique values from every column
lst = (_unique(x) for _, x in df.items())
rows = list(itertools.product(*lst))
_df = pd.DataFrame(rows, columns=df.columns)
# preserve the column dtypes
for col in df:
t = df[col].dtype
_df[col] = _df[col].astype(t, copy=False) # pyright: ignore
return _df
def layout_null() -> pd.DataFrame:
"""
Layout Null
"""
layout = pd.DataFrame(
{
"PANEL": [1],
"ROW": 1,
"COL": 1,
"SCALE_X": 1,
"SCALE_Y": 1,
"AXIS_X": True,
"AXIS_Y": True,
}
)
return layout
def add_missing_facets(
data: pd.DataFrame,
layout: pd.DataFrame,
vars: list[str],
facet_vals: pd.DataFrame,
) -> tuple[pd.DataFrame, pd.DataFrame]:
"""
Add missing facets
"""
# When in a dataframe some layer does not have all
# the facet variables, add the missing facet variables
# and create new data where the points(duplicates) are
# present in all the facets
missing_facets = list(set(vars) - set(facet_vals.columns.tolist()))
if missing_facets:
to_add = layout.loc[:, missing_facets].drop_duplicates()
to_add.reset_index(drop=True, inplace=True)
# a point for each facet, [0, 1, ..., n-1, 0, 1, ..., n-1, ...]
data_rep = np.tile(np.arange(len(data)), len(to_add))
# a facet for each point, [0, 0, 0, 1, 1, 1, ... n-1, n-1, n-1]
facet_rep = np.repeat(np.arange(len(to_add)), len(data))
data = data.iloc[data_rep, :].reset_index(drop=True)
facet_vals = facet_vals.iloc[data_rep, :].reset_index(drop=True)
to_add = to_add.iloc[facet_rep, :].reset_index(drop=True)
facet_vals = pd.concat(
[facet_vals, to_add], axis=1, ignore_index=False
)
return data, facet_vals
def eval_facet_vars(
data: pd.DataFrame, vars: list[str], env: EvalEnvironment
) -> pd.DataFrame:
"""
Evaluate facet variables
Parameters
----------
data : DataFrame
Factet dataframe
vars : list
Facet variables
env : environment
Plot environment
Returns
-------
facet_vals : DataFrame
Facet values that correspond to the specified
variables.
"""
# To allow expressions in facet formula
def I(value: Any) -> Any:
return value
env = env.with_outer_namespace({"I": I})
facet_vals = pd.DataFrame(index=data.index)
for name in vars:
if name in data:
# This is a limited solution. If a keyword is
# part of an expression it will fail in the
# else statement below
res = data[name]
elif str.isidentifier(name):
# All other non-statements
continue
else:
# Statements
try:
res = env.eval(name, inner_namespace=data)
except NameError:
continue
facet_vals[name] = res
return facet_vals