from __future__ import annotations
import numbers
import typing
import numpy as np
import pandas as pd
import pandas.api.types as pdtypes
from ..exceptions import PlotnineError
if typing.TYPE_CHECKING:
from typing import Any
from plotnine.typing import EvalEnvironment
from . import aes
__all__ = ("after_stat", "after_scale", "stage")
# Message used when an aesthetic expression cannot be evaluated.
# Format arguments: aesthetic name, expression, original error text.
_TPL_EVAL_FAIL = """\
Could not evaluate the '{}' mapping: '{}' \
(original error: {})"""

# Message used when the result of an aesthetic expression cannot be
# stored. Format arguments: aesthetic name, expression, type of the
# result, original error text.
# The space before each line continuation matters: the backslash joins
# the lines with nothing in between.
_TPL_BAD_EVAL_TYPE = """\
The '{}' mapping: '{}' produced a value of type '{}', \
but only single items and lists/arrays can be used. \
(original error: {})"""
class stage:
    """
    Stage allows you to evaluate a mapping at more than one stage

    You can evaluate an expression of a variable in a dataframe, and
    later evaluate an expression that modifies the values mapped to
    the scale.

    Parameters
    ----------
    start : expression | array_like | scalar
        Aesthetic expression using primary variables from the layer
        data.
    after_stat : expression
        Aesthetic expression using variables calculated by the stat.
    after_scale : expression
        Aesthetic expression using aesthetics of the layer.
    """

    def __init__(self, start=None, after_stat=None, after_scale=None):
        self.start = start
        self.after_stat = after_stat
        self.after_scale = after_scale

    def __repr__(self):
        """
        Repr for staged mapping

        A mapping that is set at only one stage gets a shorter
        representation; otherwise all three stages are shown.
        """
        # Shorter representation when the mapping happens at a
        # single stage. When no stage is set at all, the first branch
        # applies and the result is repr(None).
        if self.after_stat is None and self.after_scale is None:
            return repr(self.start)

        if self.start is None and self.after_scale is None:
            return f"after_stat({self.after_stat!r})"

        if self.start is None and self.after_stat is None:
            return f"after_scale({self.after_scale!r})"

        return (
            f"stage(start={self.start!r}, "
            f"after_stat={self.after_stat!r}, "
            f"after_scale={self.after_scale!r})"
        )
def after_stat(x):
    """
    Evaluate mapping after statistic has been calculated

    Parameters
    ----------
    x : str
        An expression

    Returns
    -------
    stage
        A staged mapping with only the ``after_stat`` stage set.

    See Also
    --------
    :func:`after_scale` : For how to alter aesthetics after the data has been
        mapped by the scale.
    :class:`stage` : For how to map to aesthetics at more than one stage of
        the plot building pipeline.
    """
    return stage(after_stat=x)
def after_scale(x):
    """
    Evaluate mapping after variable has been mapped to the scale

    This gives the user a chance to alter the value of a variable
    in the final units of the scale e.g. the rgb hex color.

    Parameters
    ----------
    x : str
        An expression

    Returns
    -------
    stage
        A staged mapping with only the ``after_scale`` stage set.

    See Also
    --------
    :func:`after_stat` : For how to map aesthetics to variable calculated
        by the stat
    :class:`stage` : For how to map to aesthetics at more than one stage of
        the plot building pipeline.
    """
    return stage(after_scale=x)
def reorder(x, y, fun=np.median, ascending=True):
    """
    Reorder categorical by sorting along another variable

    It is the order of the categories that changes. Values in ``y``
    are grouped by the categories in ``x`` and summarised to determine
    the new order.

    Credit: Copied from plydata

    Parameters
    ----------
    x : list-like
        Values that will make up the categorical.
    y : list-like
        Values by which ``x`` will be ordered.
    fun : callable
        Summarising function applied to the values of ``y`` for each
        category in ``x``. Default is the *median*.
    ascending : bool
        If ``True``, the categories of ``x`` are ordered in ascending
        order of the summarised ``y``.

    Returns
    -------
    pd.Categorical
        The values of ``x`` as a categorical whose categories are
        sorted by the summary of ``y``.

    Raises
    ------
    ValueError
        If ``x`` and ``y`` do not have the same length.

    Examples
    --------
    >>> x = list('abbccc')
    >>> y = [11, 2, 2, 3, 33, 3]
    >>> reorder(x, y)
    [a, b, b, c, c, c]
    Categories (3, object): [b, c, a]
    >>> reorder(x, y, fun=max)
    [a, b, b, c, c, c]
    Categories (3, object): [b, a, c]
    >>> reorder(x, y, fun=max, ascending=False)
    [a, b, b, c, c, c]
    Categories (3, object): [c, a, b]
    >>> x_ordered = pd.Categorical(x, ordered=True)
    >>> reorder(x_ordered, y)
    [a, b, b, c, c, c]
    Categories (3, object): [b < c < a]
    >>> reorder(x + ['d'], y)
    Traceback (most recent call last):
        ...
    ValueError: Lengths are not equal. len(x)=7, len(y)=6
    """
    if len(x) != len(y):
        raise ValueError(f"Lengths are not equal. {len(x)=}, {len(y)=}")

    # One summary value per category of x, sorted so that the index
    # holds the categories in their new order
    summary = (
        pd.Series(y).groupby(x).apply(fun).sort_values(ascending=ascending)
    )
    cats = summary.index.to_list()
    return pd.Categorical(x, categories=cats)
# Functions that the user can call inside the aes() mapping. They are
# meant to make variable transformations as easy as they are in
# ggplot2
AES_INNER_NAMESPACE = {
    "factor": pd.Categorical,
    "reorder": reorder,
}
def evaluate(
    aesthetics: aes | dict[str, Any], data: pd.DataFrame, env: EvalEnvironment
) -> pd.DataFrame:
    """
    Evaluate aesthetics

    Parameters
    ----------
    aesthetics : dict-like
        Aesthetics to evaluate. They must be of the form {name: expr}
    data : pd.DataFrame
        Dataframe whose columns are/may-be variables in the aesthetic
        expressions i.e. it is a namespace with variables.
    env : ~patsy.Eval.Environment
        Environment in which the aesthetics are evaluated

    Returns
    -------
    evaled : pd.DataFrame
        Dataframe of the form {name: result}, where each column is the
        result from evaluating an expression.

    Raises
    ------
    PlotnineError
        If an expression cannot be evaluated, if a list-like aesthetic
        is neither of length one nor of the same length as the data,
        or if the aesthetic is of a kind that is not handled.

    Examples
    --------
    >>> import patsy
    >>> var1 = 2
    >>> env = patsy.eval.EvalEnvironment.capture()
    >>> df = pd.DataFrame({'x': range(1, 6)})
    >>> aesthetics = {'y': 'x**var1'}
    >>> evaluate(aesthetics, df, env)
        y
    0   1
    1   4
    2   9
    3  16
    4  25
    """
    env = env.with_outer_namespace(AES_INNER_NAMESPACE)

    # Store the evaluation results in a dict, one entry per aesthetic
    evaled = {}

    # If a column name is not in the data, it is evaluated/transformed
    # in the environment of the call to ggplot
    for ae, col in aesthetics.items():
        if isinstance(col, str):
            if col in data:
                evaled[ae] = data[col]
            else:
                try:
                    new_val = env.eval(col, inner_namespace=data)
                except Exception as e:
                    # Chain the original error so that it is reported
                    # as the direct cause
                    raise PlotnineError(
                        _TPL_EVAL_FAIL.format(ae, col, str(e))
                    ) from e

                # NOTE(review): evaled is a plain dict, so this
                # assignment is not expected to fail; the guard is
                # kept as it was.
                try:
                    evaled[ae] = new_val
                except Exception as e:
                    raise PlotnineError(
                        _TPL_BAD_EVAL_TYPE.format(
                            ae, col, str(type(new_val)), str(e)
                        )
                    ) from e
        elif pdtypes.is_list_like(col):
            n = len(col)
            # The length is only checked when the data is not empty
            if len(data) and n != len(data) and n != 1:
                raise PlotnineError(
                    "Aesthetics must either be length one, "
                    "or the same length as the data"
                )
            evaled[ae] = col
        elif is_known_scalar(col):
            # A scalar that would be the first entry is wrapped in a
            # list -- presumably so that the dataframe constructor gets
            # at least one list-like column; confirm.
            if not evaled:
                col = [col]
            evaled[ae] = col
        else:
            msg = "Do not know how to deal with aesthetic '{}'"
            raise PlotnineError(msg.format(ae))

    # Using `type` preserves the subclass of pd.DataFrame
    index = data.index if len(data.index) and evaled else None
    evaled = type(data)(data=evaled, index=index)
    return evaled
def is_known_scalar(value):
    """
    Return True if value is a type we expect in a dataframe

    That is a non-iterable value that is either a number or a
    datetime/timedelta.
    """
    if np.iterable(value):
        return False

    if isinstance(value, numbers.Number):
        return True

    # Using pandas.Series helps catch python, numpy and pandas
    # versions of the datetime and timedelta types
    inferred_kind = pd.Series(value).dtype.kind
    return inferred_kind in ("M", "m")