Source code for plotnine.stats.stat_ydensity

from contextlib import suppress

import numpy as np
import pandas as pd

from ..doctools import document
from ..exceptions import PlotnineError
from .stat import stat
from .stat_density import compute_density, stat_density


@document
class stat_ydensity(stat):
    """
    Density estimate

    {usage}

    Parameters
    ----------
    {common_parameters}
    kernel : str, optional (default: 'gaussian')
        Kernel used for density estimation. One of::

            'biweight'
            'cosine'
            'cosine2'
            'epanechnikov'
            'gaussian'
            'triangular'
            'triweight'
            'uniform'

    adjust : float, optional (default: 1)
        An adjustment factor for the ``bw``. Bandwidth becomes
        :py:`bw * adjust`.
    trim : bool, optional (default: True)
        This parameter only matters if you are displaying multiple
        densities in one plot. If :py:`True`, the default, each
        density is computed over the range of that group; this
        typically means the estimated values will not line-up, and
        hence you won't be able to stack density values.
        If :py:`False`, each density is computed over the dimension
        of the y scale.
    n : int, optional (default: 1024)
        Number of equally spaced points at which the density is to
        be estimated. For efficient computation, it should be a power
        of two.
    bw : str or float, optional (default: 'nrd0')
        The bandwidth to use. If a float is given, it is the bandwidth.
        The :py:`str` choices are::

            'nrd0'
            'normal_reference'
            'scott'
            'silverman'

        ``nrd0`` is a port of ``stats::bw.nrd0`` in R; it is equivalent
        to ``silverman`` when there is more than 1 value in a group.
    scale : str, optional (default: 'area')
        How to scale the violins. The options are::

            'area'  # all violins have the same area, before
                    # trimming the tails.

            'count' # areas are scaled proportionally to the number
                    # of observations.

            'width' # all violins have the same maximum width.

    See Also
    --------
    plotnine.geoms.geom_violin
    statsmodels.nonparametric.kde.KDEUnivariate
    statsmodels.nonparametric.kde.KDEUnivariate.fit
    """

    _aesthetics_doc = """
    {aesthetics_table}

    .. rubric:: Options for computed aesthetics

    ::

         'width'        # Maximum width of density, [0, 1] range.
         'violinwidth'  # Shape of the violin

    Calculated aesthetics are accessed using the `after_stat` function.
    e.g. :py:`after_stat('width')`.
    """
    REQUIRED_AES = {"x", "y"}
    NON_MISSING_AES = {"weight"}
    DEFAULT_PARAMS = {
        "geom": "violin",
        "position": "dodge",
        "na_rm": False,
        "adjust": 1,
        "kernel": "gaussian",
        "n": 1024,
        "trim": True,
        "bw": "nrd0",
        "scale": "area",
    }
    DEFAULT_AES = {"weight": None}
    CREATES = {"width", "violinwidth"}

    def setup_params(self, data):
        """
        Validate and normalise the parameters of the stat

        Parameters
        ----------
        data : dataframe
            Data for the layer. It is not used by this method.

        Returns
        -------
        out : dict
            A copy of ``self.params`` in which ``kernel`` has been
            replaced by its abbreviation and any parameter that only
            ``stat_density`` declares has been filled in with the
            ``stat_density`` default.

        Raises
        ------
        PlotnineError
            If ``scale`` or ``kernel`` is not a recognised value.
        """
        params = self.params.copy()

        valid_scale = ("area", "count", "width")
        if params["scale"] not in valid_scale:
            msg = "Parameter scale should be one of {}"
            raise PlotnineError(msg.format(valid_scale))

        lookup = {
            "biweight": "biw",
            "cosine": "cos",
            "cosine2": "cos2",
            "epanechnikov": "epa",
            "gaussian": "gau",
            "triangular": "tri",
            "triweight": "triw",
            "uniform": "uni",
        }

        # A full kernel name is swapped for its abbreviation. Anything
        # else (an abbreviation, an unknown name, or a value that is not
        # a string and so has no .lower) is left as is and checked below,
        # so that every bad value ends in the same PlotnineError.
        with suppress(KeyError, AttributeError):
            params["kernel"] = lookup[params["kernel"].lower()]

        if params["kernel"] not in lookup.values():
            msg = (
                "kernel should be one of {}. "
                "You may use the abbreviations {}"
            )
            # Lists, so the message does not show dict view reprs
            raise PlotnineError(
                msg.format(list(lookup), list(lookup.values()))
            )

        # Fill in the parameters that stat_density declares but this
        # stat does not; the whole dict is later passed on to
        # compute_density.
        missing_params = stat_density.DEFAULT_PARAMS.keys() - params.keys()
        for key in missing_params:
            params[key] = stat_density.DEFAULT_PARAMS[key]

        return params

    @classmethod
    def compute_panel(cls, data, scales, **params):
        """
        Compute the densities of a panel and add ``violinwidth``

        Parameters
        ----------
        data : dataframe
            Data for a single panel.
        scales : object
            Scales of the panel; passed through to the parent
            ``compute_panel``.
        **params : dict
            Parameters of the stat. Only ``scale`` is read here.

        Returns
        -------
        out : dataframe
            Result of the parent ``compute_panel`` with an added
            ``violinwidth`` column. An empty result is returned
            unchanged.

        Raises
        ------
        PlotnineError
            If ``params["scale"]`` is not a known scale value.
        """
        # The class is named explicitly: super(cls, cls) would resolve
        # back to this very method when cls is a subclass that does not
        # override compute_panel, and recurse without end.
        data = super(stat_ydensity, cls).compute_panel(data, scales, **params)

        if not len(data):
            return data

        scale = params["scale"]
        if scale == "area":
            # Relative to the largest density in the panel
            data["violinwidth"] = data["density"] / data["density"].max()
        elif scale == "count":
            # As for area, then weighted by the relative value of n
            data["violinwidth"] = (
                data["density"]
                / data["density"].max()
                * data["n"]
                / data["n"].max()
            )
        elif scale == "width":
            data["violinwidth"] = data["scaled"]
        else:
            msg = "Unknown scale value '{}'"
            raise PlotnineError(msg.format(scale))

        return data

    @classmethod
    def compute_group(cls, data, scales, **params):
        """
        Compute the density of the ``y`` values of a single group

        Parameters
        ----------
        data : dataframe
            Data for a single group. The ``x`` and ``y`` columns are
            read, and ``weight`` if it is present.
        scales : object
            Scales of the panel. ``scales.y.dimension()`` gives the
            range of the estimate when ``trim`` is false.
        **params : dict
            Parameters of the stat. ``trim`` is read here and the
            whole dict is passed on to ``compute_density``.

        Returns
        -------
        out : dataframe
            Result of ``compute_density`` with the evaluation points
            moved to the ``y`` column, ``x`` set to the midpoint of the
            group's ``x`` range and, when the group has more than one
            distinct ``x`` value, a ``width`` column. For an empty
            group an empty dataframe is returned.
        """
        n = len(data)
        if n == 0:
            return pd.DataFrame()

        weight = data.get("weight")

        if params["trim"]:
            range_y = data["y"].min(), data["y"].max()
        else:
            range_y = scales.y.dimension()

        dens = compute_density(data["y"], weight, range_y, **params)

        if not len(dens):
            return dens

        # compute_density places the evaluation points in the x column,
        # for a violin they belong to the y aesthetic.
        dens["y"] = dens["x"]
        dens["x"] = np.mean([data["x"].min(), data["x"].max()])

        # Compute width if x has multiple values
        if len(np.unique(data["x"])) > 1:
            dens["width"] = np.ptp(data["x"]) * 0.9  # type: ignore

        return dens