Source code for plotnine.geoms.geom_boxplot

from __future__ import annotations

import typing
from warnings import warn

import numpy as np
import pandas as pd

from ..doctools import document
from ..exceptions import PlotnineWarning
from ..positions import position_dodge2
from ..positions.position import position
from ..utils import (
    SIZE_FACTOR,
    copy_missing_columns,
    resolution,
    to_rgba,
)
from .geom import geom
from .geom_crossbar import geom_crossbar
from .geom_point import geom_point
from .geom_segment import geom_segment

if typing.TYPE_CHECKING:
    from typing import Any

    import numpy.typing as npt

    from plotnine.iapi import panel_view
    from plotnine.typing import (
        Aes,
        Axes,
        Coord,
        DataLike,
        DrawingArea,
        Layer,
    )


[docs]@document class geom_boxplot(geom): """ Box and whiskers plot {usage} Parameters ---------- {common_parameters} width : float, optional (default None) Box width. If :py:`None`, the width is set to `90%` of the resolution of the data. Note that if the stat has a width parameter, that takes precedence over this one. outlier_alpha : float, optional (default: 1) Transparency of the outlier points. outlier_color : str or tuple, optional (default: None) Color of the outlier points. outlier_shape : str, optional (default: o) Shape of the outlier points. An empty string hides the outliers. outlier_size : float, optional (default: 1.5) Size of the outlier points. outlier_stroke : float, optional (default: 0.5) Stroke-size of the outlier points. notch : bool, optional (default: False) Whether the boxes should have a notch. varwidth : bool, optional (default: False) If :py:`True`, boxes are drawn with widths proportional to the square-roots of the number of observations in the groups. notchwidth : float, optional (default: 0.5) Width of notch relative to the body width. fatten : float, optional (default: 2) A multiplicative factor used to increase the size of the middle bar across the box. """ DEFAULT_AES = { "alpha": 1, "color": "#333333", "fill": "white", "linetype": "solid", "shape": "o", "size": 0.5, "weight": 1, } REQUIRED_AES = {"x", "lower", "upper", "middle", "ymin", "ymax"} DEFAULT_PARAMS = { "stat": "boxplot", "position": "dodge2", "na_rm": False, "width": None, "outlier_alpha": 1, "outlier_color": None, "outlier_shape": "o", "outlier_size": 1.5, "outlier_stroke": 0.5, "notch": False, "varwidth": False, "notchwidth": 0.5, "fatten": 2, } def __init__( self, mapping: Aes | None = None, data: DataLike | None = None, **kwargs: Any, ): _position = kwargs.get("position", self.DEFAULT_PARAMS["position"]) varwidth = kwargs.get("varwidth", self.DEFAULT_PARAMS["varwidth"]) # varwidth = True is not compatible with preserve='total' if varwidth: if isinstance(_position, str): kwargs["position"] = position_dodge2(preserve="single") elif isinstance(_position, position): if _position.params["preserve"] == "total": warn( "Cannot preserve total widths when varwidth=True", PlotnineWarning, ) _position.params["preserve"] = "single" super().__init__(mapping, data, **kwargs) def setup_data(self, data: pd.DataFrame) -> pd.DataFrame: if "width" not in data: width = self.params.get("width", None) if width is not None: data["width"] = width else: data["width"] = resolution(data["x"], False) * 0.9 if "outliers" not in data: data["outliers"] = [[] for i in range(len(data))] # min and max outlier values omin = [ np.min(lst) if len(lst) else +np.inf for lst in data["outliers"] ] omax = [ np.max(lst) if len(lst) else -np.inf for lst in data["outliers"] ] data["ymin_final"] = np.min( np.column_stack([data["ymin"], omin]), axis=1 ) data["ymax_final"] = np.max( np.column_stack([data["ymax"], omax]), axis=1 ) # if varwidth not requested or not available, don't use it if ( "varwidth" not in self.params or not self.params["varwidth"] or "relvarwidth" not in data ): data["xmin"] = data["x"] - data["width"] / 2 data["xmax"] = data["x"] + data["width"] / 2 else: # make relvarwidth relative to the size of the # largest group data["relvarwidth"] /= data["relvarwidth"].max() data["xmin"] = data["x"] - data["relvarwidth"] * data["width"] / 2 data["xmax"] = data["x"] + data["relvarwidth"] * data["width"] / 2 del data["relvarwidth"] del data["width"] return data @staticmethod def draw_group( data: pd.DataFrame, panel_params: panel_view, coord: Coord, ax: Axes, **params: Any, ): def flat(*args: pd.Series[Any]) -> npt.NDArray[Any]: """Flatten list-likes""" return np.hstack(args) common_columns = [ "color", "size", "linetype", "fill", "group", "alpha", "shape", ] # whiskers whiskers = pd.DataFrame( { "x": flat(data["x"], data["x"]), "y": flat(data["upper"], data["lower"]), "yend": flat(data["ymax"], data["ymin"]), "alpha": 1, } ) whiskers["xend"] = whiskers["x"] copy_missing_columns(whiskers, data[common_columns]) # box box_columns = ["xmin", "xmax", "lower", "middle", "upper"] box = data[common_columns + box_columns].copy() box.rename( columns={"lower": "ymin", "middle": "y", "upper": "ymax"}, inplace=True, ) # notch if params["notch"]: box["ynotchlower"] = data["notchlower"] box["ynotchupper"] = data["notchupper"] # outliers num_outliers = len(data["outliers"].iloc[0]) if num_outliers: def outlier_value(param: str) -> Any: oparam = f"outlier_{param}" if params[oparam] is not None: return params[oparam] return data[param].iloc[0] outliers = pd.DataFrame( { "y": data["outliers"].iloc[0], "x": np.repeat(data["x"].iloc[0], num_outliers), "fill": [None] * num_outliers, } ) outliers["alpha"] = outlier_value("alpha") outliers["color"] = outlier_value("color") outliers["shape"] = outlier_value("shape") outliers["size"] = outlier_value("size") outliers["stroke"] = outlier_value("stroke") geom_point.draw_group(outliers, panel_params, coord, ax, **params) # plot geom_segment.draw_group(whiskers, panel_params, coord, ax, **params) geom_crossbar.draw_group(box, panel_params, coord, ax, **params) @staticmethod def draw_legend( data: pd.Series[Any], da: DrawingArea, lyr: Layer ) -> DrawingArea: """ Draw a rectangle in the box Parameters ---------- data : Series Data Row da : DrawingArea Canvas lyr : layer Layer Returns ------- out : DrawingArea """ from matplotlib.lines import Line2D from matplotlib.patches import Rectangle # box facecolor = to_rgba(data["fill"], data["alpha"]) if facecolor is None: facecolor = "none" kwargs = {"linestyle": data["linetype"], "linewidth": data["size"]} box = Rectangle( (da.width * 0.125, da.height * 0.25), width=da.width * 0.75, height=da.height * 0.5, facecolor=facecolor, edgecolor=data["color"], capstyle="projecting", antialiased=False, **kwargs, ) da.add_artist(box) kwargs["solid_capstyle"] = "butt" kwargs["color"] = data["color"] kwargs["linewidth"] *= SIZE_FACTOR # middle strike through strike = Line2D( [da.width * 0.125, da.width * 0.875], [da.height * 0.5, da.height * 0.5], **kwargs, ) da.add_artist(strike) # whiskers top = Line2D( [da.width * 0.5, da.width * 0.5], [da.height * 0.75, da.height * 0.9], **kwargs, ) da.add_artist(top) bottom = Line2D( [da.width * 0.5, da.width * 0.5], [da.height * 0.25, da.height * 0.1], **kwargs, ) da.add_artist(bottom) return da