# Source code for plotnine.geoms.geom

from __future__ import annotations

import typing
from copy import deepcopy
from itertools import chain, repeat

from ..exceptions import PlotnineError
from ..layer import layer
from ..mapping.aes import is_valid_aesthetic, rename_aesthetics
from ..mapping.evaluation import evaluate
from ..positions.position import position
from ..stats.stat import stat
from ..utils import (
    Registry,
    copy_keys,
    data_mapping_as_kwargs,
    is_list_like,
    is_string,
    remove_missing,
)

if typing.TYPE_CHECKING:
    from typing import Any

    import pandas as pd

    from plotnine.iapi import panel_view
    from plotnine.typing import (
        Aes,
        Axes,
        Coord,
        DataLike,
        DrawingArea,
        EvalEnvironment,
        Ggplot,
        Layer,
        Layout,
    )


class geom(metaclass=Registry):
    """
    Base class of all Geoms
    """

    __base__ = True

    #: Default aesthetics for the geom
    DEFAULT_AES: dict[str, Any] = {}

    #: Required aesthetics for the geom
    REQUIRED_AES: set[str] = set()

    #: Aesthetics that, together with the required aesthetics, are
    #: checked for missing values by :meth:`handle_na`
    NON_MISSING_AES: set[str] = set()

    #: Default parameters for the geom
    DEFAULT_PARAMS: dict[str, Any] = {}

    #: geom/layer specific dataframe
    data: DataLike

    #: mappings i.e. :py:`aes(x='col1', fill='col2')`
    mapping: Aes

    aes_params: dict[str, Any] = {}  # setting of aesthetic
    params: dict[str, Any]  # parameter settings

    # Plot namespace, it gets its value when the plot is being
    # built.
    environment: EvalEnvironment

    # The geom responsible for the legend if draw_legend is
    # not implemented
    legend_geom: str = "point"

    # Documentation for the aesthetics. It is added under the
    # documentation for mapping parameter. Use the
    # {aesthetics_table} placeholder to insert a table for all the
    # aesthetics and their default values.
    _aesthetics_doc: str = "{aesthetics_table}"

    def __init__(
        self,
        mapping: Aes | None = None,
        data: DataLike | None = None,
        **kwargs: Any,
    ):
        """
        Create the geom together with its stat and position

        Parameters
        ----------
        mapping : aes, optional
            Aesthetic mappings.
        data : dataframe, optional
            Data specific to this geom/layer.
        **kwargs : dict
            Aesthetics set as parameters and/or parameters for
            the geom, stat or layer.

        Raises
        ------
        PlotnineError
            If any of the arguments is not understood by the
            geom, the stat or the layer.
        """
        kwargs = rename_aesthetics(kwargs)
        kwargs = data_mapping_as_kwargs((data, mapping), kwargs)
        self._kwargs = kwargs  # Will be used to create stat & layer

        # separate aesthetics and parameters
        self.aes_params = copy_keys(kwargs, {}, self.aesthetics())
        self.params = copy_keys(kwargs, deepcopy(self.DEFAULT_PARAMS))

        self.mapping = kwargs["mapping"]
        self.data = kwargs["data"]
        self._stat = stat.from_geom(self)
        self._position = position.from_geom(self)
        self._verify_arguments(kwargs)  # geom, stat, layer

    @staticmethod
    def from_stat(stat: stat) -> geom:
        """
        Return an instantiated geom object

        geoms should not override this method.

        Parameters
        ----------
        stat : stat
            `stat`

        Returns
        -------
        out : geom
            A geom object

        Raises
        ------
        PlotnineError
            If unable to create a `geom`.
        """
        name = stat.params["geom"]

        # Already a geom object, nothing to create
        if isinstance(name, geom):
            return name

        if isinstance(name, type) and issubclass(name, geom):
            klass = name
        elif is_string(name):
            # Accept both the short ("point") and the full
            # ("geom_point") name
            if not name.startswith("geom_"):
                name = f"geom_{name}"
            klass = Registry[name]
        else:
            raise PlotnineError(f"Unknown geom of type {type(name)}")

        return klass(stat=stat, **stat._kwargs)

    @classmethod
    def aesthetics(cls: type[geom]) -> set[str]:
        """
        Return all the aesthetics for this geom

        geoms should not override this method.

        Returns
        -------
        out : set
            The default and required aesthetics, plus ``group``
            and the alternative ``colour`` spellings.
        """
        main = cls.DEFAULT_AES.keys() | cls.REQUIRED_AES
        other = {"group"}

        # Need to recognize both spellings
        if "color" in main:
            other.add("colour")
        if "outlier_color" in main:
            other.add("outlier_colour")

        return main | other

    def __deepcopy__(self, memo: dict[Any, Any]) -> geom:
        """
        Deep copy without copying the self.data dataframe

        geoms should not override this method.
        """
        cls = self.__class__
        result = cls.__new__(cls)
        memo[id(self)] = result
        old = self.__dict__
        new = result.__dict__

        # don't make a deepcopy of data, _kwargs or environment;
        # the copy shares these objects with the original
        shallow = {"data", "_kwargs", "environment"}
        for key, item in old.items():
            if key in shallow:
                new[key] = item
                memo[id(item)] = item
            else:
                new[key] = deepcopy(item, memo)

        return result

    def setup_data(self, data: pd.DataFrame) -> pd.DataFrame:
        """
        Modify the data before drawing takes place

        This function is called *before* position adjustments are
        done. It is used by geoms to create the final aesthetics
        used for drawing. The base class method does nothing,
        geoms can override this method for two reasons:

        1. The ``stat`` does not create all the aesthetics (usually
           position aesthetics) required for drawing the ``geom``,
           but those aesthetics can be computed from the available
           data. For example :class:`~plotnine.geoms.geom_boxplot`
           and :class:`~plotnine.geoms.geom_violin`.

        2. The ``geom`` inherits from another ``geom`` (superclass)
           which does the drawing and the superclass requires
           certain aesthetics to be present in the data. For
           example :class:`~plotnine.geoms.geom_tile` and
           :class:`~plotnine.geoms.geom_area`.

        Parameters
        ----------
        data : dataframe
            Data used for drawing the geom.

        Returns
        -------
        out : dataframe
            Data used for drawing the geom.
        """
        return data

    def use_defaults(
        self, data: pd.DataFrame, aes_modifiers: dict[str, Any]
    ) -> pd.DataFrame:
        """
        Combine data with defaults and set aesthetics from parameters

        geoms should not override this method.

        Parameters
        ----------
        data : dataframe
            Data used for drawing the geom. It is modified in
            place.
        aes_modifiers : dict
            Aesthetics that are evaluated against the data. The
            results replace the columns of the same name that are
            already in the data.

        Returns
        -------
        out : dataframe
            Data used for drawing the geom.

        Raises
        ------
        PlotnineError
            If an aesthetic set as a parameter cannot be assigned
            to the data and does not look like a valid value.
        """
        missing_aes = (
            self.DEFAULT_AES.keys()
            - self.aes_params.keys()
            - set(data.columns)
        )

        # Not in data and not set, use default
        for ae in missing_aes:
            data[ae] = self.DEFAULT_AES[ae]

        # Evaluate/Modify the mapped aesthetics
        evaled = evaluate(aes_modifiers, data, self.environment)
        for ae in evaled.columns.intersection(data.columns):
            data[ae] = evaled[ae]

        if "PANEL" in data:
            num_panels = len(data["PANEL"].unique())
        else:
            num_panels = 1

        # Aesthetics set as parameters to the geom/stat
        for ae, value in self.aes_params.items():
            try:
                data[ae] = value
            except ValueError as err:
                # sniff out the special cases, like custom
                # tupled linetypes, shapes and colors
                if is_valid_aesthetic(value, ae):
                    data[ae] = [value] * len(data)
                elif num_panels > 1 and is_list_like(value):
                    # Repeat the values once for every panel
                    data[ae] = list(
                        chain.from_iterable(repeat(value, num_panels))
                    )
                else:
                    raise PlotnineError(
                        f"'{value}' does not look like a "
                        f"valid value for `{ae}`"
                    ) from err

        return data

    def draw_layer(
        self,
        data: pd.DataFrame,
        layout: Layout,
        coord: Coord,
        **params: Any,
    ):
        """
        Draw layer across all panels

        geoms should not override this method.

        Parameters
        ----------
        data : DataFrame
            DataFrame specific for this layer
        layout : Layout
            Layout object created when the plot is getting
            built
        coord : coord
            Type of coordinate axes
        params : dict
            Combined *geom* and *stat* parameters. Also
            includes the stacking order of the layer in
            the plot (*zorder*)
        """
        # observed=True: when PANEL is categorical, only create
        # groups for the panels that are present in the data
        for _, pdata in data.groupby("PANEL", observed=True):
            if len(pdata) == 0:
                continue
            # PANEL values are 1-based, the layout lists 0-based
            ploc = pdata["PANEL"].iloc[0] - 1
            panel_params = layout.panel_params[ploc]
            ax = layout.axs[ploc]
            self.draw_panel(pdata, panel_params, coord, ax, **params)

    def draw_panel(
        self,
        data: pd.DataFrame,
        panel_params: panel_view,
        coord: Coord,
        ax: Axes,
        **params: Any,
    ):
        """
        Plot all groups

        For efficiency, geoms that do not need to partition
        different groups before plotting should override this
        method and avoid the groupby.

        Parameters
        ----------
        data : dataframe
            Data to be plotted by this geom. This is the
            dataframe created in the plot_build pipeline.
        panel_params : panel_view
            The scale information as may be required by the
            axes. At this point, that information is about
            ranges, ticks and labels. Attributes of interest
            to the geom are::

                'panel_params.x.range'  # tuple
                'panel_params.y.range'  # tuple

        coord : coord
            Coordinate (e.g. coord_cartesian) system of the
            geom.
        ax : axes
            Axes on which to plot.
        params : dict
            Combined parameters for the geom and stat. Also
            includes the 'zorder'.
        """
        for _, gdata in data.groupby("group"):
            # Every group is drawn from a frame with a fresh
            # 0..n-1 index
            gdata = gdata.reset_index(drop=True)
            self.draw_group(gdata, panel_params, coord, ax, **params)

    @staticmethod
    def draw_group(
        data: pd.DataFrame,
        panel_params: panel_view,
        coord: Coord,
        ax: Axes,
        **params: Any,
    ):
        """
        Plot data belonging to a group.

        Parameters
        ----------
        data : dataframe
            Data to be plotted by this geom. This is the
            dataframe created in the plot_build pipeline.
        panel_params : panel_view
            The scale information as may be required by the
            axes. At this point, that information is about
            ranges, ticks and labels. Keys of interest to
            the geom are::

                'x_range'  # tuple
                'y_range'  # tuple

        coord : coord
            Coordinate (e.g. coord_cartesian) system of the
            geom.
        ax : axes
            Axes on which to plot.
        params : dict
            Combined parameters for the geom and stat. Also
            includes the 'zorder'.

        Raises
        ------
        NotImplementedError
            Always, in the base class.
        """
        msg = "The geom should implement this method."
        raise NotImplementedError(msg)

    @staticmethod
    def draw_unit(
        data: pd.DataFrame,
        panel_params: panel_view,
        coord: Coord,
        ax: Axes,
        **params: Any,
    ):
        """
        Plot data belonging to a unit.

        A matplotlib plot function may require that an aesthetic
        have a single unique value. e.g. linestyle='dashed' and
        not linestyle=['dashed', 'dotted', ...].
        A single call to such a function can only plot lines with
        the same linestyle. However, if the plot we want has more
        than one line with different linestyles, we need to group
        the lines with the same linestyle and plot them as one
        unit. In this case, draw_group calls this function to do
        the plotting. For an example see
        :class:`~plotnine.geoms.geom_point`.

        Parameters
        ----------
        data : dataframe
            Data to be plotted by this geom. This is the
            dataframe created in the plot_build pipeline.
        panel_params : panel_view
            The scale information as may be required by the
            axes. At this point, that information is about
            ranges, ticks and labels. Keys of interest to
            the geom are::

                'x_range'  # tuple
                'y_range'  # tuple

            In rare cases a geom may need access to the x or y
            scales. Those are available at::

                'scales'   # SimpleNamespace

        coord : coord
            Coordinate (e.g. coord_cartesian) system of the
            geom.
        ax : axes
            Axes on which to plot.
        params : dict
            Combined parameters for the geom and stat. Also
            includes the 'zorder'.

        Raises
        ------
        NotImplementedError
            Always, in the base class.
        """
        msg = "The geom should implement this method."
        raise NotImplementedError(msg)

    def __radd__(self, gg: Ggplot) -> Ggplot:
        """
        Add layer representing geom object on the right

        Parameters
        ----------
        gg : ggplot
            ggplot object

        Returns
        -------
        out : ggplot
            ggplot object with added layer.
        """
        gg += self.to_layer()  # Add layer
        return gg

    def to_layer(self) -> Layer:
        """
        Make a layer that represents this geom

        Returns
        -------
        out : layer
            Layer
        """
        return layer.from_geom(self)

    def _verify_arguments(self, kwargs: dict[str, Any]):
        """
        Verify arguments passed to the geom

        Parameters
        ----------
        kwargs : dict
            Arguments used to create the geom.

        Raises
        ------
        PlotnineError
            If any argument is not an aesthetic or parameter of
            the geom or the stat, or a layer parameter.
        """
        geom_stat_args = kwargs.keys() | self._stat._kwargs.keys()
        unknown = (
            geom_stat_args
            - self.aesthetics()  # geom aesthetics
            - self.DEFAULT_PARAMS.keys()  # geom parameters
            - self._stat.aesthetics()  # stat aesthetics
            - self._stat.DEFAULT_PARAMS.keys()  # stat parameters
            - {  # layer parameters
                "data",
                "mapping",
                "show_legend",
                "inherit_aes",
                "raster",
            }
        )
        if unknown:
            msg = (
                "Parameters {}, are not understood by "
                "either the geom, stat or layer."
            )
            raise PlotnineError(msg.format(unknown))

    def handle_na(self, data: pd.DataFrame) -> pd.DataFrame:
        """
        Remove rows with NaN values

        geoms that infer extra information from missing values
        should override this method. For example
        :class:`~plotnine.geoms.geom_path`.

        Parameters
        ----------
        data : dataframe
            Data

        Returns
        -------
        out : dataframe
            Data without the NaNs.

        Notes
        -----
        Shows a warning if any rows are removed and the
        `na_rm` parameter is False. It only takes into account
        the columns of the required aesthetics and those listed
        in ``NON_MISSING_AES``.
        """
        return remove_missing(
            data,
            self.params["na_rm"],
            list(self.REQUIRED_AES | self.NON_MISSING_AES),
            self.__class__.__name__,
        )

    @staticmethod
    def draw_legend(
        data: pd.Series[Any], da: DrawingArea, lyr: Layer
    ) -> DrawingArea:
        """
        Draw a rectangle in the box

        Parameters
        ----------
        data : Series
            Data Row
        da : DrawingArea
            Canvas
        lyr : layer
            Layer

        Returns
        -------
        out : DrawingArea

        Raises
        ------
        NotImplementedError
            Always, in the base class.
        """
        msg = "The geom should implement this method."
        raise NotImplementedError(msg)