# Source code for plotnine.stats.stat_qq

import numpy as np
import pandas as pd

from ..doctools import document
from ..exceptions import PlotnineError
from ..mapping.evaluation import after_stat
from .stat import stat


# Note: distribution should be the name of a distribution in scipy.stats.distributions
@document
class stat_qq(stat):
    """
    Calculation for quantile-quantile plot

    {usage}

    Parameters
    ----------
    {common_parameters}
    distribution : str (default: norm)
        Distribution or distribution function name. The default is
        *norm* for a normal probability plot. Objects that look enough
        like a stats.distributions instance (i.e. they have a ppf
        method) are also accepted. See :mod:`scipy stats <scipy.stats>`
        for available distributions.
    dparams : dict
        Distribution-specific shape parameters (shape parameters plus
        location and scale).
    quantiles : array_like, optional
        Probability points at which to calculate the theoretical
        quantile values. If provided, there must be as many of them as
        there are sample data points. The default is to use calculated
        theoretical points; use ``alpha_beta`` to control how these
        points are generated.
    alpha_beta : tuple
        Parameter values to use when calculating the quantiles.
        Default is :py:`(3/8, 3/8)`.

    See Also
    --------
    scipy.stats.mstats.plotting_positions : Uses ``alpha_beta``
        to calculate the quantiles.
    """

    _aesthetics_doc = """
    {aesthetics_table}

    .. rubric:: Options for computed aesthetics

    ::

         'theoretical'  # theoretical quantiles
         'sample'       # sample quantiles

    """

    REQUIRED_AES = {"sample"}
    DEFAULT_AES = {"x": after_stat("theoretical"), "y": after_stat("sample")}
    DEFAULT_PARAMS = {
        "geom": "qq",
        "position": "identity",
        "na_rm": False,
        "distribution": "norm",
        "dparams": {},
        "quantiles": None,
        "alpha_beta": (3 / 8, 3 / 8),
    }

    @classmethod
    def compute_group(cls, data, scales, **params):
        """
        Compute the sample and theoretical quantiles for one group

        Parameters
        ----------
        data : dataframe
            Data for a single group. Only the ``sample`` column is read.
        scales : object
            Not used by this computation.
        **params : dict
            Stat parameters. The keys ``alpha_beta``, ``quantiles``,
            ``distribution`` and ``dparams`` are read.

        Returns
        -------
        out : dataframe
            Columns ``sample`` (the sorted sample values) and
            ``theoretical`` (the distribution's ``ppf`` evaluated at
            the probability points).

        Raises
        ------
        PlotnineError
            If ``quantiles`` is given and its length differs from the
            number of sample values.
        """
        # Imported here rather than at module level, as in the original
        from scipy.stats.mstats import plotting_positions

        from .distributions import get_continuous_distribution

        # Sorted sample values are paired positionally with the
        # probability points below
        sample = data["sample"].sort_values().to_numpy()
        alpha, beta = params["alpha_beta"]
        quantiles = params["quantiles"]

        if quantiles is None:
            # No user supplied probability points; derive them from
            # the sample using the alpha_beta parameters
            quantiles = plotting_positions(sample, alpha, beta)
        elif len(quantiles) != len(sample):
            raise PlotnineError(
                "The number of quantile values is not the same as "
                "the number of sample values."
            )

        # Normalise whatever we have (user sequence or the result of
        # plotting_positions) to a plain ndarray before calling ppf
        quantiles = np.asarray(quantiles)
        cdist = get_continuous_distribution(params["distribution"])
        theoretical = cdist.ppf(quantiles, **params["dparams"])
        return pd.DataFrame({"sample": sample, "theoretical": theoretical})