Source code for plotnine.stats.stat_qq_line
import numpy as np
import pandas as pd
from ..doctools import document
from ..exceptions import PlotnineError
from .stat import stat
from .stat_qq import stat_qq
@document
class stat_qq_line(stat):
    """
    Calculate line through quantile-quantile plot

    {usage}

    Parameters
    ----------
    {common_parameters}
    distribution : str (default: norm)
        Distribution or distribution function name. The default is
        *norm* for a normal probability plot. Objects that look enough
        like a stats.distributions instance (i.e. they have a ppf
        method) are also accepted. See :mod:`scipy stats <scipy.stats>`
        for available distributions.
    dparams : dict, optional
        Distribution-specific shape parameters (shape parameters plus
        location and scale).
    quantiles : array_like, optional
        Probability points at which to calculate the theoretical
        quantile values. If provided, must be the same number as
        the sample data points. The default is to use calculated
        theoretical points, use ``alpha_beta`` to control how
        these points are generated.
    alpha_beta : tuple
        Parameter values to use when calculating the quantiles.
        Default is :py:`(3/8, 3/8)`.
    line_p : tuple, optional
        Quantiles to use when fitting a Q-Q line. Must be 2 values.
        Default is :py:`(0.25, 0.75)`.
    fullrange : bool
        If :py:`True` the fit will span the full range of the plot.

    See Also
    --------
    scipy.stats.mstats.plotting_positions : Uses ``alpha_beta``
        to calculate the quantiles.
    """

    # Aesthetics that must be mapped for this stat
    REQUIRED_AES = {"sample"}

    # Parameter defaults
    DEFAULT_PARAMS = {
        "geom": "qq_line",
        "position": "identity",
        "na_rm": False,
        "distribution": "norm",
        "dparams": {},
        "quantiles": None,
        "alpha_beta": (3 / 8, 3 / 8),
        "line_p": (0.25, 0.75),
        "fullrange": False,
    }

    # Columns produced by compute_group
    CREATES = {"x", "y"}

    def setup_params(self, data):
        """
        Check that ``line_p`` holds exactly two values

        Raises a PlotnineError otherwise; returns the params unchanged.
        """
        n_probs = len(self.params["line_p"])
        if n_probs != 2:
            raise PlotnineError(
                "Cannot fit line quantiles. 'line_p' must be of length 2"
            )
        return self.params

    @classmethod
    def compute_group(cls, data, scales, **params):
        """
        Compute the two end points of the Q-Q line for one group

        The line passes through the points where the theoretical and
        the sample quantiles at ``line_p`` meet.
        """
        from scipy.stats.mstats import mquantiles

        from .distributions import get_continuous_distribution

        probs = params["line_p"]
        dist_kwargs = params["dparams"]

        # stat_qq supplies the sample and theoretical quantiles
        qq_points = stat_qq.compute_group(data, scales, **params)
        observed = qq_points["sample"].to_numpy()
        expected = qq_points["theoretical"].to_numpy()

        # Two anchor points: (theoretical quantile, sample quantile)
        dist = get_continuous_distribution(params["distribution"])
        x_anchor = dist.ppf(probs, **dist_kwargs)
        y_anchor = mquantiles(observed, probs)

        # Straight line through the two anchors
        rise = np.diff(y_anchor)
        run = np.diff(x_anchor)
        slope = (rise / run)[0]
        intercept = y_anchor[0] - slope * x_anchor[0]

        # Horizontal extent: the whole x scale when asked for (and
        # available), otherwise the range of the theoretical quantiles
        use_scale_limits = params["fullrange"] and scales.x
        if use_scale_limits:
            x_ends = scales.x.dimension()
        else:
            x_ends = (expected.min(), expected.max())

        x_ends = np.asarray(x_ends)
        y_ends = slope * x_ends + intercept
        return pd.DataFrame({"x": x_ends, "y": y_ends})