Mean, median, and mode

Author

Connor Robertson

A brief demonstration of how the mean, median, and mode are biased differently as outliers are added to the data.

The distribution starts as a Gamma with a large shape parameter \(k\), which approximates a Gaussian (for which mean = median = mode). As \(k\) is decreased, the Gamma distribution becomes increasingly right-skewed and more weight moves into the tail, consequently pulling the mean out toward the tail, the mode in toward the origin, and leaving the median between the two.

#| standalone: true
#| viewerHeight: 500

from shiny import *
from scipy.stats import gamma
import numpy as np
import seaborn as sns
import matplotlib.pyplot as plt
# Switch matplotlib over to seaborn's default theme before reading any colours.
sns.set_theme()

# Read the colour cycle *after* the theme is applied so the marker lines in
# the plot use the same palette as the density curve.
prop_cycle = plt.rcParams["axes.prop_cycle"]
colors = [entry["color"] for entry in prop_cycle]

# Page layout: a sidebar holding the shape slider and a main panel with the plot.
app_ui = ui.page_fluid(
    ui.layout_sidebar(
        ui.panel_sidebar(
            # The Gamma shape parameter must be strictly positive, so the
            # slider starts one step above zero rather than at 0 (a shape of
            # 0 makes every statistic NaN and the plot empty).
            ui.input_slider("shape", "Shape", 0.1, 50, 50, step=0.1),
        ),
        ui.panel_main(
            ui.output_plot("plot"),
        ),
    ),
)

def gamma_summary(shape, n_points=200, upper_quantile=0.999):
    """Evaluate a unit-scale Gamma density and its mean, median, and mode.

    Args:
        shape: Gamma shape parameter ``k``; must be strictly positive.
        n_points: Number of evenly spaced grid points for the density curve.
        upper_quantile: Quantile of the distribution at which the grid ends.

    Returns:
        A tuple ``(x, pdf, mean, median, mode)`` where ``x`` and ``pdf`` are
        arrays of length ``n_points`` and the remaining entries are scalars.

    Raises:
        ValueError: If ``shape`` is not strictly positive (including NaN).
    """
    # `not shape > 0` also rejects NaN, which `shape <= 0` would let through.
    if not shape > 0:
        raise ValueError(f"Gamma shape must be positive, got {shape!r}")

    rv = gamma(shape)
    x = np.linspace(0, rv.ppf(upper_quantile), n_points)
    # For shape < 1 the density is unbounded at x = 0, so pdf[0] is inf there.
    pdf = rv.pdf(x)

    # Closed-form mode of a unit-scale Gamma: k - 1 for k >= 1, otherwise 0.
    # This avoids the discretisation error of picking the highest grid point.
    mode = max(shape - 1.0, 0.0)

    return x, pdf, rv.mean(), rv.median(), mode


def server(input, output, session):
    """Register the reactive plot output for the Shiny app.

    Args:
        input: Shiny input object; ``input.shape()`` supplies the Gamma shape.
        output: Shiny output object used to register the ``plot`` renderer.
        session: Shiny session object (unused).
    """
    @output
    @render.plot(alt="Gamma density with its mean, median, and mode marked")
    def plot():
        x, pdf, mean, median, mode = gamma_summary(input.shape())

        ax = sns.lineplot(x=x, y=pdf)
        # colors[0] is taken by the density curve, so markers start at index 1.
        markers = (
            (mean, "Mean", colors[1]),
            (median, "Median", colors[2]),
            (mode, "Mode", colors[3]),
        )
        for value, label, color in markers:
            ax.axvline(x=value, ymin=0, ymax=1, label=label, c=color)
        ax.legend()


# Bind the UI definition and the server function into the Shiny application object.
app = App(app_ui, server)