Mean, median, and mode
Machine Learning
A brief demonstration of the different biases between mean, median, and mode as outliers are added to the data. (Really just a test of using a Shiny app in a Quarto website via shinylive).
The distribution starts as a Gamma with large shape parameter \(k\) (to approximate a Gaussian, which has mean = median = mode).
As \(k\) is decreased, the Gamma distribution becomes skewed and more weight is added to the tail, consequently pulling the mean out, the mode in, and setting the median between them.
#| standalone: true
#| viewerHeight: 500
from shiny import *
from scipy.stats import gamma
import numpy as np
import seaborn as sns
import matplotlib.pyplot as plt
# Apply seaborn's theme first; the colour cycle is read afterwards so the list
# below reflects whatever cycle is in effect once the theme has been set.
sns.set_theme()
prop_cycle = plt.rcParams["axes.prop_cycle"]
# One colour string per entry of the cycle, in cycle order.
colors = [entry["color"] for entry in prop_cycle]
# Page layout: a sidebar holding the Gamma shape slider, next to the plot output.
# NOTE(review): ui.panel_sidebar/ui.panel_main appear to be deprecated in newer
# Shiny for Python releases in favour of ui.sidebar -- confirm against the
# version bundled with shinylive before migrating.
app_ui = ui.page_fluid(
    ui.layout_sidebar(
        ui.panel_sidebar(
            # The Gamma shape parameter must be strictly positive, so the
            # slider starts one step above zero instead of at 0 (where the
            # distribution is undefined and every statistic comes back NaN).
            ui.input_slider("shape", "Shape", 0.1, 50, 50, step=0.1),
        ),
        ui.panel_main(
            ui.output_plot("plot"),
        ),
    ),
)
def server(input, output, session):
    """Register the app's single plot output.

    Args:
        input: Shiny inputs; only ``input.shape()`` (the slider value) is read.
        output: Shiny outputs object used to register ``plot``.
        session: Shiny session; not used here.
    """

    @output
    @render.plot(alt="Gamma density with its mean, median, and mode marked")
    def plot():
        """Draw the Gamma(k) density and mark its mean, median, and mode."""
        k = input.shape()
        # The Gamma shape must be strictly positive; cancel this render
        # quietly instead of drawing a plot full of NaNs.
        req(k > 0)

        rv = gamma(k)
        # Stop the x-axis at the 99.9th percentile so the long right tail
        # does not squash the interesting part of the curve.
        x = np.linspace(0, rv.ppf(0.999), 200)
        pdf = rv.pdf(x)

        # Closed-form mode of a unit-scale Gamma: k - 1 for k >= 1, else 0.
        # This avoids the grid-resolution error of taking argmax over `pdf`.
        markers = (
            ("Mean", rv.mean()),
            ("Median", rv.median()),
            ("Mode", max(k - 1, 0)),
        )

        ax = sns.lineplot(x=x, y=pdf)
        # colors[0] is taken by the density curve, so markers start at colors[1].
        for color, (label, value) in zip(colors[1:], markers):
            ax.axvline(x=value, label=label, c=color)
        ax.legend()
# Bind the UI definition and the server function into the Shiny application
# object that shinylive picks up and runs.
app = App(ui=app_ui, server=server)