# Sample observations used by the manual (pure-Python) calculations.
values = [7, 2, 3, 4, 5, 6, 7, 7, 9, 4]
Mean
# Arithmetic mean: the sum of the observations divided by how many there are.
mean = sum(values) / len(values)
5.4
Median
# Median: the middle observation of the sorted data, or the average of the
# two middle observations when the number of observations is even.
values = sorted(values)
n = len(values)
if n % 2:
    # Odd count: exactly one element sits in the middle.
    median = values[n // 2]
else:
    # Even count: average the two elements on either side of the midpoint.
    lo_med, hi_med = values[n // 2 - 1], values[n // 2]
    median = (lo_med + hi_med) / 2
5.5
Mode
# Mode: the most frequently occurring value. First tally how often each
# value appears, then keep the value with the highest tally.
freqs = {}
for value in values:
    if value in freqs:
        freqs[value] += 1
    else:
        freqs[value] = 1

mode = -1    # placeholder; stays -1 only if `values` is empty
hi_freq = 0  # highest tally seen so far
for value, freq in freqs.items():
    # Strict '>' means the first value encountered wins when tallies tie.
    if freq > hi_freq:
        mode = value
        # Remember the winning tally so that later, less frequent values
        # cannot displace the current mode.
        hi_freq = freq
7
import pandas as pd
# The same observations wrapped in a pandas Series so that its built-in
# statistical methods can be used.
values = pd.Series([7, 2, 3, 4, 5, 6, 7, 7, 9, 4])
Mean
# Arithmetic mean computed by pandas.
mean = values.mean()
5.4
Median
# Median computed by pandas (no manual sorting needed).
median = values.median()
5.5
Mode
# Series.mode() returns a Series containing every modal value (there may be
# ties), not a bare scalar; index it (e.g. mode.iloc[0]) to get one value.
mode = values.mode()
7
# Plain Python list again, used for the manual measures of spread.
values = [7, 2, 3, 4, 5, 6, 7, 7, 9, 4]
Range
# Range: difference between the largest and the smallest observation.
rng = max(values) - min(values)
7
Standard Deviation
# Sample standard deviation: the square root of the sum of squared
# deviations from the mean divided by (n - 1).
mean = sum(values) / len(values)
sum_2 = 0  # running sum of squared deviations
for value in values:
    deviation = value - mean
    sum_2 += deviation ** 2
variance = sum_2 / (len(values) - 1)  # n - 1: sample (not population) variance
std_dev = variance ** 0.5
≈ 2.1705
Interquartile Range
# Interquartile range: spread of the middle half of the sorted data, taking
# the elements one quarter and three quarters of the way through the list
# (a simple index-based estimate with no interpolation).
values = sorted(values)
n = len(values)
q1, q3 = values[n // 4], values[3 * n // 4]
iqr = q3 - q1
3
# A pandas Series (not a plain list) so that the .max(), .min(), .std() and
# .quantile() calls in the cells that follow are available.
values = pd.Series([7, 2, 3, 4, 5, 6, 7, 7, 9, 4])
Range
# Range via the max()/min() methods.
# NOTE(review): `values` must be a pandas Series here; a plain list has no
# .max()/.min() methods.
rng = values.max() - values.min()
7
Standard Deviation
# Standard deviation via pandas; Series.std() defaults to ddof=1, i.e. the
# sample standard deviation.
# NOTE(review): `values` must be a pandas Series here; a plain list has no
# .std() method.
std_dev = values.std()
≈ 2.1705
Interquartile Range
# Interquartile range from the first (25%) and third (75%) quartiles.
q1, q3 = (values.quantile(q) for q in (0.25, 0.75))
iqr = q3 - q1
3.0
values = pd.Series([7, 2, 3, 4, 5, 6, 7, 7, 9, 4])
# Percentiles are quantiles on a 0-1 scale: the 5th, 25th, 50th, 75th and
# 95th percentiles. The 50th percentile (p50) is the same as the median.
p05, p25, p50, p75, p95 = (
    values.quantile(q) for q in (0.05, 0.25, 0.50, 0.75, 0.95)
)
import pandas as pd
# Two columns where y is exactly 2 * x, i.e. a perfect linear relationship.
data = {'x': [1, 2, 3, 4, 5], 'y': [2, 4, 6, 8, 10]}
df = pd.DataFrame(data)
x_col, y_col = df['x'], df['y']
corr = x_col.corr(y_col)  # correlation coefficient (Pearson by default)
cov = x_col.cov(y_col)    # sample covariance between the two columns
Is it significantly outside the inner fences?
We can also look at the z-score to determine if it's an outlier.
Outliers are only outliers until they're not.
Francis Galton introduced the concept of "regression" to describe the tendency of the offspring of unusually tall or short parents to revert towards the population average rather than matching their parents' extremes, a phenomenon he observed in the heights of parents and their children.