“Student” t-distribution

“Student” t-distribution#

Visit Wikipedia for a quick review. The name is because of the developer, William Sealy Gosset, who worked for Guinness and published under the pseudonym “Student”.

# Author: Jake VanderPlas
# License: BSD
#   The figure produced by this code is published in the textbook
#   "Statistics, Data Mining, and Machine Learning in Astronomy" (2013)
#   For more information, see http://astroML.github.com
#   To report a bug or issue, use the following forum:
#    https://groups.google.com/forum/#!forum/astroml-general
import numpy as np
from scipy.stats import t as student_t
from matplotlib import pyplot as plt
%matplotlib inline

plt.rcParams['figure.figsize'] = 12, 10 
plt.rcParams['font.size'] = 16 

#----------------------------------------------------------------------
# This function adjusts matplotlib settings for a uniform feel in the textbook.
# Note that with usetex=True, fonts are rendered with LaTeX.  This may
# result in an error if LaTeX is not installed on your system.  In that case,
# you can set usetex to False.
# from astroML.plotting import setup_text_plots
# setup_text_plots(fontsize=8, usetex=True)

#------------------------------------------------------------
# Define the distribution parameters to be plotted
mu = 0
k_values = [1E10, 2, 1, 0.5]
linestyles = ['-', '--', ':', '-.']
x = np.linspace(-10, 10, 1000)

#------------------------------------------------------------
# plot the distributions
fig, ax = plt.subplots(figsize=(5, 3.75))

for k, ls in zip(k_values, linestyles):
    dist = student_t(k, 0)

    if k >= 1E10:
        label = r'$\mathrm{t}(k=\infty)$'
    else:
        label = r'$\mathrm{t}(k=%.1f)$' % k

    plt.plot(x, dist.pdf(x), ls=ls, c='black', label=label)

plt.xlim(-5, 5)
plt.ylim(0.0, 0.45)

plt.xlabel('$x$')
plt.ylabel(r'$p(x|k)$')
plt.title("Student's $t$ Distribution")

plt.legend(bbox_to_anchor=(1.25, 1.0))
plt.show()
../_images/30bb63283076219d2414e5541d0ff0bcd70dbc82429f86470a8ecf905d3b3316.png
## Lets do an example, thanks to Prof. Cimbala
from IPython.display import YouTubeVideo
# a talk about IPython at Sage Days at U. Washington, Seattle.
# Video credit: William Stein.
YouTubeVideo('pVk3w9aaSSo')
# measured quantities:
x_mean = 8.240 # kOhm
Sx = 0.314 # kOhm
N = 20 # samples

# for the 95 confidence level
confidence_level = 0.95 # 95%
alpha = 1 - confidence_level
degrees_of_freedom = N - 1
# how to get values from the distribution? try to google it down:
# http://stackoverflow.com/questions/27315161/displaying-a-youtube-clip-in-python
#Student, n=999, p<0.05, 2-tail
#equivalent to Excel TINV(0.05,999)
print('Two tail problem:'); 
print(student_t.ppf(1-alpha/2.0, degrees_of_freedom))
t_value = student_t.ppf(1-alpha/2.0, degrees_of_freedom)

#Student, n=999, p<0.05%, Single tail
#equivalent to Excel TINV(2*0.05,999)
print('Single tail:')
print(student_t.ppf(1-alpha, degrees_of_freedom))
Two tail problem:
2.093024054408263
Single tail:
1.729132811521367
# the answer: 
# the population mean is 

print('Population mean is :')
print('{:.3f} +- {:.3f}'.format(x_mean, t_value*Sx/(np.sqrt(N))))
Population mean is :
8.240 +- 0.147
confidence_level = 0.99 # 95%
alpha = 1 - confidence_level
t_value = student_t.ppf(1-alpha/2.0, degrees_of_freedom)
print('t-value is higher {:.3f}'.format(t_value))
print('Population mean is :')
print('{:.3f} +- {:.3f}'.format(x_mean, t_value*Sx/(np.sqrt(N))))
t-value is higher 2.861
Population mean is :
8.240 +- 0.201