CLT

Central Limit Theorem (CLT)

Live Session 2 CLT

Simulator to Demonstrate CLT

Control Parameters

# Open an interactive file picker and load the chosen CSV file; the first row
# of the file is treated as the column names.
# NOTE(review): df is not referenced by the simulation code visible in this
# section -- confirm it is still needed.
df <- read.csv(file = file.choose(), header = TRUE)
# Observations per sample for the first sampling distribution.
n1 <- 10
# Observations per sample for the second sampling distribution (the two
# distributions are compared against each other).
n2 <- 100
# Number of samples to draw, and therefore the number of sample means (xbars)
# generated for each distribution.
simulations <- 1000
# Mean of the normal distribution the samples are drawn from.
mu <- 0
# Standard deviation of the normal distribution the samples are drawn from.
sigma <- 1

Data Holder

# Pre-allocated, zero-filled storage with one slot per simulated sample mean.
# Holds the sample means for the first distribution.
xbar_holder1 <- vector(mode = "numeric", length = simulations)
# Holds the sample means for the second distribution.
xbar_holder2 <- vector(mode = "numeric", length = simulations)

Simulate and Store

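Generate 1000 samples of size 10 and 1000 samples of size 100, and find the mean of each sample. Then store each mean in the corresponding holder vector (xbar_holder1 for the size-10 samples, xbar_holder2 for the size-100 samples).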

# Draw `simulations` pairs of normal samples (one of size n1, one of size n2)
# and record the mean of each sample in the matching holder vector.
#
# seq_len() is used rather than 1:simulations so that simulations = 0 runs the
# loop zero times; 1:0 would iterate over c(1, 0) and write a spurious value.
for (i in seq_len(simulations)) {
  # Within each iteration the size-n1 sample is drawn before the size-n2
  # sample, so the order of random draws is fixed for a given RNG state.
  sample1 <- rnorm(n1, mean = mu, sd = sigma)
  sample2 <- rnorm(n2, mean = mu, sd = sigma)

  # Sample means (xbars) for this iteration.
  xbar1 <- mean(sample1)
  xbar2 <- mean(sample2)

  # Store each mean at position i of its holder.
  xbar_holder1[i] <- xbar1
  xbar_holder2[i] <- xbar2
}

Display the Distribution of Sample Means

(plot a histogram of the sample means)

# Stack the two histograms in a 2-row, 1-column layout. Both use the same
# x-axis limits so the spread of the two sets of sample means can be compared.
par(mfrow = c(2, 1))

# Sample means from the samples of size n1.
hist(
  xbar_holder1,
  xlim = c(-4, 4),
  col = "blue",
  xlab = "Dist 1 Sample Means",
  main = paste("Distribution of the sample mean: n = ", n1)
)

# Sample means from the samples of size n2.
hist(
  xbar_holder2,
  xlim = c(-4, 4),
  col = "red",
  xlab = "Dist 2 Sample Means",
  main = paste("Distribution of the sample mean: n = ", n2)
)

Summary Statistics

# Summary statistics of the simulated sample means. Lines beginning with "##"
# are console output captured from one example run; no seed is set in the
# visible code, so the exact values will presumably differ between runs.
summary(xbar_holder1) # min, quartiles, median and mean of the distribution 1 sample means
##      Min.   1st Qu.    Median      Mean   3rd Qu.      Max.
## -1.088325 -0.209445  0.004788  0.002920  0.207813  1.147680
summary(xbar_holder2) # min, quartiles, median and mean of the distribution 2 sample means
##       Min.    1st Qu.     Median       Mean    3rd Qu.       Max.
## -0.3202452 -0.0664461 -0.0004730  0.0005641  0.0703040  0.3424002
sd(xbar_holder1) # standard deviation of distribution 1; theory: sigma / sqrt(n1) = 1 / sqrt(10) ~ 0.316
## [1] 0.3193035
sd(xbar_holder2) # standard deviation of distribution 2; theory: sigma / sqrt(n2) = 1 / sqrt(100) = 0.1
## [1] 0.102087