Update: @Paje points out that the theory behind the computations assumes that the population is normally distributed; when normality is violated, the theory no longer holds exactly. It's an important point, but keep in mind that "not exact" doesn't necessarily mean "wrong"; it means "approximate". The relevant question is: is the normality assumption reasonably well satisfied to justify my analysis? So, for fun, I've updated the simulation to sample from (a) the normal distribution; (b) the Laplace distribution, which is symmetric with heavier tails than the normal; (c) the log-normal distribution, which is skewed to the right.
Terminology: The half-width of a confidence interval is known as its margin of error. I'll use "margin" and "half-width" interchangeably.
It seems that by "end up with the correct width" you mean that if we calculate the sample size $n$ to achieve a margin of error of $\operatorname{margin}$, the half-width of the resulting confidence interval will be exactly $\operatorname{margin}$. Not quite.
If we have an accurate estimate $\hat{\sigma}$ of the true standard deviation $\sigma$, then we don't need to estimate $\sigma$ from the experimental data. We can plug $\hat{\sigma}$ into the formula for the confidence interval, and the margin of error $z_{\alpha/2}\hat{\sigma}/\sqrt{n}$ is fixed.
If we decide to estimate $\sigma$ with the sample standard deviation $s$, then the margin of error is $t_{\alpha/2,n-1}s/\sqrt{n}$ as you point out. And since $s$ is a random variable, it can be either smaller or bigger than $\sigma$. In other words, if we repeat the experiment with the same sample size $n$, the margin of error $t_{\alpha/2,n-1}s/\sqrt{n}$ will vary from replication to replication because $s$ varies. It's never going to be exactly equal to $\operatorname{margin}$.
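To make this concrete, here is a minimal sketch (an illustration of my own, separate from the simulation further down) that repeatedly samples from a standard normal population with $n = 17$, the sample size the table below reports for a target margin of 0.5, and records the realized $t$-based half-width each time:

```r
# With n fixed, the realized t-based half-width still varies from
# replication to replication because s varies.
set.seed(1)
n <- 17       # sample size for a target margin of 0.5 (see the table below)
alpha <- 0.05
half_width <- replicate(10000, {
  s <- sd(rnorm(n, mean = 0, sd = 1))
  qt(1 - alpha / 2, df = n - 1) * s / sqrt(n)
})
summary(half_width)  # scattered around the 0.5 target, not exactly equal to it
```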
What matters is that the coverage of the confidence interval is $100(1-\alpha)\%$, i.e., if we repeat the experiment many times, $95\%$ of the confidence intervals thus constructed will contain the true mean. As long as the "known" $\hat{\sigma}$ is an accurate estimate of the true standard deviation $\sigma$ and the distribution is not too asymmetric, the sample size calculation $n \approx (z_{\alpha/2}\hat{\sigma}/\operatorname{margin})^2$ results in (approximately) correct coverage for the $z$ and $t$ confidence intervals. The approximation gets better as the sample size grows, i.e., as the margin of error shrinks.
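As a quick sanity check of that formula, here is the arithmetic behind the first row of the table below (normal population, target margin of 0.5, and a "known" $\hat{\sigma}$ taken to be 2.5% above the true $\sigma = 1$, as in the simulation code further down):

```r
# Sample size from the normal approximation: (z_{alpha/2} * sigma_hat / margin)^2, rounded up.
alpha <- 0.05
margin <- 0.5
sigma_hat <- 1 * 1.025                     # "known" sd, assumed 2.5% above the true sd
z_alpha <- qnorm(1 - alpha / 2)            # about 1.96
ceiling((z_alpha * sigma_hat / margin)^2)  # 17, matching the first row of the table
```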
| distribution | mean | std.dev | margin |    n | z_coverage | z_lower | z_upper | t_coverage | t_lower | t_upper |
|:-------------|-----:|--------:|-------:|-----:|-----------:|--------:|--------:|-----------:|--------:|--------:|
| normal       |    0 |    1    |   0.50 |   17 |     0.9498 |  0.0264 |  0.0238 |     0.9456 |  0.0268 |  0.0276 |
| laplace      |    0 |    1.41 |   0.50 |   33 |     0.9564 |  0.0202 |  0.0234 |     0.9544 |  0.0218 |  0.0238 |
| lognormal    | 1.65 |    2.16 |   0.50 |   76 |     0.9560 |  0.0060 |  0.0380 |     0.9146 |  0.0818 |  0.0036 |
| normal       |    0 |    1    |   0.10 |  404 |     0.9598 |  0.0212 |  0.0190 |     0.9568 |  0.0224 |  0.0208 |
| laplace      |    0 |    1.41 |   0.10 |  808 |     0.9588 |  0.0212 |  0.0200 |     0.9538 |  0.0246 |  0.0216 |
| lognormal    | 1.65 |    2.16 |   0.10 | 1886 |     0.9562 |  0.0160 |  0.0278 |     0.9476 |  0.0358 |  0.0166 |
| normal       |    0 |    1    |   0.05 | 1615 |     0.9530 |  0.0254 |  0.0216 |     0.9488 |  0.0274 |  0.0238 |
| laplace      |    0 |    1.41 |   0.05 | 3229 |     0.9562 |  0.0216 |  0.0222 |     0.9496 |  0.0252 |  0.0252 |
| lognormal    | 1.65 |    2.16 |   0.05 | 7541 |     0.9520 |  0.0216 |  0.0264 |     0.9404 |  0.0366 |  0.0230 |
Here is the R code that calculates the sample size using the normal approximation $n \approx (z_{\alpha/2}\hat{\sigma}/\operatorname{margin})^2$ and then estimates the coverage of the $z$ and $t$ confidence intervals for the mean:
# Parameters of the simulated distributions: mean/sd for the normal,
# meanlog/sdlog for the log-normal, location/scale for the Laplace.
mu_true <- 0
sigma_true <- 1
# The coverage of the confidence intervals should be 100 * (1 - alpha)% = 95%.
alpha <- 0.05
calculate_sample_size <- function(margin, known_sigma) {
# Use the normal approximation to choose the sample size.
z_alpha <- qnorm(1 - alpha / 2)
ceiling((z_alpha * known_sigma / margin)^2)
}
estimate_std_dev <- function(sigma) {
# How accurate is the "known" standard deviation?
# Let's assume it is 2.5% higher than the true std. deviation.
sigma * 1.025
}
# Population mean and standard deviation implied by mu_true and sigma_true
# for each distribution.
get_moments <- function(distribution = c("normal", "laplace", "lognormal")) {
distribution <- match.arg(distribution)
if (distribution == "lognormal") {
# The log-normal distribution is not symmetric;
# it's skewed to the right.
mu_pop <- exp(mu_true + sigma_true^2 / 2)
sd_pop <- sqrt((exp(sigma_true^2) - 1) * exp(2 * mu_true + sigma_true^2))
} else if (distribution == "laplace") {
# The Laplace distribution is symmetric, with mean = location
# and variance = 2 * scale^2.
mu_pop <- mu_true
sd_pop <- sqrt(2) * sigma_true
} else {
mu_pop <- mu_true
sd_pop <- sigma_true
}
c(mu_pop, sd_pop)
}
# Draw one sample of size n from the given distribution and record whether
# the z and t confidence intervals cover the true mean or miss on either side.
coverage <- function(n, distribution = c("normal", "laplace", "lognormal")) {
distribution <- match.arg(distribution)
if (distribution == "lognormal") {
x <- rlnorm(n, meanlog = mu_true, sdlog = sigma_true)
} else if (distribution == "laplace") {
x <- VGAM::rlaplace(n, location = mu_true, scale = sigma_true)
} else {
x <- rnorm(n, mean = mu_true, sd = sigma_true)
}
xbar <- mean(x)
mean_stddev <- get_moments(distribution)
mu_pop <- mean_stddev[1]
sd_pop <- mean_stddev[2]
sd_known <- estimate_std_dev(sd_pop)
z_alpha <- qnorm(1 - alpha / 2)
t_alpha <- qt(1 - alpha / 2, df = n - 1)
# For each of the z and t intervals, record: does the interval contain the
# true mean; does it miss low (lie entirely below the true mean); does it
# miss high (lie entirely above the true mean)?
c(
abs(xbar - mu_pop) < z_alpha * sd_known / sqrt(n),
mu_pop > xbar + z_alpha * sd_known / sqrt(n),
mu_pop < xbar - z_alpha * sd_known / sqrt(n),
abs(xbar - mu_pop) < t_alpha * sd(x) / sqrt(n),
mu_pop > xbar + t_alpha * sd(x) / sqrt(n),
mu_pop < xbar - t_alpha * sd(x) / sqrt(n)
)
}
calculate_coverage <- function(margin_of_error, distribution) {
mean_stddev <- get_moments(distribution)
true_mean <- mean_stddev[1]
true_stddev <- mean_stddev[2]
known_stddev <- estimate_std_dev(true_stddev)
sample_size <- calculate_sample_size(margin_of_error, known_stddev)
nreps <- 5000
stats <- rowMeans(replicate(nreps, coverage(sample_size, distribution)))
data.frame(
"distribution" = distribution,
"mean" = true_mean,
"std dev" = true_stddev,
"margin of error" = margin_of_error,
"sample size" = sample_size,
"z_coverage" = stats[1],
"z_lower" = stats[2],
"z_upper" = stats[3],
"t_coverage" = stats[4],
"t_lower" = stats[5],
"t_upper" = stats[6]
)
}
set.seed(12345)
rows <- data.frame()
for (margin in c(0.5, 0.1, 0.05)) {
for (distribution in c("normal", "laplace", "lognormal")) {
rows <- rbind(rows, calculate_coverage(margin, distribution))
}
}
knitr::kable(rows, format = "pipe")