118 lines
4.6 KiB
R
118 lines
4.6 KiB
R
|
bootstrap.mean.test = function(x, y = NULL, alternative = c("two.sided", "less", "greater"), mu = 0, conf.level = 0.95, bootstrap_samples = 1000000) {
|
||
|
## Select the alternative chosen
|
||
|
alternative = match.arg(alternative)
|
||
|
|
||
|
## Make sure argument mu is correct
|
||
|
if(!missing(mu) && (length(mu) != 1 || is.na(mu))) {
|
||
|
stop("'mu' must be a single number")
|
||
|
}
|
||
|
|
||
|
## Make sure confidence interval is between 0 and 1
|
||
|
if(!missing(conf.level) && (length(conf.level) != 1 || !is.finite(conf.level) || conf.level < 0 || conf.level > 1)) {
|
||
|
stop("'conf.level' must be a single number between 0 and 1")
|
||
|
}
|
||
|
|
||
|
if (is.null(y)) {
|
||
|
dname = deparse(substitute(x))
|
||
|
} else {
|
||
|
dname = paste(deparse(substitute(x)), "and", deparse(substitute(y)))
|
||
|
}
|
||
|
|
||
|
## Take out missing values
|
||
|
x = x[!is.na(x)]
|
||
|
if (!is.null(y)) {
|
||
|
y = y[!is.na(y)]
|
||
|
}
|
||
|
|
||
|
## Check to see if we have enough observations and enough variation in data
|
||
|
nx = length(x)
|
||
|
mx = mean(x)
|
||
|
vx = var(x)
|
||
|
if (is.null(y)) {
|
||
|
if (nx < 2) {
|
||
|
stop("not enough 'x' observations")
|
||
|
}
|
||
|
stderr = sqrt(vx / nx)
|
||
|
if (stderr < 10 * .Machine$double.eps * abs(mx)) {
|
||
|
stop("data are essentially constant")
|
||
|
}
|
||
|
} else {
|
||
|
ny = length(y)
|
||
|
if(nx < 2) {
|
||
|
stop("not enough 'x' observations")
|
||
|
}
|
||
|
if(ny < 2) {
|
||
|
stop("not enough 'y' observations")
|
||
|
}
|
||
|
my = mean(y)
|
||
|
vy = var(y)
|
||
|
stderrx = sqrt(vx / nx)
|
||
|
stderry = sqrt(vy / ny)
|
||
|
stderr = sqrt(stderrx^2 + stderry^2)
|
||
|
if(stderr < 10 * .Machine$double.eps * max(abs(mx), abs(my))) {
|
||
|
stop("data are essentially constant")
|
||
|
}
|
||
|
}
|
||
|
|
||
|
|
||
|
## Preallocate the vectors
|
||
|
bootstrap_replicants = numeric(bootstrap_samples)
|
||
|
bootstrap_means = numeric(bootstrap_samples)
|
||
|
## If it's a one sample bootstrap test
|
||
|
if (is.null(y)) {
|
||
|
observed_difference = mx - mu
|
||
|
## Simulate null hypothesis by shifting x to have a new mean of mu
|
||
|
x_shifted = x - mx + mu
|
||
|
for (i in 1:bootstrap_samples) {
|
||
|
## Resample under the null hypothesis the difference between the means of mu
|
||
|
bootstrap_xshifted = sample(x_shifted, nx, replace = T)
|
||
|
bootstrap_replicants[i] = mean(bootstrap_xshifted) - mu
|
||
|
## Resample original x for confidence interval
|
||
|
bootstrap_x = sample(x, nx, replace = T)
|
||
|
bootstrap_means[i] = mean(bootstrap_x)
|
||
|
}
|
||
|
method = "One Sample Bootstrap Test"
|
||
|
estimate = setNames(mx, "mean of x")
|
||
|
|
||
|
} else { ## If it's a two sample bootstrap test
|
||
|
observed_difference = mx - my - mu
|
||
|
## Under the null hypothesis the difference in means between x and y is mu
|
||
|
pooled_mean = mean(c(x, y)) + mu
|
||
|
x_shifted = x - mx + pooled_mean
|
||
|
y_shifted = y - my + pooled_mean
|
||
|
for (i in 1:bootstrap_samples) {
|
||
|
## Resample under null hypothesis the difference of means
|
||
|
bootstrap_xshifted = sample(x_shifted, nx, replace = T)
|
||
|
bootstrap_yshifted = sample(y_shifted, ny, replace = T)
|
||
|
bootstrap_replicants[i] = mean(bootstrap_xshifted) - mean(bootstrap_yshifted) - mu
|
||
|
## Resample original data for confidence interval
|
||
|
bootstrap_x = sample(x, nx, replace = T)
|
||
|
bootstrap_y = sample(x, ny, replace = T)
|
||
|
bootstrap_means[i] = mean(bootstrap_x) - mean(bootstrap_y)
|
||
|
}
|
||
|
method = "Two Sample Bootstrap Test"
|
||
|
estimate = c(mx,my)
|
||
|
names(estimate) = c("mean of x","mean of y")
|
||
|
}
|
||
|
|
||
|
if (alternative == "two.sided") {
|
||
|
pval = sum(abs(bootstrap_replicants) >= abs(observed_difference)) / bootstrap_samples
|
||
|
lower_bound = quantile(bootstrap_means, (1 - conf.level) / 2)
|
||
|
upper_bound = quantile(bootstrap_means, conf.level + (1 - conf.level) / 2)
|
||
|
} else if (alternative == "less") {
|
||
|
pval = sum(bootstrap_replicants <= observed_difference) / bootstrap_samples
|
||
|
lower_bound = -Inf
|
||
|
upper_bound = quantile(bootstrap_means, conf.level)
|
||
|
} else if (alternative == "greater") {
|
||
|
pval = sum(bootstrap_replicants >= observed_difference) / bootstrap_samples
|
||
|
lower_bound = quantile(bootstrap_means, 1 - conf.level)
|
||
|
upper_bound = Inf
|
||
|
}
|
||
|
cint = c(lower_bound, upper_bound)
|
||
|
names(mu) = if(!is.null(y)) "difference in means" else "mean"
|
||
|
attr(cint,"conf.level") = conf.level
|
||
|
rval = list(p.value = pval, conf.int = cint, estimate = estimate, null.value = mu,
|
||
|
alternative = alternative, method = method, data.name = dname)
|
||
|
class(rval) = "htest"
|
||
|
return(rval)
|
||
|
}
|