Added bootstrap hypothesis test for means
This commit is contained in:
commit
5603efceb2
7 changed files with 221 additions and 0 deletions
2
.Rbuildignore
Normal file
2
.Rbuildignore
Normal file
|
@ -0,0 +1,2 @@
|
|||
^.*\.Rproj$
|
||||
^\.Rproj\.user$
|
4
.gitignore
vendored
Normal file
4
.gitignore
vendored
Normal file
|
@ -0,0 +1,4 @@
|
|||
.Rproj.user
|
||||
.Rhistory
|
||||
.RData
|
||||
.Ruserdata
|
11
DESCRIPTION
Normal file
11
DESCRIPTION
Normal file
|
@ -0,0 +1,11 @@
|
|||
Package: bootstrapr
|
||||
Type: Package
|
||||
Title: What the Package Does (Title Case)
|
||||
Version: 0.1.0
|
||||
Author: Who wrote it
|
||||
Maintainer: The package maintainer <yourself@somewhere.net>
|
||||
Description: More about what it does (maybe more than one line)
|
||||
Use four spaces when indenting paragraphs within the Description.
|
||||
License: What license is it under?
|
||||
Encoding: UTF-8
|
||||
LazyData: true
|
1
NAMESPACE
Normal file
1
NAMESPACE
Normal file
|
@ -0,0 +1 @@
|
|||
exportPattern("^[[:alpha:]]+")
|
118
R/bootstrap_mean.R
Normal file
118
R/bootstrap_mean.R
Normal file
|
@ -0,0 +1,118 @@
|
|||
bootstrap.mean.test = function(x, y = NULL, alternative = c("two.sided", "less", "greater"), mu = 0, conf.level = 0.95, bootstrap_samples = 1000000) {
|
||||
## Select the alternative chosen
|
||||
alternative = match.arg(alternative)
|
||||
|
||||
## Make sure argument mu is correct
|
||||
if(!missing(mu) && (length(mu) != 1 || is.na(mu))) {
|
||||
stop("'mu' must be a single number")
|
||||
}
|
||||
|
||||
## Make sure confidence interval is between 0 and 1
|
||||
if(!missing(conf.level) && (length(conf.level) != 1 || !is.finite(conf.level) || conf.level < 0 || conf.level > 1)) {
|
||||
stop("'conf.level' must be a single number between 0 and 1")
|
||||
}
|
||||
|
||||
if (is.null(y)) {
|
||||
dname = deparse(substitute(x))
|
||||
} else {
|
||||
dname = paste(deparse(substitute(x)), "and", deparse(substitute(y)))
|
||||
}
|
||||
|
||||
## Take out missing values
|
||||
x = x[!is.na(x)]
|
||||
if (!is.null(y)) {
|
||||
y = y[!is.na(y)]
|
||||
}
|
||||
|
||||
## Check to see if we have enough observations and enough variation in data
|
||||
nx = length(x)
|
||||
mx = mean(x)
|
||||
vx = var(x)
|
||||
if (is.null(y)) {
|
||||
if (nx < 2) {
|
||||
stop("not enough 'x' observations")
|
||||
}
|
||||
stderr = sqrt(vx / nx)
|
||||
if (stderr < 10 * .Machine$double.eps * abs(mx)) {
|
||||
stop("data are essentially constant")
|
||||
}
|
||||
} else {
|
||||
ny = length(y)
|
||||
if(nx < 2) {
|
||||
stop("not enough 'x' observations")
|
||||
}
|
||||
if(ny < 2) {
|
||||
stop("not enough 'y' observations")
|
||||
}
|
||||
my = mean(y)
|
||||
vy = var(y)
|
||||
stderrx = sqrt(vx / nx)
|
||||
stderry = sqrt(vy / ny)
|
||||
stderr = sqrt(stderrx^2 + stderry^2)
|
||||
if(stderr < 10 * .Machine$double.eps * max(abs(mx), abs(my))) {
|
||||
stop("data are essentially constant")
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
## Preallocate the vectors
|
||||
bootstrap_replicants = numeric(bootstrap_samples)
|
||||
bootstrap_means = numeric(bootstrap_samples)
|
||||
## If it's a one sample bootstrap test
|
||||
if (is.null(y)) {
|
||||
observed_difference = mx - mu
|
||||
## Simulate null hypothesis by shifting x to have a new mean of mu
|
||||
x_shifted = x - mx + mu
|
||||
for (i in 1:bootstrap_samples) {
|
||||
## Resample under the null hypothesis the difference between the means of mu
|
||||
bootstrap_xshifted = sample(x_shifted, nx, replace = T)
|
||||
bootstrap_replicants[i] = mean(bootstrap_xshifted) - mu
|
||||
## Resample original x for confidence interval
|
||||
bootstrap_x = sample(x, nx, replace = T)
|
||||
bootstrap_means[i] = mean(bootstrap_x)
|
||||
}
|
||||
method = "One Sample Bootstrap Test"
|
||||
estimate = setNames(mx, "mean of x")
|
||||
|
||||
} else { ## If it's a two sample bootstrap test
|
||||
observed_difference = mx - my - mu
|
||||
## Under the null hypothesis the difference in means between x and y is mu
|
||||
pooled_mean = mean(c(x, y)) + mu
|
||||
x_shifted = x - mx + pooled_mean
|
||||
y_shifted = y - my + pooled_mean
|
||||
for (i in 1:bootstrap_samples) {
|
||||
## Resample under null hypothesis the difference of means
|
||||
bootstrap_xshifted = sample(x_shifted, nx, replace = T)
|
||||
bootstrap_yshifted = sample(y_shifted, ny, replace = T)
|
||||
bootstrap_replicants[i] = mean(bootstrap_xshifted) - mean(bootstrap_yshifted) - mu
|
||||
## Resample original data for confidence interval
|
||||
bootstrap_x = sample(x, nx, replace = T)
|
||||
bootstrap_y = sample(x, ny, replace = T)
|
||||
bootstrap_means[i] = mean(bootstrap_x) - mean(bootstrap_y)
|
||||
}
|
||||
method = "Two Sample Bootstrap Test"
|
||||
estimate = c(mx,my)
|
||||
names(estimate) = c("mean of x","mean of y")
|
||||
}
|
||||
|
||||
if (alternative == "two.sided") {
|
||||
pval = sum(abs(bootstrap_replicants) >= abs(observed_difference)) / bootstrap_samples
|
||||
lower_bound = quantile(bootstrap_means, (1 - conf.level) / 2)
|
||||
upper_bound = quantile(bootstrap_means, conf.level + (1 - conf.level) / 2)
|
||||
} else if (alternative == "less") {
|
||||
pval = sum(bootstrap_replicants <= observed_difference) / bootstrap_samples
|
||||
lower_bound = -Inf
|
||||
upper_bound = quantile(bootstrap_means, conf.level)
|
||||
} else if (alternative == "greater") {
|
||||
pval = sum(bootstrap_replicants >= observed_difference) / bootstrap_samples
|
||||
lower_bound = quantile(bootstrap_means, 1 - conf.level)
|
||||
upper_bound = Inf
|
||||
}
|
||||
cint = c(lower_bound, upper_bound)
|
||||
names(mu) = if(!is.null(y)) "difference in means" else "mean"
|
||||
attr(cint,"conf.level") = conf.level
|
||||
rval = list(p.value = pval, conf.int = cint, estimate = estimate, null.value = mu,
|
||||
alternative = alternative, method = method, data.name = dname)
|
||||
class(rval) = "htest"
|
||||
return(rval)
|
||||
}
|
20
bootstrapr.Rproj
Normal file
20
bootstrapr.Rproj
Normal file
|
@ -0,0 +1,20 @@
|
|||
Version: 1.0
|
||||
|
||||
RestoreWorkspace: Default
|
||||
SaveWorkspace: Default
|
||||
AlwaysSaveHistory: Default
|
||||
|
||||
EnableCodeIndexing: Yes
|
||||
UseSpacesForTab: Yes
|
||||
NumSpacesForTab: 2
|
||||
Encoding: UTF-8
|
||||
|
||||
RnwWeave: Sweave
|
||||
LaTeX: pdfLaTeX
|
||||
|
||||
AutoAppendNewline: Yes
|
||||
StripTrailingWhitespace: Yes
|
||||
|
||||
BuildType: Package
|
||||
PackageUseDevtools: Yes
|
||||
PackageInstallArgs: --no-multiarch --with-keep.source
|
65
man/bootstrap.mean.test.Rd
Normal file
65
man/bootstrap.mean.test.Rd
Normal file
|
@ -0,0 +1,65 @@
|
|||
\name{bootstrap.mean.test}
|
||||
\alias{bootstrap.mean.test}
|
||||
\title{Bootstrap Test for Means}
|
||||
\description{Performs one and two sample bootstrap tests for means on vectors of data}
|
||||
\usage{
|
||||
bootstrap.mean.test(x, ...)
|
||||
|
||||
## Default method:
|
||||
bootstrap.mean.test(x, y = NULL, alternative = c("two.sided", "less", "greater"), mu = 0, conf.level = 0.95, bootstrap_samples = 1e+06)
|
||||
}
|
||||
\arguments{
|
||||
\item{x}{a (non-empty) numeric vector of data values.}
|
||||
\item{y}{an optional (non-empty) numeric vector of data values.}
|
||||
\item{alternative}{a character string specifying the alternative hypothesis, must be one of "two.sided" (default), "greater" or "less". You can specify just the initial letter.}
|
||||
\item{mu}{a number indicating the true value of the mean (or difference in means if you are performing a two sample test).}
|
||||
\item{conf.level}{confidence level of the interval.}
|
||||
\item{bootstrap_samples}{the number of bootstrap replicants to generate. Influences the accuracy of the p-value and confidence interval}
|
||||
}
|
||||
\details{
|
||||
alternative = "greater" is the alternative that x has a larger mean than y.
|
||||
|
||||
If the input data are effectively constant (compared to the larger of the two means) an error is generated.
|
||||
}
|
||||
\value{
|
||||
A list with class \code{"htest"} containing the following components:
|
||||
\item{p.value}{the p-value for the test.}
|
||||
\item{conf.int}{a confidence interval for the mean appropriate to the
|
||||
specified alternative hypothesis.}
|
||||
\item{estimate}{the estimated mean or difference in means depending on
|
||||
whether it was a one-sample test or a two-sample test.}
|
||||
\item{null.value}{the specified hypothesized value of the mean or mean
|
||||
difference depending on whether it was a one-sample test or a
|
||||
two-sample test.}
|
||||
\item{alternative}{a character string describing the alternative
|
||||
hypothesis.}
|
||||
\item{method}{a character string indicating what type of t-test was
|
||||
performed.}
|
||||
\item{data.name}{a character string giving the name(s) of the data.}
|
||||
}
|
||||
\references{
|
||||
Gould, Rob. Bootstrap Hypothesis Test. PDF.
|
||||
http://www.stat.ucla.edu/~rgould/110as02/bshypothesis.pdf
|
||||
|
||||
Myung, Jay. Bootstrap Hypothesis Testing. PDF.
|
||||
http://faculty.psy.ohio-state.edu/myung/personal/course/826/bootstrap_hypo.pdf
|
||||
|
||||
}
|
||||
\author{
|
||||
Brandon Rozek (brozek@mail.umw.edu)
|
||||
}
|
||||
|
||||
\seealso{
|
||||
\code{\link{t.test}}
|
||||
}
|
||||
\examples{
|
||||
x = rnorm(200, 2, 5)
|
||||
y = rnorm(200, 2, 5)
|
||||
# One sample test
|
||||
bootstrap.mean.test(x, mu = 1)
|
||||
|
||||
# Two sample test
|
||||
bootstrap.mean.test(x, y)
|
||||
}
|
||||
|
||||
\keyword{htest}
|
Loading…
Reference in a new issue