commit 5603efceb2dfe94f5f000b8a4f7b68b2a6b638a2 Author: Brandon Rozek Date: Tue Dec 19 18:48:21 2017 -0500 Added bootstrap hypothesis test for means diff --git a/.Rbuildignore b/.Rbuildignore new file mode 100644 index 0000000..91114bf --- /dev/null +++ b/.Rbuildignore @@ -0,0 +1,2 @@ +^.*\.Rproj$ +^\.Rproj\.user$ diff --git a/.gitignore b/.gitignore new file mode 100644 index 0000000..5b6a065 --- /dev/null +++ b/.gitignore @@ -0,0 +1,4 @@ +.Rproj.user +.Rhistory +.RData +.Ruserdata diff --git a/DESCRIPTION b/DESCRIPTION new file mode 100644 index 0000000..f145d3b --- /dev/null +++ b/DESCRIPTION @@ -0,0 +1,11 @@ +Package: bootstrapr +Type: Package +Title: What the Package Does (Title Case) +Version: 0.1.0 +Author: Who wrote it +Maintainer: The package maintainer +Description: More about what it does (maybe more than one line) + Use four spaces when indenting paragraphs within the Description. +License: What license is it under? +Encoding: UTF-8 +LazyData: true \ No newline at end of file diff --git a/NAMESPACE b/NAMESPACE new file mode 100644 index 0000000..d75f824 --- /dev/null +++ b/NAMESPACE @@ -0,0 +1 @@ +exportPattern("^[[:alpha:]]+") diff --git a/R/bootstrap_mean.R b/R/bootstrap_mean.R new file mode 100644 index 0000000..32b0385 --- /dev/null +++ b/R/bootstrap_mean.R @@ -0,0 +1,118 @@ +bootstrap.mean.test = function(x, y = NULL, alternative = c("two.sided", "less", "greater"), mu = 0, conf.level = 0.95, bootstrap_samples = 1000000) { + ## Select the alternative chosen + alternative = match.arg(alternative) + + ## Make sure argument mu is correct + if(!missing(mu) && (length(mu) != 1 || is.na(mu))) { + stop("'mu' must be a single number") + } + + ## Make sure confidence interval is between 0 and 1 + if(!missing(conf.level) && (length(conf.level) != 1 || !is.finite(conf.level) || conf.level < 0 || conf.level > 1)) { + stop("'conf.level' must be a single number between 0 and 1") + } + + if (is.null(y)) { + dname = deparse(substitute(x)) + } else { + dname = paste(deparse(substitute(x)), "and", deparse(substitute(y))) + } + + ## Take out missing values + x = x[!is.na(x)] + if (!is.null(y)) { + y = y[!is.na(y)] + } + + ## Check to see if we have enough observations and enough variation in data + nx = length(x) + mx = mean(x) + vx = var(x) + if (is.null(y)) { + if (nx < 2) { + stop("not enough 'x' observations") + } + stderr = sqrt(vx / nx) + if (stderr < 10 * .Machine$double.eps * abs(mx)) { + stop("data are essentially constant") + } + } else { + ny = length(y) + if(nx < 2) { + stop("not enough 'x' observations") + } + if(ny < 2) { + stop("not enough 'y' observations") + } + my = mean(y) + vy = var(y) + stderrx = sqrt(vx / nx) + stderry = sqrt(vy / ny) + stderr = sqrt(stderrx^2 + stderry^2) + if(stderr < 10 * .Machine$double.eps * max(abs(mx), abs(my))) { + stop("data are essentially constant") + } + } + + + ## Preallocate the vectors + bootstrap_replicants = numeric(bootstrap_samples) + bootstrap_means = numeric(bootstrap_samples) + ## If it's a one sample bootstrap test + if (is.null(y)) { + observed_difference = mx - mu + ## Simulate null hypothesis by shifting x to have a new mean of mu + x_shifted = x - mx + mu + for (i in 1:bootstrap_samples) { + ## Resample under the null hypothesis the difference between the means of mu + bootstrap_xshifted = sample(x_shifted, nx, replace = T) + bootstrap_replicants[i] = mean(bootstrap_xshifted) - mu + ## Resample original x for confidence interval + bootstrap_x = sample(x, nx, replace = T) + bootstrap_means[i] = mean(bootstrap_x) + } + method = "One Sample Bootstrap Test" + estimate = setNames(mx, "mean of x") + + } else { ## If it's a two sample bootstrap test + observed_difference = mx - my - mu + ## Under the null hypothesis the difference in means between x and y is mu + pooled_mean = mean(c(x, y)) + mu + x_shifted = x - mx + pooled_mean + y_shifted = y - my + pooled_mean + for (i in 1:bootstrap_samples) { + ## Resample under null hypothesis the difference of means + bootstrap_xshifted = sample(x_shifted, nx, replace = T) + bootstrap_yshifted = sample(y_shifted, ny, replace = T) + bootstrap_replicants[i] = mean(bootstrap_xshifted) - mean(bootstrap_yshifted) - mu + ## Resample original data for confidence interval + bootstrap_x = sample(x, nx, replace = T) + bootstrap_y = sample(x, ny, replace = T) + bootstrap_means[i] = mean(bootstrap_x) - mean(bootstrap_y) + } + method = "Two Sample Bootstrap Test" + estimate = c(mx,my) + names(estimate) = c("mean of x","mean of y") + } + + if (alternative == "two.sided") { + pval = sum(abs(bootstrap_replicants) >= abs(observed_difference)) / bootstrap_samples + lower_bound = quantile(bootstrap_means, (1 - conf.level) / 2) + upper_bound = quantile(bootstrap_means, conf.level + (1 - conf.level) / 2) + } else if (alternative == "less") { + pval = sum(bootstrap_replicants <= observed_difference) / bootstrap_samples + lower_bound = -Inf + upper_bound = quantile(bootstrap_means, conf.level) + } else if (alternative == "greater") { + pval = sum(bootstrap_replicants >= observed_difference) / bootstrap_samples + lower_bound = quantile(bootstrap_means, 1 - conf.level) + upper_bound = Inf + } + cint = c(lower_bound, upper_bound) + names(mu) = if(!is.null(y)) "difference in means" else "mean" + attr(cint,"conf.level") = conf.level + rval = list(p.value = pval, conf.int = cint, estimate = estimate, null.value = mu, + alternative = alternative, method = method, data.name = dname) + class(rval) = "htest" + return(rval) +} \ No newline at end of file diff --git a/bootstrapr.Rproj b/bootstrapr.Rproj new file mode 100644 index 0000000..497f8bf --- /dev/null +++ b/bootstrapr.Rproj @@ -0,0 +1,20 @@ +Version: 1.0 + +RestoreWorkspace: Default +SaveWorkspace: Default +AlwaysSaveHistory: Default + +EnableCodeIndexing: Yes +UseSpacesForTab: Yes +NumSpacesForTab: 2 +Encoding: UTF-8 + +RnwWeave: Sweave +LaTeX: pdfLaTeX + +AutoAppendNewline: Yes +StripTrailingWhitespace: Yes + +BuildType: Package +PackageUseDevtools: Yes +PackageInstallArgs: --no-multiarch --with-keep.source diff --git a/man/bootstrap.mean.test.Rd b/man/bootstrap.mean.test.Rd new file mode 100644 index 0000000..a7e093a --- /dev/null +++ b/man/bootstrap.mean.test.Rd @@ -0,0 +1,65 @@ +\name{bootstrap.mean.test} +\alias{bootstrap.mean.test} +\title{Bootstrap Test for Means} +\description{Performs one and two sample bootstrap tests for means on vectors of data} +\usage{ +bootstrap.mean.test(x, ...) + +## Default method: +bootstrap.mean.test(x, y = NULL, alternative = c("two.sided", "less", "greater"), mu = 0, conf.level = 0.95, bootstrap_samples = 1e+06) +} +\arguments{ + \item{x}{a (non-empty) numeric vector of data values.} + \item{y}{an optional (non-empty) numeric vector of data values.} + \item{alternative}{a character string specifying the alternative hypothesis, must be one of "two.sided" (default), "greater" or "less". You can specify just the initial letter.} + \item{mu}{a number indicating the true value of the mean (or difference in means if you are performing a two sample test).} + \item{conf.level}{confidence level of the interval.} + \item{bootstrap_samples}{the number of bootstrap replicants to generate. Influences the accuracy of the p-value and confidence interval} +} +\details{ +alternative = "greater" is the alternative that x has a larger mean than y. + +If the input data are effectively constant (compared to the larger of the two means) an error is generated. +} +\value{ +A list with class \code{"htest"} containing the following components: + \item{p.value}{the p-value for the test.} + \item{conf.int}{a confidence interval for the mean appropriate to the + specified alternative hypothesis.} + \item{estimate}{the estimated mean or difference in means depending on + whether it was a one-sample test or a two-sample test.} + \item{null.value}{the specified hypothesized value of the mean or mean + difference depending on whether it was a one-sample test or a + two-sample test.} + \item{alternative}{a character string describing the alternative + hypothesis.} + \item{method}{a character string indicating what type of t-test was + performed.} + \item{data.name}{a character string giving the name(s) of the data.} +} +\references{ +Gould, Rob. Bootstrap Hypothesis Test. PDF. +http://www.stat.ucla.edu/~rgould/110as02/bshypothesis.pdf + +Myung, Jay. Bootstrap Hypothesis Testing. PDF. +http://faculty.psy.ohio-state.edu/myung/personal/course/826/bootstrap_hypo.pdf + +} +\author{ +Brandon Rozek (brozek@mail.umw.edu) +} + +\seealso{ + \code{\link{t.test}} +} +\examples{ +x = rnorm(200, 2, 5) +y = rnorm(200, 2, 5) +# One sample test +bootstrap.mean.test(x, mu = 1) + +# Two sample test +bootstrap.mean.test(x, y) +} + +\keyword{htest}