Added bootstrap hypothesis test for means

2017-12-19 18:48:21 -05:00 · 2017-12-19 18:48:21 -05:00 · 5603efceb2
commit 5603efceb2
7 changed files with 221 additions and 0 deletions
--- a/.Rbuildignore
+++ b/.Rbuildignore
@ -0,0 +1,2 @@
 ^.*\.Rproj$
 ^\.Rproj\.user$
--- a/.gitignore
+++ b/.gitignore
@ -0,0 +1,4 @@
 .Rproj.user
 .Rhistory
 .RData
 .Ruserdata
--- a/11
+++ b/11
@ -0,0 +1,11 @@
 Package: bootstrapr
 Type: Package
 Title: What the Package Does (Title Case)
 Version: 0.1.0
 Author: Who wrote it
 Maintainer: The package maintainer <yourself@somewhere.net>
 Description: More about what it does (maybe more than one line)
    Use four spaces when indenting paragraphs within the Description.
 License: What license is it under?
 Encoding: UTF-8
 LazyData: true
--- a/1
+++ b/1
@ -0,0 +1 @@
 exportPattern("^[[:alpha:]]+")
--- a/R/bootstrap_mean.R
+++ b/R/bootstrap_mean.R
@ -0,0 +1,118 @@
 bootstrap.mean.test = function(x, y = NULL, alternative = c("two.sided", "less", "greater"), mu = 0, conf.level = 0.95, bootstrap_samples = 1000000) {
    ## Select the alternative chosen
    alternative = match.arg(alternative)
    ## Make sure argument mu is correct
    if(!missing(mu) && (length(mu) != 1 || is.na(mu))) {
        stop("'mu' must be a single number")
    }
    ## Make sure confidence interval is between 0 and 1
    if(!missing(conf.level) && (length(conf.level) != 1 || !is.finite(conf.level) || conf.level < 0 || conf.level > 1)) {
        stop("'conf.level' must be a single number between 0 and 1")
    }
    if (is.null(y)) {
        dname = deparse(substitute(x))
    } else {
        dname = paste(deparse(substitute(x)), "and", deparse(substitute(y)))
    }
    ## Take out missing values
    x = x[!is.na(x)]
    if (!is.null(y)) {
        y = y[!is.na(y)]
    }
    ## Check to see if we have enough observations and enough variation in data
    nx = length(x)
    mx = mean(x)
    vx = var(x)
   if (is.null(y)) {
        if (nx < 2) {
            stop("not enough 'x' observations")
        }
 	    stderr = sqrt(vx / nx)
        if (stderr < 10 * .Machine$double.eps * abs(mx)) {
            stop("data are essentially constant")
        }
    } else {
 	    ny = length(y)
        if(nx < 2) {
            stop("not enough 'x' observations")
        }
 	    if(ny < 2) {
            stop("not enough 'y' observations")
        }
 	    my = mean(y)
 	    vy = var(y)
 	    stderrx = sqrt(vx / nx)
 	    stderry = sqrt(vy / ny)
 	    stderr = sqrt(stderrx^2 + stderry^2)
        if(stderr < 10 * .Machine$double.eps * max(abs(mx), abs(my))) {
            stop("data are essentially constant")
        }
    }
    ## Preallocate the vectors
    bootstrap_replicants = numeric(bootstrap_samples)
    bootstrap_means = numeric(bootstrap_samples)
    ## If it's a one sample bootstrap test
    if (is.null(y)) {
        observed_difference = mx - mu
        ## Simulate null hypothesis by shifting x to have a new mean of mu
        x_shifted = x - mx + mu
        for (i in 1:bootstrap_samples) {
            ## Resample under the null hypothesis the difference between the means of mu
            bootstrap_xshifted = sample(x_shifted, nx, replace = T)
            bootstrap_replicants[i] = mean(bootstrap_xshifted) - mu
            ## Resample original x for confidence interval 
            bootstrap_x = sample(x, nx, replace = T)
            bootstrap_means[i] = mean(bootstrap_x)
        }
        method = "One Sample Bootstrap Test"
        estimate = setNames(mx, "mean of x")
    } else { ## If it's a two sample bootstrap test
        observed_difference = mx - my - mu
        ## Under the null hypothesis the difference in means between x and y is mu
        pooled_mean = mean(c(x, y)) + mu
        x_shifted = x - mx + pooled_mean
        y_shifted = y - my + pooled_mean
        for (i in 1:bootstrap_samples) {
            ## Resample under null hypothesis the difference of means
            bootstrap_xshifted = sample(x_shifted, nx, replace = T)
            bootstrap_yshifted = sample(y_shifted, ny, replace = T)
            bootstrap_replicants[i] = mean(bootstrap_xshifted) - mean(bootstrap_yshifted) - mu
            ## Resample original data for confidence interval
            bootstrap_x = sample(x, nx, replace = T)
            bootstrap_y = sample(x, ny, replace = T)
            bootstrap_means[i] = mean(bootstrap_x) - mean(bootstrap_y)
        }
        method = "Two Sample Bootstrap Test"
        estimate = c(mx,my)
        names(estimate) = c("mean of x","mean of y")
    }
    if (alternative == "two.sided") {
        pval = sum(abs(bootstrap_replicants) >= abs(observed_difference)) / bootstrap_samples
        lower_bound = quantile(bootstrap_means, (1 - conf.level) / 2)
        upper_bound = quantile(bootstrap_means, conf.level + (1 - conf.level) / 2)
    } else if (alternative == "less") {
        pval = sum(bootstrap_replicants <= observed_difference) / bootstrap_samples
        lower_bound = -Inf
        upper_bound = quantile(bootstrap_means, conf.level)
    } else if (alternative == "greater") {
        pval = sum(bootstrap_replicants >= observed_difference) / bootstrap_samples
        lower_bound = quantile(bootstrap_means, 1 - conf.level)
        upper_bound = Inf
    }
    cint = c(lower_bound, upper_bound)
    names(mu) = if(!is.null(y)) "difference in means" else "mean"
    attr(cint,"conf.level") = conf.level
    rval = list(p.value = pval, conf.int = cint, estimate = estimate, null.value = mu,
 	       alternative = alternative, method = method, data.name = dname)
    class(rval) = "htest"
    return(rval)
 }
--- a/bootstrapr.Rproj
+++ b/bootstrapr.Rproj
@ -0,0 +1,20 @@
 Version: 1.0
 RestoreWorkspace: Default
 SaveWorkspace: Default
 AlwaysSaveHistory: Default
 EnableCodeIndexing: Yes
 UseSpacesForTab: Yes
 NumSpacesForTab: 2
 Encoding: UTF-8
 RnwWeave: Sweave
 LaTeX: pdfLaTeX
 AutoAppendNewline: Yes
 StripTrailingWhitespace: Yes
 BuildType: Package
 PackageUseDevtools: Yes
 PackageInstallArgs: --no-multiarch --with-keep.source
--- a/man/bootstrap.mean.test.Rd
+++ b/man/bootstrap.mean.test.Rd
@ -0,0 +1,65 @@
 \name{bootstrap.mean.test}
 \alias{bootstrap.mean.test}
 \title{Bootstrap Test for Means}
 \description{Performs one and two sample bootstrap tests for means on vectors of data}
 \usage{
 bootstrap.mean.test(x, ...)
 ## Default method:
 bootstrap.mean.test(x, y = NULL, alternative = c("two.sided", "less", "greater"), mu = 0, conf.level = 0.95, bootstrap_samples = 1e+06)
 }
 \arguments{
  \item{x}{a (non-empty) numeric vector of data values.}
  \item{y}{an optional (non-empty) numeric vector of data values.}
  \item{alternative}{a character string specifying the alternative hypothesis, must be one of "two.sided" (default), "greater" or "less". You can specify just the initial letter.}
  \item{mu}{a number indicating the true value of the mean (or difference in means if you are performing a two sample test).}
  \item{conf.level}{confidence level of the interval.}
  \item{bootstrap_samples}{the number of bootstrap replicants to generate. Influences the accuracy of the p-value and confidence interval}
 }
 \details{
 alternative = "greater" is the alternative that x has a larger mean than y.
 If the input data are effectively constant (compared to the larger of the two means) an error is generated.
 }
 \value{
 A list with class \code{"htest"} containing the following components:
  \item{p.value}{the p-value for the test.}
  \item{conf.int}{a confidence interval for the mean appropriate to the
    specified alternative hypothesis.}
  \item{estimate}{the estimated mean or difference in means depending on
    whether it was a one-sample test or a two-sample test.}
  \item{null.value}{the specified hypothesized value of the mean or mean
    difference depending on whether it was a one-sample test or a
    two-sample test.}
  \item{alternative}{a character string describing the alternative
    hypothesis.}
  \item{method}{a character string indicating what type of t-test was
    performed.}
  \item{data.name}{a character string giving the name(s) of the data.}
 }
 \references{
 Gould, Rob. Bootstrap Hypothesis Test. PDF.
 http://www.stat.ucla.edu/~rgould/110as02/bshypothesis.pdf
 Myung, Jay. Bootstrap Hypothesis Testing. PDF.
 http://faculty.psy.ohio-state.edu/myung/personal/course/826/bootstrap_hypo.pdf
 }
 \author{
 Brandon Rozek (brozek@mail.umw.edu)
 }
 \seealso{
  \code{\link{t.test}}
 }
 \examples{
 x = rnorm(200, 2, 5)
 y = rnorm(200, 2, 5)
 # One sample test
 bootstrap.mean.test(x, mu = 1)
 # Two sample test
 bootstrap.mean.test(x, y)
 }
 \keyword{htest}