These functions simplify and unify sampling in various ways.
resample(..., replace = TRUE)
deal(...)
shuffle(x, replace = FALSE, prob = NULL, groups = NULL, orig.ids = FALSE)
sample(x, size, replace = FALSE, ...)
# Default S3 method
sample(
x,
size,
replace = FALSE,
prob = NULL,
groups = NULL,
orig.ids = FALSE,
...
)
# S3 method for class 'data.frame'
sample(
x,
size,
replace = FALSE,
prob = NULL,
groups = NULL,
orig.ids = TRUE,
fixed = names(x),
shuffled = c(),
invisibly.return = NULL,
...
)
# S3 method for class 'matrix'
sample(
x,
size,
replace = FALSE,
prob = NULL,
groups = NULL,
orig.ids = FALSE,
...
)
# S3 method for class 'factor'
sample(
x,
size,
replace = FALSE,
prob = NULL,
groups = NULL,
orig.ids = FALSE,
drop.unused.levels = FALSE,
...
)
# S3 method for class 'lm'
sample(
x,
size,
replace = FALSE,
prob = NULL,
groups = NULL,
orig.ids = FALSE,
drop.unused.levels = FALSE,
parametric = FALSE,
transformation = NULL,
...
)
...: additional arguments passed to
base::sample()
or sample().
replace: Should sampling be with replacement?
x: Either a vector of one or more elements from which to choose, or a positive integer.
prob: A vector of probability weights for obtaining the elements of the vector being sampled.
groups: a vector (or variable in a data frame) specifying groups to sample within. This will be recycled if necessary.
orig.ids: a logical; should original ids be included in returned data frame?
size: a non-negative integer giving the number of items to choose.
fixed: a vector of column names. These variables are shuffled en masse, preserving associations among these columns.
shuffled: a vector of column names.
These variables are reshuffled individually (within groups if groups is
specified), breaking associations among these columns.
See the examples.
invisibly.return: a logical, should return be invisible?
drop.unused.levels: a logical, should unused levels be dropped?
parametric: A logical indicating whether the resampling should be done parametrically.
transformation: NULL or a function providing a transformation to be applied to the
synthetic responses. If NULL, an attempt is made to infer the appropriate transformation
from the original call as recorded in x.
These functions are wrappers around sample() providing different defaults and
natural names.
# 100 Bernoulli trials -- no need for replace=TRUE
resample(0:1, 100)
#> [1] 1 1 1 1 1 1 1 0 0 0 1 0 0 1 1 0 0 1 0 0 0 1 1 1 1 0 1 1 1 0 1 1 0 1 1 1 0
#> [38] 1 1 0 0 1 0 0 1 1 0 0 0 1 1 1 0 0 1 1 0 0 1 1 1 0 0 0 1 0 1 1 1 0 0 0 1 1
#> [75] 1 1 0 1 0 1 0 1 1 1 0 0 0 0 0 0 0 0 0 1 0 1 0 1 1 0
tally(resample(0:1, 100))
#> X
#> 0 1
#> 54 46
if (require(mosaicData)) {
Small <- sample(KidsFeet, 10)
resample(Small)
tally(~ sex, data=resample(Small))
tally(~ sex, data=resample(Small))
# fixed marginals for sex
tally(~ sex, data=Small)
tally(~ sex, data=resample(Small, groups=sex))
# shuffled can be used to reshuffle some variables within groups
# orig.id shows where the values were in original data frame.
Small <- mutate(Small,
id1 = paste(sex,1:10, sep=":"),
id2 = paste(sex,1:10, sep=":"))
resample(Small, groups=sex, shuffled=c("id1","id2"))
}
#> name birthmonth birthyear length width sex biggerfoot domhand orig.id
#> 1 David 5 88 24.4 8.4 B L R 10.3.3
#> 2 Lars 10 87 25.4 8.8 B L L 8.8.8
#> 22 Josh 7 88 24.4 8.6 B L R 3.8.8
#> 2.1 Lars 10 87 25.4 8.8 B L L 8.10.4
#> 13 Cal 8 87 26.1 9.1 B L R 4.8.10
#> 22.1 Josh 7 88 24.4 8.6 B L R 3.3.8
#> 2.2 Lars 10 87 25.4 8.8 B L L 8.4.3
#> 31 Danielle 6 88 24.0 9.3 G L R 5.5.5
#> 31.1 Danielle 6 88 24.0 9.3 G L R 5.5.6
#> 32 Caitlin 7 88 22.5 8.6 G R R 6.6.5
#> id1 id2
#> 1 B:3 B:3
#> 2 B:8 B:8
#> 22 B:8 B:8
#> 2.1 B:10 B:4
#> 13 B:8 B:10
#> 22.1 B:3 B:8
#> 2.2 B:4 B:3
#> 31 G:5 G:5
#> 31.1 G:5 G:6
#> 32 G:6 G:5
deal(Cards, 13) # A Bridge hand
#> [1] "5H" "AD" "4S" "6H" "5D" "JD" "AH" "7D" "4C" "9H" "QD" "5S" "6S"
shuffle(Cards)
#> [1] "6H" "QC" "QS" "4C" "KC" "AH" "KD" "10C" "2D" "KS" "3C" "4H"
#> [13] "9S" "QD" "10D" "QH" "9C" "JH" "8C" "8D" "2C" "5S" "7H" "9H"
#> [25] "6D" "KH" "3S" "9D" "8H" "7S" "2H" "7C" "AC" "JD" "4D" "3H"
#> [37] "6S" "5H" "10H" "3D" "JC" "5D" "10S" "JS" "5C" "AS" "6C" "7D"
#> [49] "4S" "AD" "8S" "2S"
model <- lm(width ~length * sex, data = KidsFeet)
KidsFeet |> head()
#> name birthmonth birthyear length width sex biggerfoot domhand
#> 1 David 5 88 24.4 8.4 B L R
#> 2 Lars 10 87 25.4 8.8 B L L
#> 3 Zach 12 87 24.5 9.7 B R R
#> 4 Josh 1 88 25.2 9.8 B L R
#> 5 Lang 2 88 25.1 8.9 B L R
#> 6 Scotty 3 88 25.7 9.7 B R R
resample(model) |> head()
#> width length sex
#> 1 9.450968 24.4 B
#> 2 9.541661 25.4 B
#> 3 9.132142 24.5 B
#> 4 8.892623 25.2 B
#> 5 8.887376 25.1 B
#> 6 9.027574 25.7 B
Boot <- do(500) * lm(width ~ length * sex, data = resample(KidsFeet))
df_stats(~ Intercept + length + sexG + length.sexG, data = Boot, sd)
#> response sd
#> 1 Intercept 1.45545587
#> 2 length 0.05719653
#> 3 sexG 2.04265257
#> 4 length.sexG 0.08144980
head(Boot)
#> Intercept length sexG length.sexG sigma r.squared F
#> 1 3.911041 0.2157749 -1.6956410 0.047812597 0.3655883 0.5642731 15.108515
#> 2 3.195788 0.2401644 3.0358974 -0.130008268 0.3759059 0.4025102 7.859469
#> 3 5.461899 0.1501777 -1.8295527 0.066548966 0.3583709 0.4176635 8.367571
#> 4 2.776535 0.2520132 -1.2200554 0.043186805 0.4344815 0.3925011 7.537756
#> 5 3.983021 0.2094032 -0.3516293 0.005607108 0.3355874 0.5256579 12.928801
#> 6 6.513272 0.1116401 -3.7683595 0.142711321 0.3449032 0.5585876 14.763646
#> numdf dendf .row .index
#> 1 3 35 1 1
#> 2 3 35 1 2
#> 3 3 35 1 3
#> 4 3 35 1 4
#> 5 3 35 1 5
#> 6 3 35 1 6
summary(coef(model))
#> Min. 1st Qu. Median Mean 3rd Qu. Max.
#> -0.6239 -0.1441 0.1142 0.8642 1.1225 3.8521