These functions simplify and unify sampling in various ways.
resample(..., replace = TRUE)
deal(...)
shuffle(x, replace = FALSE, prob = NULL, groups = NULL, orig.ids = FALSE)
sample(x, size, replace = FALSE, ...)
# S3 method for default
sample(
x,
size,
replace = FALSE,
prob = NULL,
groups = NULL,
orig.ids = FALSE,
...
)
# S3 method for data.frame
sample(
x,
size,
replace = FALSE,
prob = NULL,
groups = NULL,
orig.ids = TRUE,
fixed = names(x),
shuffled = c(),
invisibly.return = NULL,
...
)
# S3 method for matrix
sample(
x,
size,
replace = FALSE,
prob = NULL,
groups = NULL,
orig.ids = FALSE,
...
)
# S3 method for factor
sample(
x,
size,
replace = FALSE,
prob = NULL,
groups = NULL,
orig.ids = FALSE,
drop.unused.levels = FALSE,
...
)
# S3 method for lm
sample(
x,
size,
replace = FALSE,
prob = NULL,
groups = NULL,
orig.ids = FALSE,
drop.unused.levels = FALSE,
parametric = FALSE,
transformation = NULL,
...
)
additional arguments passed to base::sample() or sample().
Should sampling be with replacement?
Either a vector of one or more elements from which to choose, or a positive integer.
A vector of probability weights for obtaining the elements of the vector being sampled.
a vector (or variable in a data frame) specifying groups to sample within. This will be recycled if necessary.
a logical; should original ids be included in returned data frame?
a non-negative integer giving the number of items to choose.
a vector of column names. These variables are shuffled en masse, preserving associations among these columns.
a vector of column names.
These variables are reshuffled individually (within groups if groups is
specified), breaking associations among these columns.
See examples.
a logical, should return be invisible?
a logical, should unused levels be dropped?
A logical indicating whether the resampling should be done parametrically.
NULL or a function providing a transformation to be applied to the
synthetic responses. If NULL, an attempt is made to infer the appropriate transformation
from the original call as recorded in x.
These functions are wrappers around sample() providing different defaults and
natural names.
# 100 Bernoulli trials -- no need for replace=TRUE
resample(0:1, 100)
#> [1] 0 0 1 0 1 1 0 0 1 0 1 1 1 0 1 0 1 1 0 0 1 0 1 0 0 1 0 0 0 0 1 1 1 1 1 1 0
#> [38] 0 0 0 1 0 1 0 1 1 1 1 1 1 1 0 0 1 1 1 1 1 0 1 1 1 1 1 1 0 0 0 1 1 0 1 0 1
#> [75] 1 1 0 1 1 1 0 0 1 1 0 0 1 0 1 1 0 0 1 1 0 1 0 1 0 0
tally(resample(0:1, 100))
#> X
#> 0 1
#> 52 48
if (require(mosaicData)) {
Small <- sample(KidsFeet, 10)
resample(Small)
tally(~ sex, data=resample(Small))
tally(~ sex, data=resample(Small))
# fixed marginals for sex
tally(~ sex, data=Small)
tally(~ sex, data=resample(Small, groups=sex))
# shuffled can be used to reshuffle some variables within groups
# orig.id shows where the values were in original data frame.
Small <- mutate(Small,
id1 = paste(sex,1:10, sep=":"),
id2 = paste(sex,1:10, sep=":"))
resample(Small, groups=sex, shuffled=c("id1","id2"))
}
#> name birthmonth birthyear length width sex biggerfoot domhand orig.id
#> 3 Zach 12 87 24.5 9.7 B R R 8.8.1
#> 6 Scotty 3 88 25.7 9.7 B R R 1.1.8
#> 6.1 Scotty 3 88 25.7 9.7 B R R 1.1.1
#> 38 Hayley 1 88 21.6 7.9 G R R 10.6.7
#> 20 Heather 3 88 25.5 9.5 G R R 6.2.2
#> 15 Julie 11 87 26.0 9.3 G L R 2.7.6
#> 20.1 Heather 3 88 25.5 9.5 G R R 6.3.3
#> 27 Abby 2 88 26.1 9.5 G L R 3.6.10
#> 8 Caitlin 6 88 23.0 8.8 G L R 7.7.6
#> 8.1 Caitlin 6 88 23.0 8.8 G L R 7.10.7
#> id1 id2
#> 3 B:8 B:1
#> 6 B:1 B:8
#> 6.1 B:1 B:1
#> 38 G:6 G:7
#> 20 G:2 G:2
#> 15 G:7 G:6
#> 20.1 G:3 G:3
#> 27 G:6 G:10
#> 8 G:7 G:6
#> 8.1 G:10 G:7
deal(Cards, 13) # A Bridge hand
#> [1] "AC" "AS" "KH" "3H" "8H" "9H" "10H" "8C" "AD" "7C" "4C" "KS"
#> [13] "6D"
shuffle(Cards)
#> [1] "JC" "6H" "5S" "4H" "KS" "3C" "7D" "2D" "QD" "2H" "10D" "8C"
#> [13] "QH" "JD" "5H" "4S" "5C" "AD" "8H" "10C" "KH" "4D" "QS" "AH"
#> [25] "4C" "7H" "KD" "JS" "KC" "6S" "7S" "7C" "10H" "9H" "9C" "JH"
#> [37] "AC" "3H" "2C" "3D" "6C" "5D" "AS" "8D" "10S" "3S" "2S" "6D"
#> [49] "9S" "QC" "9D" "8S"
model <- lm(width ~length * sex, data = KidsFeet)
KidsFeet |> head()
#> name birthmonth birthyear length width sex biggerfoot domhand
#> 1 David 5 88 24.4 8.4 B L R
#> 2 Lars 10 87 25.4 8.8 B L L
#> 3 Zach 12 87 24.5 9.7 B R R
#> 4 Josh 1 88 25.2 9.8 B L R
#> 5 Lang 2 88 25.1 8.9 B L R
#> 6 Scotty 3 88 25.7 9.7 B R R
resample(model) |> head()
#> width length sex
#> 1 8.593635 24.4 B
#> 2 9.420481 25.4 B
#> 3 9.444851 24.5 B
#> 4 8.763734 25.2 B
#> 5 9.141002 25.1 B
#> 6 9.090498 25.7 B
Boot <- do(500) * lm(width ~ length * sex, data = resample(KidsFeet))
#> Using parallel package.
#> * Set seed with set.rseed().
#> * Disable this message with options(`mosaic:parallelMessage` = FALSE)
df_stats(~ Intercept + length + sexG + length.sexG, data = Boot, sd)
#> response sd
#> 1 Intercept 1.34047944
#> 2 length 0.05289191
#> 3 sexG 1.99298145
#> 4 length.sexG 0.07979467
head(Boot)
#> Intercept length sexG length.sexG sigma r.squared F
#> 1 4.061731 0.1998271 -1.627390 0.055943489 0.3988934 0.4812676 10.824056
#> 2 3.396541 0.2307611 2.059666 -0.095689727 0.3896124 0.4227335 8.543525
#> 3 2.657438 0.2533723 1.783024 -0.071805843 0.3559803 0.3278852 5.691478
#> 4 3.890018 0.2063346 -6.725233 0.257798306 0.4043891 0.4622521 10.028755
#> 5 2.346084 0.2797201 -0.195402 -0.009428255 0.3656148 0.5828985 16.304145
#> 6 5.350636 0.1572710 -2.605837 0.090509618 0.3595442 0.5509561 14.314462
#> numdf dendf .row .index
#> 1 3 35 1 1
#> 2 3 35 1 2
#> 3 3 35 1 3
#> 4 3 35 1 4
#> 5 3 35 1 5
#> 6 3 35 1 6
summary(coef(model))
#> Min. 1st Qu. Median Mean 3rd Qu. Max.
#> -0.6239 -0.1441 0.1142 0.8642 1.1225 3.8521