These functions simplify and unify sampling in various ways.

resample(..., replace = TRUE)

deal(...)

shuffle(x, replace = FALSE, prob = NULL, groups = NULL, orig.ids = FALSE)

sample(x, size, replace = FALSE, ...)

# S3 method for default
sample(
x,
size,
replace = FALSE,
prob = NULL,
groups = NULL,
orig.ids = FALSE,
...
)

# S3 method for data.frame
sample(
x,
size,
replace = FALSE,
prob = NULL,
groups = NULL,
orig.ids = TRUE,
fixed = names(x),
shuffled = c(),
invisibly.return = NULL,
...
)

# S3 method for matrix
sample(
x,
size,
replace = FALSE,
prob = NULL,
groups = NULL,
orig.ids = FALSE,
...
)

# S3 method for factor
sample(
x,
size,
replace = FALSE,
prob = NULL,
groups = NULL,
orig.ids = FALSE,
drop.unused.levels = FALSE,
...
)

# S3 method for lm
sample(
x,
size,
replace = FALSE,
prob = NULL,
groups = NULL,
orig.ids = FALSE,
drop.unused.levels = FALSE,
parametric = FALSE,
transformation = NULL,
...
)

## Arguments

...

additional arguments passed to base::sample() or sample().

replace

Should sampling be with replacement?

x

Either a vector of one or more elements from which to choose, or a positive integer.

prob

A vector of probability weights for obtaining the elements of the vector being sampled.

groups

a vector (or variable in a data frame) specifying groups to sample within. This will be recycled if necessary.

orig.ids

a logical; should original ids be included in returned data frame?

size

a non-negative integer giving the number of items to choose.

fixed

a vector of column names. These variables are shuffled en masse, preserving associations among these columns.

shuffled

a vector of column names. These variables are reshuffled individually (within groups if groups is specified), breaking associations among these columns. See the examples.

invisibly.return

a logical; should the return value be invisible?

drop.unused.levels

a logical; should unused levels be dropped?

parametric

A logical indicating whether the resampling should be done parametrically.

transformation

NULL or a function providing a transformation to be applied to the synthetic responses. If NULL, an attempt is made to infer the appropriate transformation from the original call as recorded in x.

## Details

These functions are wrappers around sample() providing different defaults and natural names.

## Examples

# 100 Bernoulli trials -- no need for replace=TRUE
resample(0:1, 100)
#>   [1] 0 0 1 0 1 1 0 0 1 0 1 1 1 0 1 0 1 1 0 0 1 0 1 0 0 1 0 0 0 0 1 1 1 1 1 1 0
#>  [38] 0 0 0 1 0 1 0 1 1 1 1 1 1 1 0 0 1 1 1 1 1 0 1 1 1 1 1 1 0 0 0 1 1 0 1 0 1
#>  [75] 1 1 0 1 1 1 0 0 1 1 0 0 1 0 1 1 0 0 1 1 0 1 0 1 0 0
tally(resample(0:1, 100))
#> X
#>  0  1
#> 52 48
if (require(mosaicData)) {
Small <- sample(KidsFeet, 10)
resample(Small)
tally(~ sex, data=resample(Small))
tally(~ sex, data=resample(Small))
# fixed marginals for sex
tally(~ sex, data=Small)
tally(~ sex, data=resample(Small, groups=sex))
# shuffled can be used to reshuffle some variables within groups
# orig.id shows where the values were in original data frame.
Small <- mutate(Small,
id1 = paste(sex,1:10, sep=":"),
id2 = paste(sex,1:10, sep=":"))
resample(Small, groups=sex, shuffled=c("id1","id2"))
}
#>         name birthmonth birthyear length width sex biggerfoot domhand orig.id
#> 3       Zach         12        87   24.5   9.7   B          R       R   8.8.1
#> 6     Scotty          3        88   25.7   9.7   B          R       R   1.1.8
#> 6.1   Scotty          3        88   25.7   9.7   B          R       R   1.1.1
#> 38    Hayley          1        88   21.6   7.9   G          R       R  10.6.7
#> 20   Heather          3        88   25.5   9.5   G          R       R   6.2.2
#> 15     Julie         11        87   26.0   9.3   G          L       R   2.7.6
#> 20.1 Heather          3        88   25.5   9.5   G          R       R   6.3.3
#> 27      Abby          2        88   26.1   9.5   G          L       R  3.6.10
#> 8    Caitlin          6        88   23.0   8.8   G          L       R   7.7.6
#> 8.1  Caitlin          6        88   23.0   8.8   G          L       R  7.10.7
#>       id1  id2
#> 3     B:8  B:1
#> 6     B:1  B:8
#> 6.1   B:1  B:1
#> 38    G:6  G:7
#> 20    G:2  G:2
#> 15    G:7  G:6
#> 20.1  G:3  G:3
#> 27    G:6 G:10
#> 8     G:7  G:6
#> 8.1  G:10  G:7
deal(Cards, 13)    # A Bridge hand
#>  [1] "AC"  "AS"  "KH"  "3H"  "8H"  "9H"  "10H" "8C"  "AD"  "7C"  "4C"  "KS"
#> [13] "6D"
shuffle(Cards)
#>  [1] "JC"  "6H"  "5S"  "4H"  "KS"  "3C"  "7D"  "2D"  "QD"  "2H"  "10D" "8C"
#> [13] "QH"  "JD"  "5H"  "4S"  "5C"  "AD"  "8H"  "10C" "KH"  "4D"  "QS"  "AH"
#> [25] "4C"  "7H"  "KD"  "JS"  "KC"  "6S"  "7S"  "7C"  "10H" "9H"  "9C"  "JH"
#> [37] "AC"  "3H"  "2C"  "3D"  "6C"  "5D"  "AS"  "8D"  "10S" "3S"  "2S"  "6D"
#> [49] "9S"  "QC"  "9D"  "8S"
model <- lm(width ~length * sex, data = KidsFeet)
#>     name birthmonth birthyear length width sex biggerfoot domhand
#> 1  David          5        88   24.4   8.4   B          L       R
#> 2   Lars         10        87   25.4   8.8   B          L       L
#> 3   Zach         12        87   24.5   9.7   B          R       R
#> 4   Josh          1        88   25.2   9.8   B          L       R
#> 5   Lang          2        88   25.1   8.9   B          L       R
#> 6 Scotty          3        88   25.7   9.7   B          R       R
#>      width length sex
#> 1 8.593635   24.4   B
#> 2 9.420481   25.4   B
#> 3 9.444851   24.5   B
#> 4 8.763734   25.2   B
#> 5 9.141002   25.1   B
#> 6 9.090498   25.7   B
Boot <- do(500) * lm(width ~ length * sex, data = resample(KidsFeet))
#> Using parallel package.
#>   * Set seed with set.rseed().
#>   * Disable this message with options(mosaic:parallelMessage = FALSE)
df_stats(~ Intercept + length + sexG + length.sexG, data = Boot, sd)
#>      response         sd
#> 1   Intercept 1.34047944
#> 2      length 0.05289191
#> 3        sexG 1.99298145
#> 4 length.sexG 0.07979467
#>   Intercept    length      sexG  length.sexG     sigma r.squared         F
#> 1  4.061731 0.1998271 -1.627390  0.055943489 0.3988934 0.4812676 10.824056
#> 2  3.396541 0.2307611  2.059666 -0.095689727 0.3896124 0.4227335  8.543525
#> 3  2.657438 0.2533723  1.783024 -0.071805843 0.3559803 0.3278852  5.691478
#> 4  3.890018 0.2063346 -6.725233  0.257798306 0.4043891 0.4622521 10.028755
#> 5  2.346084 0.2797201 -0.195402 -0.009428255 0.3656148 0.5828985 16.304145
#> 6  5.350636 0.1572710 -2.605837  0.090509618 0.3595442 0.5509561 14.314462
#>   numdf dendf .row .index
#> 1     3    35    1      1
#> 2     3    35    1      2
#> 3     3    35    1      3
#> 4     3    35    1      4
#> 5     3    35    1      5
#> 6     3    35    1      6
summary(coef(model))
#>    Min. 1st Qu.  Median    Mean 3rd Qu.    Max.
#> -0.6239 -0.1441  0.1142  0.8642  1.1225  3.8521