More Random Samples — resample • mosaic

These functions simplify and unify sampling in various ways.

resample(..., replace = TRUE)

deal(...)

shuffle(x, replace = FALSE, prob = NULL, groups = NULL, orig.ids = FALSE)

sample(x, size, replace = FALSE, ...)

# S3 method for default
sample(
  x,
  size,
  replace = FALSE,
  prob = NULL,
  groups = NULL,
  orig.ids = FALSE,
  ...
)

# S3 method for data.frame
sample(
  x,
  size,
  replace = FALSE,
  prob = NULL,
  groups = NULL,
  orig.ids = TRUE,
  fixed = names(x),
  shuffled = c(),
  invisibly.return = NULL,
  ...
)

# S3 method for matrix
sample(
  x,
  size,
  replace = FALSE,
  prob = NULL,
  groups = NULL,
  orig.ids = FALSE,
  ...
)

# S3 method for factor
sample(
  x,
  size,
  replace = FALSE,
  prob = NULL,
  groups = NULL,
  orig.ids = FALSE,
  drop.unused.levels = FALSE,
  ...
)

# S3 method for lm
sample(
  x,
  size,
  replace = FALSE,
  prob = NULL,
  groups = NULL,
  orig.ids = FALSE,
  drop.unused.levels = FALSE,
  parametric = FALSE,
  transformation = NULL,
  ...
)

Arguments

...: additional arguments passed to base::sample() or sample().
replace: Should sampling be with replacement?
x: Either a vector of one or more elements from which to choose, or a positive integer.
prob: A vector of probability weights for obtaining the elements of the vector being sampled.
groups: a vector (or variable in a data frame) specifying groups to sample within. This will be recycled if necessary.
orig.ids: a logical; should original ids be included in returned data frame?
size: a non-negative integer giving the number of items to choose.
fixed: a vector of column names. These variables are shuffled en masse, preserving associations among these columns.
shuffled: a vector of column names. These variables are reshuffled individually (within groups if groups is specified), breaking associations among these columns. See the examples.
invisibly.return: a logical, should return be invisible?
drop.unused.levels: a logical, should unused levels be dropped?
parametric: A logical indicating whether the resampling should be done parametrically.
transformation: NULL or a function providing a transformation to be applied to the synthetic responses. If NULL, an attempt is made to infer the appropriate transformation from the original call as recorded in x.

Details

These functions are wrappers around sample() providing different defaults and natural names.

Examples

# 100 Bernoulli trials -- no need for replace=TRUE
resample(0:1, 100)
#>   [1] 0 0 1 0 1 1 0 0 1 0 1 1 1 0 1 0 1 1 0 0 1 0 1 0 0 1 0 0 0 0 1 1 1 1 1 1 0
#>  [38] 0 0 0 1 0 1 0 1 1 1 1 1 1 1 0 0 1 1 1 1 1 0 1 1 1 1 1 1 0 0 0 1 1 0 1 0 1
#>  [75] 1 1 0 1 1 1 0 0 1 1 0 0 1 0 1 1 0 0 1 1 0 1 0 1 0 0
tally(resample(0:1, 100))
#> X
#>  0  1 
#> 52 48 
if (require(mosaicData)) {
Small <- sample(KidsFeet, 10)
resample(Small)
tally(~ sex, data=resample(Small))
tally(~ sex, data=resample(Small))
# fixed marginals for sex
tally(~ sex, data=Small)
tally(~ sex, data=resample(Small, groups=sex)) 
# shuffled can be used to reshuffle some variables within groups
# orig.id shows where the values were in the original data frame.
Small <- mutate(Small, 
   id1 = paste(sex,1:10, sep=":"),  
   id2 = paste(sex,1:10, sep=":"))
resample(Small, groups=sex, shuffled=c("id1","id2"))
}
#>         name birthmonth birthyear length width sex biggerfoot domhand orig.id
#> 3       Zach         12        87   24.5   9.7   B          R       R   8.8.1
#> 6     Scotty          3        88   25.7   9.7   B          R       R   1.1.8
#> 6.1   Scotty          3        88   25.7   9.7   B          R       R   1.1.1
#> 38    Hayley          1        88   21.6   7.9   G          R       R  10.6.7
#> 20   Heather          3        88   25.5   9.5   G          R       R   6.2.2
#> 15     Julie         11        87   26.0   9.3   G          L       R   2.7.6
#> 20.1 Heather          3        88   25.5   9.5   G          R       R   6.3.3
#> 27      Abby          2        88   26.1   9.5   G          L       R  3.6.10
#> 8    Caitlin          6        88   23.0   8.8   G          L       R   7.7.6
#> 8.1  Caitlin          6        88   23.0   8.8   G          L       R  7.10.7
#>       id1  id2
#> 3     B:8  B:1
#> 6     B:1  B:8
#> 6.1   B:1  B:1
#> 38    G:6  G:7
#> 20    G:2  G:2
#> 15    G:7  G:6
#> 20.1  G:3  G:3
#> 27    G:6 G:10
#> 8     G:7  G:6
#> 8.1  G:10  G:7
deal(Cards, 13)    # A Bridge hand
#>  [1] "AC"  "AS"  "KH"  "3H"  "8H"  "9H"  "10H" "8C"  "AD"  "7C"  "4C"  "KS" 
#> [13] "6D" 
shuffle(Cards)
#>  [1] "JC"  "6H"  "5S"  "4H"  "KS"  "3C"  "7D"  "2D"  "QD"  "2H"  "10D" "8C" 
#> [13] "QH"  "JD"  "5H"  "4S"  "5C"  "AD"  "8H"  "10C" "KH"  "4D"  "QS"  "AH" 
#> [25] "4C"  "7H"  "KD"  "JS"  "KC"  "6S"  "7S"  "7C"  "10H" "9H"  "9C"  "JH" 
#> [37] "AC"  "3H"  "2C"  "3D"  "6C"  "5D"  "AS"  "8D"  "10S" "3S"  "2S"  "6D" 
#> [49] "9S"  "QC"  "9D"  "8S" 
model <- lm(width ~length * sex, data = KidsFeet)
KidsFeet |> head()
#>     name birthmonth birthyear length width sex biggerfoot domhand
#> 1  David          5        88   24.4   8.4   B          L       R
#> 2   Lars         10        87   25.4   8.8   B          L       L
#> 3   Zach         12        87   24.5   9.7   B          R       R
#> 4   Josh          1        88   25.2   9.8   B          L       R
#> 5   Lang          2        88   25.1   8.9   B          L       R
#> 6 Scotty          3        88   25.7   9.7   B          R       R
resample(model) |> head()
#>      width length sex
#> 1 8.593635   24.4   B
#> 2 9.420481   25.4   B
#> 3 9.444851   24.5   B
#> 4 8.763734   25.2   B
#> 5 9.141002   25.1   B
#> 6 9.090498   25.7   B
Boot <- do(500) * lm(width ~ length * sex, data = resample(KidsFeet))
#> Using parallel package.
#>   * Set seed with set.rseed().
#>   * Disable this message with options(`mosaic:parallelMessage` = FALSE)
df_stats(~ Intercept + length + sexG + length.sexG, data = Boot, sd)
#>      response         sd
#> 1   Intercept 1.34047944
#> 2      length 0.05289191
#> 3        sexG 1.99298145
#> 4 length.sexG 0.07979467
head(Boot)
#>   Intercept    length      sexG  length.sexG     sigma r.squared         F
#> 1  4.061731 0.1998271 -1.627390  0.055943489 0.3988934 0.4812676 10.824056
#> 2  3.396541 0.2307611  2.059666 -0.095689727 0.3896124 0.4227335  8.543525
#> 3  2.657438 0.2533723  1.783024 -0.071805843 0.3559803 0.3278852  5.691478
#> 4  3.890018 0.2063346 -6.725233  0.257798306 0.4043891 0.4622521 10.028755
#> 5  2.346084 0.2797201 -0.195402 -0.009428255 0.3656148 0.5828985 16.304145
#> 6  5.350636 0.1572710 -2.605837  0.090509618 0.3595442 0.5509561 14.314462
#>   numdf dendf .row .index
#> 1     3    35    1      1
#> 2     3    35    1      2
#> 3     3    35    1      3
#> 4     3    35    1      4
#> 5     3    35    1      5
#> 6     3    35    1      6
summary(coef(model))
#>    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
#> -0.6239 -0.1441  0.1142  0.8642  1.1225  3.8521