Number of births in the United States. Several data sets are provided; they cover different date ranges and draw on different sources.

data(Births)

data(Births78)

data(Births2015)

data(BirthsSSA)

data(BirthsCDC)

Format

A data.frame with the following 8 variables.

date

Date

births

Number of births on date (integer)

wday

Day of week (ordered factor)

year

Year (integer)

month

Month (integer)

day_of_year

Day of year (integer)

day_of_month

Day of month (integer)

day_of_week

Day of week (integer)

Source

Details

There are some overlapping dates in the various data sets, but the number of births does not always agree due to the different sources of the data. See the examples.

See also

Birthdays for a data set aggregated at the state level.

Examples

# Load every births data set so their date ranges can be compared.
data(Births78)
data(Births2015)
data(Births)
data(BirthsSSA)
data(BirthsCDC)
# date ranges for the different data sets
lapply(
  list(
    Births = Births,
    Births78 = Births78,
    Births2015 = Births2015,  # label was previously misspelled "Biths2015"
    BirthsSSA = BirthsSSA,
    BirthsCDC = BirthsCDC
  ),
  function(x) range(x$date)
)
#> $Births
#> [1] "1969-01-01" "1988-12-31"
#> 
#> $Births78
#> [1] "1978-01-01" "1978-12-31"
#> 
#> $Births2015
#> [1] "2015-01-01" "2015-12-31"
#> 
#> $BirthsSSA
#> [1] "2000-01-01" "2014-12-31"
#> 
#> $BirthsCDC
#> [1] "1994-01-01" "2003-12-31"
#> 
# The same date ranges, queried one data set at a time.
range(Births78[["date"]])
#> [1] "1978-01-01" "1978-12-31"
range(Births2015[["date"]])
#> [1] "2015-01-01" "2015-12-31"
range(Births[["date"]])
#> [1] "1969-01-01" "1988-12-31"
range(BirthsSSA[["date"]])
#> [1] "2000-01-01" "2014-12-31"
range(BirthsCDC[["date"]])
#> [1] "1994-01-01" "2003-12-31"

# Births and Births78 have slightly different numbers of births

# ggplot2 and dplyr are optional for these examples, so each part only runs
# when the corresponding package can be attached.
if (require(ggplot2)) {
  # A plot built inside `if () { }` is only auto-printed when it is the last
  # value of the block, so every plot is printed explicitly.

  # Smoothed daily births over time, one line per day of the week.
  # `colour` is mapped to the `wday` column itself (not a formula).
  print(
    ggplot(data = Births, aes(x = date, y = births, colour = wday)) +
      stat_smooth(se = FALSE, alpha = 0.8, geom = "line")
  )

  # Births by day of year with a smoother per day of the week.
  # `linewidth` replaces the `size` aesthetic for lines (ggplot2 >= 3.4.0).
  print(
    ggplot(data = Births, aes(x = day_of_year, y = births, colour = wday)) +
      geom_point(size = 0.4, alpha = 0.5) +
      stat_smooth(se = FALSE, geom = "line", alpha = 0.6, linewidth = 1.5)
  )

  if (require(dplyr)) {
    # Pair the two sources by date rather than by row position; this also
    # avoids duplicated column names in the combined data.
    print(
      ggplot(
        data = inner_join(
          Births |> filter(year == 1978) |> select(date, births),
          Births78 |> select(date, births78 = births),
          by = "date"
        ),
        aes(x = births - births78)
      ) +
        geom_histogram(binwidth = 1)
    )
  }
}


# ggplot2 and dplyr are optional for these examples, so each part only runs
# when the corresponding package can be attached.
if (require(ggplot2)) {
  # A plot built inside `if () { }` is only auto-printed when it is the last
  # value of the block, so every plot is printed explicitly.

  # Smoothed daily births over time, one line per day of the week.
  # `colour` is mapped to the `wday` column itself (not a formula).
  print(
    ggplot(data = Births, aes(x = date, y = births, colour = wday)) +
      stat_smooth(se = FALSE, alpha = 0.8, geom = "line")
  )

  # Births by day of year with a smoother per day of the week.
  # `linewidth` replaces the `size` aesthetic for lines (ggplot2 >= 3.4.0).
  print(
    ggplot(data = Births, aes(x = day_of_year, y = births, colour = wday)) +
      geom_point(size = 0.4, alpha = 0.5) +
      stat_smooth(se = FALSE, geom = "line", alpha = 0.6, linewidth = 1.5)
  )

  if (require(dplyr)) {
    # Births and Births78 have slightly different numbers of births.
    # Pair the two sources by date rather than by row position; this also
    # avoids duplicated column names in the combined data.
    print(
      ggplot(
        data = inner_join(
          Births |> filter(year == 1978) |> select(date, births),
          Births78 |> select(date, births78 = births),
          by = "date"
        ),
        aes(x = births - births78)
      ) +
        geom_histogram(binwidth = 1)
    )

    # SSA records more births than CDC
    # The join keeps only the dates present in both data sets
    # (2000-01-01 through 2003-12-31 according to their date ranges).
    print(
      ggplot(
        data = inner_join(
          BirthsSSA |> select(date, SSA = births),
          BirthsCDC |> select(date, CDC = births),
          by = "date"
        ),
        aes(x = SSA - CDC)
      ) +
        geom_histogram(binwidth = 10)
    )
  }
}